Browse Source

feat(w11):W11-孟鑫垚-202506010204

main
Mengxinyao 2 weeks ago
parent
commit
481ad449fd
  1. 11
      w11/exception/CrawlerException.java
  2. 11
      w11/exception/NetworkException.java
  3. 11
      w11/exception/ParseException.java
  4. 11
      w11/exception/UrlFormatException.java
  5. 54
      w11/util/ColorUtil.java
  6. 152
      w11/util/DataPersistence.java

11
w11/exception/CrawlerException.java

@ -0,0 +1,11 @@
package com.crawler.exception;
/**
 * Base unchecked exception for failures raised by the crawler.
 *
 * <p>The other exception types in this package extend this class, so callers
 * can catch {@code CrawlerException} to handle all of them at once.
 */
public class CrawlerException extends RuntimeException {

    /**
     * Creates an exception carrying only a description of the failure.
     *
     * @param message human-readable description of what went wrong
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param message human-readable description of what went wrong
     * @param cause   the lower-level throwable that triggered this exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}

11
w11/exception/NetworkException.java

@ -0,0 +1,11 @@
package com.crawler.exception;
/**
 * Crawler exception type for network-related failures.
 */
public class NetworkException extends CrawlerException {

    /**
     * Creates an exception carrying only a description of the failure.
     *
     * @param message human-readable description of what went wrong
     */
    public NetworkException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param message human-readable description of what went wrong
     * @param cause   the lower-level throwable that triggered this exception
     */
    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }
}

11
w11/exception/ParseException.java

@ -0,0 +1,11 @@
package com.crawler.exception;
/**
 * Crawler exception type for parsing failures.
 *
 * <p>Note that this is unchecked and unrelated to {@code java.text.ParseException};
 * import the intended one explicitly.
 */
public class ParseException extends CrawlerException {

    /**
     * Creates an exception carrying only a description of the failure.
     *
     * @param message human-readable description of what went wrong
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param message human-readable description of what went wrong
     * @param cause   the lower-level throwable that triggered this exception
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }
}

11
w11/exception/UrlFormatException.java

@ -0,0 +1,11 @@
package com.crawler.exception;
/**
 * Crawler exception type for malformed or otherwise unusable URLs.
 */
public class UrlFormatException extends CrawlerException {

    /**
     * Creates an exception carrying only a description of the failure.
     *
     * @param message human-readable description of what went wrong
     */
    public UrlFormatException(String message) {
        super(message);
    }

    /**
     * Creates an exception that wraps the underlying failure.
     *
     * @param message human-readable description of what went wrong
     * @param cause   the lower-level throwable that triggered this exception
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
    }
}

54
w11/util/ColorUtil.java

@ -0,0 +1,54 @@
package com.crawler.util;
/**
 * ANSI escape sequences for styling console output, plus helpers that wrap a
 * piece of text in a sequence and terminate it with {@link #RESET}.
 *
 * <p>All members are static; the constants are compile-time constants.
 */
public class ColorUtil {

    /** Escape prefix shared by every sequence defined in this class. */
    private static final String CSI = "\u001B[";

    /** Sequence that clears every attribute set by the codes below. */
    public static final String RESET = CSI + "0m";

    // Foreground colors (codes 30-37).
    public static final String BLACK = CSI + "30m";
    public static final String RED = CSI + "31m";
    public static final String GREEN = CSI + "32m";
    public static final String YELLOW = CSI + "33m";
    public static final String BLUE = CSI + "34m";
    public static final String PURPLE = CSI + "35m";
    public static final String CYAN = CSI + "36m";
    public static final String WHITE = CSI + "37m";

    // Background colors (codes 40-47).
    public static final String BLACK_BG = CSI + "40m";
    public static final String RED_BG = CSI + "41m";
    public static final String GREEN_BG = CSI + "42m";
    public static final String YELLOW_BG = CSI + "43m";
    public static final String BLUE_BG = CSI + "44m";
    public static final String PURPLE_BG = CSI + "45m";
    public static final String CYAN_BG = CSI + "46m";
    public static final String WHITE_BG = CSI + "47m";

    /** Places {@code prefix} before {@code text} and {@link #RESET} after it. */
    private static String wrap(String prefix, String text) {
        return prefix + text + RESET;
    }

    /**
     * Wraps text in an arbitrary escape sequence followed by {@link #RESET}.
     *
     * @param text  the text to style
     * @param color the escape sequence to emit before the text, e.g. {@link #GREEN}
     * @return {@code color + text + RESET}
     */
    public static String colorize(String text, String color) {
        return wrap(color, text);
    }

    /** Returns {@code text} wrapped in {@link #GREEN} and {@link #RESET}. */
    public static String green(String text) {
        return wrap(GREEN, text);
    }

    /** Returns {@code text} wrapped in {@link #RED} and {@link #RESET}. */
    public static String red(String text) {
        return wrap(RED, text);
    }

    /** Returns {@code text} wrapped in {@link #YELLOW} and {@link #RESET}. */
    public static String yellow(String text) {
        return wrap(YELLOW, text);
    }

    /** Returns {@code text} wrapped in {@link #BLUE} and {@link #RESET}. */
    public static String blue(String text) {
        return wrap(BLUE, text);
    }

    /** Returns {@code text} wrapped in {@link #CYAN} and {@link #RESET}. */
    public static String cyan(String text) {
        return wrap(CYAN, text);
    }

    /** Returns {@code text} wrapped in {@link #PURPLE} and {@link #RESET}. */
    public static String purple(String text) {
        return wrap(PURPLE, text);
    }

    /** Returns {@code text} preceded by the bold sequence (code 1) and followed by {@link #RESET}. */
    public static String bold(String text) {
        return wrap(CSI + "1m", text);
    }
}

152
w11/util/DataPersistence.java

@ -0,0 +1,152 @@
package com.crawler.util;
import com.crawler.model.Article;
import java.io.*;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
/**
 * Stores articles as UTF-8 text files inside the {@code data} folder and reads
 * them back.
 *
 * <p>Each article is written to {@code article_<id>.txt}: a metadata header
 * (one {@code key: value} pair per line) followed by a content section. A
 * summary of all saved articles is written to {@code index.txt}.
 *
 * <p>NOTE(review): only ID, title, URL, author, source and content are restored
 * on load; the publish and crawl timestamps are written but not parsed back.
 */
public class DataPersistence {
    private static final String DATA_FOLDER = "data";
    private static final String INDEX_FILE = DATA_FOLDER + File.separator + "index.txt";
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Title line of the content section; the reader matches it exactly after trimming. */
    private static final String CONTENT_HEADER = "文章内容";

    // Metadata line prefixes shared by the writer and the reader.
    private static final String ID_PREFIX = "ID: ";
    private static final String TITLE_PREFIX = "标题: ";
    private static final String URL_PREFIX = "URL: ";
    private static final String AUTHOR_PREFIX = "作者: ";
    private static final String SOURCE_PREFIX = "来源: ";

    // Make sure the output folder exists before any save or load is attempted.
    static {
        File folder = new File(DATA_FOLDER);
        if (!folder.exists()) {
            folder.mkdirs();
        }
    }

    /**
     * Writes one file per article and then rewrites the index file.
     *
     * <p>Any exception is caught and reported on standard error; nothing is
     * thrown to the caller. Files written before a failure are left in place.
     *
     * @param articles the articles to persist
     */
    public static void saveArticles(List<Article> articles) {
        try {
            for (Article article : articles) {
                saveSingleArticle(article);
            }
            saveIndex(articles);
            System.out.println(ColorUtil.green("✓ Saved " + articles.size() + " articles to '" + DATA_FOLDER + "' folder"));
        } catch (Exception e) {
            System.err.println(ColorUtil.red("✗ Failed to save articles: " + e.getMessage()));
        }
    }

    /**
     * Writes a single article to {@code article_<id>.txt}, overwriting any
     * existing file. Optional fields (author, source, publish date, crawl date)
     * are omitted when they are {@code null}.
     *
     * @param article the article to write
     * @throws IOException if the file cannot be created or written
     */
    private static void saveSingleArticle(Article article) throws IOException {
        String filename = DATA_FOLDER + File.separator + "article_" + article.getId() + ".txt";
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8))) {
            writer.write("========================================\n");
            writer.write(" 文章详细信息\n");
            writer.write("========================================\n\n");
            writer.write(ID_PREFIX + article.getId() + "\n");
            writer.write(TITLE_PREFIX + article.getTitle() + "\n");
            writer.write(URL_PREFIX + article.getUrl() + "\n");
            if (article.getAuthor() != null) {
                writer.write(AUTHOR_PREFIX + article.getAuthor() + "\n");
            }
            if (article.getSource() != null) {
                writer.write(SOURCE_PREFIX + article.getSource() + "\n");
            }
            if (article.getPublishDate() != null) {
                writer.write("发布时间: " + article.getPublishDate().format(DATE_FORMATTER) + "\n");
            }
            // Guarded like in saveIndex: articles restored by loadSingleArticle
            // never get a crawl date assigned here, so it may be null.
            if (article.getCrawlDate() != null) {
                writer.write("爬取时间: " + article.getCrawlDate().format(DATE_FORMATTER) + "\n");
            }
            writer.write("\n========================================\n");
            writer.write(" " + CONTENT_HEADER + "\n");
            writer.write("========================================\n");
            if (article.getContent() != null) {
                writer.write(article.getContent());
            }
            // Trailing padding; loadSingleArticle strips it again on read.
            writer.write("\n\n");
        }
    }

    /**
     * Rewrites {@code index.txt} with one entry (ID, title, URL, file name and,
     * when present, crawl time) per article.
     *
     * @param articles the articles to list
     * @throws IOException if the index file cannot be created or written
     */
    private static void saveIndex(List<Article> articles) throws IOException {
        try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(INDEX_FILE), StandardCharsets.UTF_8))) {
            writer.write("========================================\n");
            writer.write(" 文章索引\n");
            writer.write("========================================\n\n");
            writer.write("共有 " + articles.size() + " 篇文章\n\n");
            for (Article article : articles) {
                writer.write("[" + article.getId() + "] " + article.getTitle() + "\n");
                writer.write(" URL: " + article.getUrl() + "\n");
                writer.write(" 文件名: article_" + article.getId() + ".txt\n");
                if (article.getCrawlDate() != null) {
                    writer.write(" 爬取时间: " + article.getCrawlDate().format(DATE_FORMATTER) + "\n");
                }
                writer.write("\n");
            }
        }
    }

    /**
     * Loads every {@code article_*.txt} file found in the data folder.
     *
     * <p>Returns an empty list without scanning when the index file does not
     * exist. A file that fails to load is reported on standard error and
     * skipped; the remaining files are still loaded.
     *
     * @return the loaded articles, in the order the file system lists them
     */
    public static List<Article> loadArticles() {
        List<Article> articles = new ArrayList<>();
        File indexFile = new File(INDEX_FILE);
        if (!indexFile.exists()) {
            return articles;
        }
        File folder = new File(DATA_FOLDER);
        File[] files = folder.listFiles((dir, name) -> name.startsWith("article_") && name.endsWith(".txt"));
        if (files != null) {
            for (File file : files) {
                try {
                    Article article = loadSingleArticle(file);
                    if (article != null) {
                        articles.add(article);
                    }
                } catch (Exception e) {
                    System.err.println(ColorUtil.yellow("⚠ 无法加载文件: " + file.getName()));
                }
            }
        }
        System.out.println(ColorUtil.green("✓ Loaded " + articles.size() + " articles from '" + DATA_FOLDER + "' folder"));
        return articles;
    }

    /**
     * Parses one article file written by {@link #saveSingleArticle(Article)}.
     *
     * <p>Metadata lines are recognised by their prefix until the content header
     * line is reached; everything after the separator that follows the header
     * is the content, with line breaks normalised to {@code \n}.
     *
     * @param file the article file to read
     * @return a new article populated with the fields found in the file
     * @throws IOException if the file cannot be read
     */
    private static Article loadSingleArticle(File file) throws IOException {
        Article article = new Article();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) {
            StringBuilder content = new StringBuilder();
            boolean inContent = false;
            boolean firstContentLine = true;
            String line;
            while ((line = reader.readLine()) != null) {
                if (!inContent) {
                    // Match the header exactly (not "contains") so that a title or
                    // URL mentioning the phrase cannot start the content section.
                    if (line.trim().equals(CONTENT_HEADER)) {
                        inContent = true;
                        // Skip the separator line that follows the header.
                        reader.readLine();
                    } else if (line.startsWith(ID_PREFIX)) {
                        article.setId(line.substring(ID_PREFIX.length()));
                    } else if (line.startsWith(TITLE_PREFIX)) {
                        article.setTitle(line.substring(TITLE_PREFIX.length()));
                    } else if (line.startsWith(URL_PREFIX)) {
                        article.setUrl(line.substring(URL_PREFIX.length()));
                    } else if (line.startsWith(AUTHOR_PREFIX)) {
                        article.setAuthor(line.substring(AUTHOR_PREFIX.length()));
                    } else if (line.startsWith(SOURCE_PREFIX)) {
                        article.setSource(line.substring(SOURCE_PREFIX.length()));
                    }
                    continue;
                }
                // Once inside the content section every line is body text, even
                // one that happens to equal the header. A flag (rather than the
                // buffer length) decides on the separator so leading blank lines
                // of the body are preserved.
                if (!firstContentLine) {
                    content.append("\n");
                }
                content.append(line);
                firstContentLine = false;
            }
            // The writer appends "\n\n" after the body, which is read back as one
            // extra trailing newline; drop it so save/load round trips are stable.
            int length = content.length();
            if (length > 0 && content.charAt(length - 1) == '\n') {
                content.setLength(length - 1);
            }
            if (content.length() > 0) {
                article.setContent(content.toString());
            }
        }
        return article;
    }
}
Loading…
Cancel
Save