package com.crawler.util; import com.crawler.model.Article; import java.io.*; import java.nio.charset.StandardCharsets; import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.ArrayList; import java.util.List; public class DataPersistence { private static final String DATA_FOLDER = "data"; private static final String INDEX_FILE = DATA_FOLDER + File.separator + "index.txt"; private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); static { File folder = new File(DATA_FOLDER); if (!folder.exists()) { folder.mkdirs(); } } public static void saveArticles(List
articles) { try { for (Article article : articles) { saveSingleArticle(article); } saveIndex(articles); System.out.println(ColorUtil.green("✓ Saved " + articles.size() + " articles to '" + DATA_FOLDER + "' folder")); } catch (Exception e) { System.err.println(ColorUtil.red("✗ Failed to save articles: " + e.getMessage())); } } private static void saveSingleArticle(Article article) throws IOException { String filename = DATA_FOLDER + File.separator + "article_" + article.getId() + ".txt"; try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(filename), StandardCharsets.UTF_8))) { writer.write("========================================\n"); writer.write(" 文章详细信息\n"); writer.write("========================================\n\n"); writer.write("ID: " + article.getId() + "\n"); writer.write("标题: " + article.getTitle() + "\n"); writer.write("URL: " + article.getUrl() + "\n"); if (article.getAuthor() != null) { writer.write("作者: " + article.getAuthor() + "\n"); } if (article.getSource() != null) { writer.write("来源: " + article.getSource() + "\n"); } if (article.getPublishDate() != null) { writer.write("发布时间: " + article.getPublishDate().format(DATE_FORMATTER) + "\n"); } writer.write("爬取时间: " + article.getCrawlDate().format(DATE_FORMATTER) + "\n"); writer.write("\n========================================\n"); writer.write(" 文章内容\n"); writer.write("========================================\n"); if (article.getContent() != null) { writer.write(article.getContent()); } writer.write("\n\n"); } } private static void saveIndex(List
articles) throws IOException { try (BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(INDEX_FILE), StandardCharsets.UTF_8))) { writer.write("========================================\n"); writer.write(" 文章索引\n"); writer.write("========================================\n\n"); writer.write("共有 " + articles.size() + " 篇文章\n\n"); for (Article article : articles) { writer.write("[" + article.getId() + "] " + article.getTitle() + "\n"); writer.write(" URL: " + article.getUrl() + "\n"); writer.write(" 文件名: article_" + article.getId() + ".txt\n"); if (article.getCrawlDate() != null) { writer.write(" 爬取时间: " + article.getCrawlDate().format(DATE_FORMATTER) + "\n"); } writer.write("\n"); } } } public static List
loadArticles() { List
articles = new ArrayList<>(); File indexFile = new File(INDEX_FILE); if (!indexFile.exists()) { return articles; } File folder = new File(DATA_FOLDER); File[] files = folder.listFiles((dir, name) -> name.startsWith("article_") && name.endsWith(".txt")); if (files != null) { for (File file : files) { try { Article article = loadSingleArticle(file); if (article != null) { articles.add(article); } } catch (Exception e) { System.err.println(ColorUtil.yellow("⚠ 无法加载文件: " + file.getName())); } } } System.out.println(ColorUtil.green("✓ Loaded " + articles.size() + " articles from '" + DATA_FOLDER + "' folder")); return articles; } private static Article loadSingleArticle(File file) throws IOException { Article article = new Article(); try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), StandardCharsets.UTF_8))) { String line; StringBuilder content = new StringBuilder(); boolean inContent = false; while ((line = reader.readLine()) != null) { if (line.contains("文章内容")) { inContent = true; // Skip the next separator line reader.readLine(); continue; } if (!inContent) { if (line.startsWith("ID: ")) { article.setId(line.substring(4)); } else if (line.startsWith("标题: ")) { article.setTitle(line.substring(4)); } else if (line.startsWith("URL: ")) { article.setUrl(line.substring(5)); } else if (line.startsWith("作者: ")) { article.setAuthor(line.substring(4)); } else if (line.startsWith("来源: ")) { article.setSource(line.substring(4)); } } else { if (content.length() > 0) { content.append("\n"); } content.append(line); } } if (content.length() > 0) { article.setContent(content.toString()); } } return article; } }