package repository; import model.Paper; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.SerializationFeature; import utils.Utils; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; public class PaperRepository { private String baseDir = "论文爬取"; private String subDir; private ObjectMapper objectMapper; public PaperRepository() { objectMapper = new ObjectMapper(); objectMapper.enable(SerializationFeature.INDENT_OUTPUT); } public void init(String platformName) { this.subDir = baseDir + File.separator + Utils.cleanFileName(platformName); File dir = new File(subDir); if (!dir.exists()) { dir.mkdirs(); } } public List removeDuplicates(List papers) { Set existingTitles = new HashSet<>(); List uniquePapers = new ArrayList<>(); File[] files = new File(subDir).listFiles(); if (files != null) { for (File file : files) { if (file.isFile() && file.getName().endsWith(".json")) { try { Paper[] existingPapers = objectMapper.readValue(file, Paper[].class); for (Paper paper : existingPapers) { existingTitles.add(paper.getTitle()); } } catch (IOException e) { e.printStackTrace(); } } } } for (Paper paper : papers) { if (!existingTitles.contains(paper.getTitle())) { uniquePapers.add(paper); existingTitles.add(paper.getTitle()); } } return uniquePapers; } public void savePapers(List papers) throws Exception { if (papers.isEmpty()) { System.out.println("没有论文需要保存"); return; } int savedCount = 0; for (Paper paper : papers) { String title = paper.getTitle(); String fileName = Utils.cleanTitleForFileName(title) + ".json"; String filePath = subDir + File.separator + fileName; List singlePaperList = new ArrayList<>(); singlePaperList.add(paper); objectMapper.writeValue(new File(filePath), singlePaperList); savedCount++; System.out.println("论文已保存: " + filePath); } System.out.println("共保存 " + savedCount + " 篇论文到: " + subDir); } public List loadPapers() throws IOException { List allPapers = new ArrayList<>(); File[] files = new File(subDir).listFiles(); if (files != null) { for (File file : files) { if (file.isFile() && file.getName().endsWith(".json")) { Paper[] papers = objectMapper.readValue(file, Paper[].class); for (Paper paper : papers) { allPapers.add(paper); } } } } return allPapers; } public Map> loadAllPapersGroupedByPlatform() throws IOException { Map> papersByPlatform = new HashMap<>(); File baseDirFile = new File(baseDir); if (!baseDirFile.exists()) { return papersByPlatform; } File[] platformDirs = baseDirFile.listFiles(); if (platformDirs != null) { for (File platformDir : platformDirs) { if (platformDir.isDirectory()) { String platformName = platformDir.getName(); List platformPapers = new ArrayList<>(); File[] files = platformDir.listFiles(); if (files != null) { for (File file : files) { if (file.isFile() && file.getName().endsWith(".json")) { try { Paper[] papers = objectMapper.readValue(file, Paper[].class); for (Paper paper : papers) { platformPapers.add(paper); } } catch (IOException e) { System.out.println("读取文件失败: " + file.getName()); } } } } if (!platformPapers.isEmpty()) { papersByPlatform.put(platformName, platformPapers); } } } } return papersByPlatform; } }