You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

145 lines
5.0 KiB

package repository;
import model.Paper;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import utils.Utils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
 * File-system backed store for {@link Paper} records.
 *
 * <p>Papers are kept as JSON files (one JSON array per file) inside a
 * per-platform sub-directory of {@code baseDir}. {@link #init(String)} selects
 * that sub-directory and must be called before {@link #removeDuplicates(List)},
 * {@link #savePapers(List)} or {@link #loadPapers()} do any file work.
 * {@link #loadAllPapersGroupedByPlatform()} works on {@code baseDir} directly
 * and does not need {@code init}.
 */
public class PaperRepository {
    /** Root directory; each platform gets its own sub-directory below it. */
    private final String baseDir = "论文爬取";
    /** Directory of the platform chosen by {@link #init(String)}; {@code null} until then. */
    private String subDir;
    /** JSON mapper configured to write indented output. */
    private final ObjectMapper objectMapper;

    /** Creates a repository whose JSON output is pretty-printed. */
    public PaperRepository() {
        objectMapper = new ObjectMapper();
        objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    }

    /**
     * Selects the working directory for a platform and creates it if needed.
     *
     * @param platformName platform name; it is passed through
     *                     {@code Utils.cleanFileName} before being used as a
     *                     directory name
     */
    public void init(String platformName) {
        this.subDir = baseDir + File.separator + Utils.cleanFileName(platformName);
        File dir = new File(subDir);
        // mkdirs() also returns false when another thread/process created the
        // directory in the meantime, hence the second isDirectory() check.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            System.out.println("无法创建目录: " + subDir);
        }
    }

    /**
     * Filters out papers whose title is already stored in the current platform
     * directory or that appear more than once in {@code papers}.
     *
     * <p>Stored files that cannot be read are reported and skipped, so a broken
     * file never aborts de-duplication.
     *
     * @param papers candidate papers; {@code null} is treated as empty and
     *               {@code null} elements are ignored
     * @return the papers with previously unseen titles, in input order
     * @throws IllegalStateException if {@link #init(String)} has not been called
     *                               and {@code papers} is non-empty
     */
    public List<Paper> removeDuplicates(List<Paper> papers) {
        List<Paper> uniquePapers = new ArrayList<>();
        if (papers == null || papers.isEmpty()) {
            // Nothing to filter, so there is no need to scan the directory.
            return uniquePapers;
        }
        requireInitialized();

        Set<String> seenTitles = new HashSet<>();
        for (Paper stored : readPapersLeniently(new File(subDir))) {
            seenTitles.add(stored.getTitle());
        }
        for (Paper paper : papers) {
            // Set.add returns false for a title that is already known.
            if (paper != null && seenTitles.add(paper.getTitle())) {
                uniquePapers.add(paper);
            }
        }
        return uniquePapers;
    }

    /**
     * Writes every paper to its own JSON file (a one-element array) in the
     * current platform directory.
     *
     * <p>The file name is derived from the title via
     * {@code Utils.cleanTitleForFileName}. NOTE(review): two papers whose titles
     * map to the same file name would overwrite each other — confirm whether
     * the helper guarantees uniqueness.
     *
     * @param papers papers to save; {@code null} is treated as empty and
     *               {@code null} elements are skipped
     * @throws IllegalStateException if {@link #init(String)} has not been called
     * @throws Exception             if writing a file fails
     */
    public void savePapers(List<Paper> papers) throws Exception {
        if (papers == null || papers.isEmpty()) {
            System.out.println("没有论文需要保存");
            return;
        }
        // Without this check an uninitialised repository would write into a
        // directory literally named "null".
        requireInitialized();

        int savedCount = 0;
        for (Paper paper : papers) {
            if (paper == null) {
                continue;
            }
            String fileName = Utils.cleanTitleForFileName(paper.getTitle()) + ".json";
            String filePath = subDir + File.separator + fileName;
            // Stored as a single-element list so the file holds a JSON array,
            // the same shape the load methods parse (Paper[]).
            List<Paper> singlePaperList = new ArrayList<>();
            singlePaperList.add(paper);
            objectMapper.writeValue(new File(filePath), singlePaperList);
            savedCount++;
            System.out.println("论文已保存: " + filePath);
        }
        System.out.println("共保存 " + savedCount + " 篇论文到: " + subDir);
    }

    /**
     * Loads every paper stored in the current platform directory.
     *
     * @return all papers found in the directory's {@code .json} files; empty if
     *         the directory cannot be listed
     * @throws IOException           if any {@code .json} file cannot be read or parsed
     * @throws IllegalStateException if {@link #init(String)} has not been called
     */
    public List<Paper> loadPapers() throws IOException {
        requireInitialized();
        List<Paper> allPapers = new ArrayList<>();
        for (File file : listJsonFiles(new File(subDir))) {
            appendPapers(objectMapper.readValue(file, Paper[].class), allPapers);
        }
        return allPapers;
    }

    /**
     * Loads the papers of every platform directory found under {@code baseDir}.
     *
     * <p>Unreadable files are reported and skipped. Platforms without any
     * readable paper are left out of the result.
     *
     * @return map from platform directory name to its papers; empty if
     *         {@code baseDir} does not exist or cannot be listed
     * @throws IOException declared for backward compatibility with existing
     *                     callers; read failures are handled per file
     */
    public Map<String, List<Paper>> loadAllPapersGroupedByPlatform() throws IOException {
        Map<String, List<Paper>> papersByPlatform = new HashMap<>();
        // listFiles() yields null for a missing or unreadable directory.
        File[] platformDirs = new File(baseDir).listFiles();
        if (platformDirs == null) {
            return papersByPlatform;
        }
        for (File platformDir : platformDirs) {
            if (!platformDir.isDirectory()) {
                continue;
            }
            List<Paper> platformPapers = readPapersLeniently(platformDir);
            if (!platformPapers.isEmpty()) {
                papersByPlatform.put(platformDir.getName(), platformPapers);
            }
        }
        return papersByPlatform;
    }

    /** Fails fast when a method that needs the platform directory runs before init. */
    private void requireInitialized() {
        if (subDir == null) {
            throw new IllegalStateException(
                    "PaperRepository.init(platformName) must be called before using the repository");
        }
    }

    /** Returns the regular files in {@code dir} whose name ends with ".json" (empty if unlistable). */
    private static List<File> listJsonFiles(File dir) {
        List<File> jsonFiles = new ArrayList<>();
        File[] entries = dir.listFiles();
        if (entries != null) {
            for (File entry : entries) {
                if (entry.isFile() && entry.getName().endsWith(".json")) {
                    jsonFiles.add(entry);
                }
            }
        }
        return jsonFiles;
    }

    /** Appends the non-null elements of {@code parsed} to {@code target}; a null array adds nothing. */
    private static void appendPapers(Paper[] parsed, List<Paper> target) {
        if (parsed == null) {
            // A file containing the JSON literal null deserialises to a null array.
            return;
        }
        for (Paper paper : parsed) {
            if (paper != null) {
                target.add(paper);
            }
        }
    }

    /** Reads all papers from the JSON files in {@code dir}, reporting and skipping unreadable files. */
    private List<Paper> readPapersLeniently(File dir) {
        List<Paper> papers = new ArrayList<>();
        for (File file : listJsonFiles(dir)) {
            try {
                appendPapers(objectMapper.readValue(file, Paper[].class), papers);
            } catch (IOException e) {
                // Best effort: one corrupt file must not hide the others.
                System.out.println("读取文件失败: " + file.getName() + " (" + e.getMessage() + ")");
            }
        }
        return papers;
    }
}