You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
145 lines
5.0 KiB
145 lines
5.0 KiB
package repository;
|
|
|
|
import model.Paper;
|
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
|
import com.fasterxml.jackson.databind.SerializationFeature;
|
|
import utils.Utils;
|
|
import java.io.File;
|
|
import java.io.IOException;
|
|
import java.util.ArrayList;
|
|
import java.util.HashMap;
|
|
import java.util.HashSet;
|
|
import java.util.List;
|
|
import java.util.Map;
|
|
import java.util.Set;
|
|
|
|
public class PaperRepository {
|
|
private String baseDir = "论文爬取";
|
|
private String subDir;
|
|
private ObjectMapper objectMapper;
|
|
|
|
public PaperRepository() {
|
|
objectMapper = new ObjectMapper();
|
|
objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
|
|
}
|
|
|
|
public void init(String platformName) {
|
|
this.subDir = baseDir + File.separator + Utils.cleanFileName(platformName);
|
|
|
|
File dir = new File(subDir);
|
|
if (!dir.exists()) {
|
|
dir.mkdirs();
|
|
}
|
|
}
|
|
|
|
public List<Paper> removeDuplicates(List<Paper> papers) {
|
|
Set<String> existingTitles = new HashSet<>();
|
|
List<Paper> uniquePapers = new ArrayList<>();
|
|
|
|
File[] files = new File(subDir).listFiles();
|
|
if (files != null) {
|
|
for (File file : files) {
|
|
if (file.isFile() && file.getName().endsWith(".json")) {
|
|
try {
|
|
Paper[] existingPapers = objectMapper.readValue(file, Paper[].class);
|
|
for (Paper paper : existingPapers) {
|
|
existingTitles.add(paper.getTitle());
|
|
}
|
|
} catch (IOException e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
for (Paper paper : papers) {
|
|
if (!existingTitles.contains(paper.getTitle())) {
|
|
uniquePapers.add(paper);
|
|
existingTitles.add(paper.getTitle());
|
|
}
|
|
}
|
|
|
|
return uniquePapers;
|
|
}
|
|
|
|
public void savePapers(List<Paper> papers) throws Exception {
|
|
if (papers.isEmpty()) {
|
|
System.out.println("没有论文需要保存");
|
|
return;
|
|
}
|
|
|
|
int savedCount = 0;
|
|
for (Paper paper : papers) {
|
|
String title = paper.getTitle();
|
|
String fileName = Utils.cleanTitleForFileName(title) + ".json";
|
|
String filePath = subDir + File.separator + fileName;
|
|
|
|
List<Paper> singlePaperList = new ArrayList<>();
|
|
singlePaperList.add(paper);
|
|
|
|
objectMapper.writeValue(new File(filePath), singlePaperList);
|
|
savedCount++;
|
|
System.out.println("论文已保存: " + filePath);
|
|
}
|
|
System.out.println("共保存 " + savedCount + " 篇论文到: " + subDir);
|
|
}
|
|
|
|
public List<Paper> loadPapers() throws IOException {
|
|
List<Paper> allPapers = new ArrayList<>();
|
|
|
|
File[] files = new File(subDir).listFiles();
|
|
if (files != null) {
|
|
for (File file : files) {
|
|
if (file.isFile() && file.getName().endsWith(".json")) {
|
|
Paper[] papers = objectMapper.readValue(file, Paper[].class);
|
|
for (Paper paper : papers) {
|
|
allPapers.add(paper);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return allPapers;
|
|
}
|
|
|
|
public Map<String, List<Paper>> loadAllPapersGroupedByPlatform() throws IOException {
|
|
Map<String, List<Paper>> papersByPlatform = new HashMap<>();
|
|
|
|
File baseDirFile = new File(baseDir);
|
|
if (!baseDirFile.exists()) {
|
|
return papersByPlatform;
|
|
}
|
|
|
|
File[] platformDirs = baseDirFile.listFiles();
|
|
if (platformDirs != null) {
|
|
for (File platformDir : platformDirs) {
|
|
if (platformDir.isDirectory()) {
|
|
String platformName = platformDir.getName();
|
|
List<Paper> platformPapers = new ArrayList<>();
|
|
|
|
File[] files = platformDir.listFiles();
|
|
if (files != null) {
|
|
for (File file : files) {
|
|
if (file.isFile() && file.getName().endsWith(".json")) {
|
|
try {
|
|
Paper[] papers = objectMapper.readValue(file, Paper[].class);
|
|
for (Paper paper : papers) {
|
|
platformPapers.add(paper);
|
|
}
|
|
} catch (IOException e) {
|
|
System.out.println("读取文件失败: " + file.getName());
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
if (!platformPapers.isEmpty()) {
|
|
papersByPlatform.put(platformName, platformPapers);
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return papersByPlatform;
|
|
}
|
|
}
|