You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 

71 lines
2.6 KiB

package com.example.datacollect.persist;
import com.example.datacollect.model.Article;
import com.example.datacollect.repository.ArticleRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.List;
/**
 * Imports {@link Article} records from a JSON file into an {@link ArticleRepository}.
 *
 * <p>The import is incremental: an article is added only when it has a non-blank URL
 * and the repository does not already report that URL as present.
 */
public class JsonImporter {
    private static final Logger logger = LoggerFactory.getLogger(JsonImporter.class);

    private final ObjectMapper objectMapper;

    /**
     * Creates an importer whose mapper has the {@link JavaTimeModule} registered, so
     * {@code java.time} values in the JSON can be deserialized.
     */
    public JsonImporter() {
        this.objectMapper = new ObjectMapper();
        objectMapper.registerModule(new JavaTimeModule());
    }

    /**
     * Incrementally imports articles from a JSON file, skipping entries that have no
     * usable URL or whose URL already exists in the repository.
     *
     * <p>The file is expected to contain a JSON array of articles. A document whose
     * root is the JSON literal {@code null} is treated as an empty array.
     *
     * @param repository article repository that receives the new articles
     * @param filePath path of the JSON file to import
     * @throws IOException if the file does not exist, cannot be read, or cannot be
     *     parsed into a list of articles
     */
    public void importArticles(ArticleRepository repository, String filePath) throws IOException {
        logger.info("开始从文件导入文章: {}", filePath);
        File file = new File(filePath);
        if (!file.exists()) {
            String errorMsg = "导入文件不存在: " + filePath;
            logger.error(errorMsg);
            throw new IOException(errorMsg);
        }
        try {
            // Hand the File itself to Jackson instead of wrapping it in a FileReader:
            // FileReader decodes with the platform default charset on older JDKs, which
            // corrupts non-ASCII text in UTF-8 files. Given raw bytes, Jackson detects
            // the JSON encoding on its own and closes the stream it opens.
            List<Article> parsed = objectMapper.readValue(
                    file,
                    objectMapper.getTypeFactory().constructCollectionType(List.class, Article.class));
            // A root-level JSON null deserializes to a null list; treat it as "nothing to import"
            // rather than failing with a NullPointerException in the loop below.
            List<Article> importedArticles = (parsed != null) ? parsed : List.of();

            // Incremental import: only articles with a new, non-blank URL are added.
            int importedCount = 0;
            int skippedCount = 0;
            for (Article article : importedArticles) {
                if (article == null || article.getUrl() == null || article.getUrl().isBlank()) {
                    logger.warn("跳过无效文章(URL为空)");
                    skippedCount++;
                    continue;
                }
                if (repository.containsUrl(article.getUrl())) {
                    logger.debug("URL已存在,跳过: {}", article.getUrl());
                    skippedCount++;
                    continue;
                }
                repository.add(article);
                importedCount++;
            }
            logger.info("导入完成 | 成功: {} 篇 | 跳过: {} 篇", importedCount, skippedCount);
        } catch (IOException e) {
            // Log with the file path for context, then propagate unchanged to the caller.
            logger.error("导入失败: {}", filePath, e);
            throw e;
        }
    }
}