You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
73 lines
2.3 KiB
73 lines
2.3 KiB
package command;
|
|
|
|
import model.Paper;
|
|
import strategy.CrawlerStrategy;
|
|
import strategy.StrategyFactory;
|
|
import repository.PaperRepository;
|
|
import view.ConsoleView;
|
|
import java.util.List;
|
|
|
|
public class CrawlCommand implements Command {
|
|
private ConsoleView view;
|
|
private StrategyFactory strategyFactory;
|
|
|
|
public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
|
|
this.view = view;
|
|
this.strategyFactory = strategyFactory;
|
|
}
|
|
|
|
@Override
|
|
public void execute(String[] args, PaperRepository repository) {
|
|
if (args.length < 2) {
|
|
view.showError("请提供要爬取的论文网站URL,格式: crawl <URL>");
|
|
return;
|
|
}
|
|
|
|
String url = args[1];
|
|
|
|
if (!isValidUrl(url)) {
|
|
view.showError("无效的URL格式,请提供有效的论文网站URL");
|
|
return;
|
|
}
|
|
|
|
try {
|
|
view.showInfo("开始爬取论文...");
|
|
view.showInfo("目标URL: " + url);
|
|
|
|
CrawlerStrategy crawler = strategyFactory.createCrawlerByUrl(url);
|
|
if (crawler == null) {
|
|
view.showError("不支持的网站,请提供支持的论文网站URL");
|
|
return;
|
|
}
|
|
|
|
String platformName = crawler.getPlatformName();
|
|
repository.init(platformName);
|
|
|
|
List<Paper> papers = crawler.crawl(url, 10);
|
|
|
|
if (papers.isEmpty()) {
|
|
view.showInfo("未获取到论文");
|
|
} else {
|
|
List<Paper> uniquePapers = repository.removeDuplicates(papers);
|
|
repository.savePapers(uniquePapers);
|
|
view.showSuccess("成功爬取 " + uniquePapers.size() + " 篇论文");
|
|
}
|
|
} catch (Exception e) {
|
|
view.showError("爬取失败: " + e.getMessage());
|
|
}
|
|
}
|
|
|
|
private boolean isValidUrl(String url) {
|
|
return url != null && (url.startsWith("http://") || url.startsWith("https://"));
|
|
}
|
|
|
|
@Override
|
|
public String getDescription() {
|
|
return "爬取指定URL的论文,格式: crawl <论文网站URL>";
|
|
}
|
|
|
|
@Override
|
|
public String getName() {
|
|
return "crawl";
|
|
}
|
|
}
|