From 5da9fc0d12040ce9b0d007fdbdb0cd751091cbe5 Mon Sep 17 00:00:00 2001
From: Zhengjie <2044415419@qq.com>
Date: Thu, 14 May 2026 14:39:48 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'w10'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review note: the file contents below were revised during review, so the blob
ids on the "index" lines still refer to the originally uploaded versions.

 w10/SpringerLinkStrategy.java | 109 ++++++++++++++++++++++++++++++++++
 w10/StrategyFactory.java      |  50 ++++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 w10/SpringerLinkStrategy.java
 create mode 100644 w10/StrategyFactory.java

diff --git a/w10/SpringerLinkStrategy.java b/w10/SpringerLinkStrategy.java
new file mode 100644
index 0000000..822bc36
--- /dev/null
+++ b/w10/SpringerLinkStrategy.java
@@ -0,0 +1,109 @@
+package strategy;
+
+import model.Paper;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import utils.Utils;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Crawler strategy for Springer Link result pages.
+ *
+ * <p>Requests the page through {@code Utils.sendGetRequest} and turns each
+ * {@code .result-item} element into a {@link Paper}.
+ */
+public class SpringerLinkStrategy extends AbstractCrawlerStrategy {
+    /** Host that identifies supported URLs. */
+    private static final String HOST = "link.springer.com";
+    /** Prefix used to make site-relative links absolute. */
+    private static final String BASE_URL = "https://" + HOST;
+    /** Entries whose title is shorter than this are skipped. */
+    private static final int MIN_TITLE_LENGTH = 5;
+
+    @Override
+    public String getPlatformName() {
+        return "Springer Link";
+    }
+
+    @Override
+    public boolean supportsUrl(String url) {
+        return url != null && url.contains(HOST);
+    }
+
+    /**
+     * Fetches the page at {@code url} and parses at most {@code count} papers from it.
+     *
+     * @param url   page to request
+     * @param count maximum number of papers to return
+     * @return the parsed papers; empty when the response body is null or empty
+     * @throws Exception whatever {@code addDelay} or {@code Utils.sendGetRequest} throws
+     */
+    @Override
+    protected List<Paper> fetchPapers(String url, int count) throws Exception {
+        List<Paper> papers = new ArrayList<>();
+        System.out.println("=== 开始使用Springer Link获取论文 ===");
+
+        addDelay(2000, 3000);
+
+        String html = Utils.sendGetRequest(url);
+        if (html == null || html.isEmpty()) {
+            return papers;
+        }
+
+        Document doc = Jsoup.parse(html);
+        Elements paperElements = doc.select(".result-item");
+
+        for (Element element : paperElements) {
+            if (papers.size() >= count) {
+                break;
+            }
+            try {
+                Paper paper = parsePaper(element);
+                if (paper != null) {
+                    papers.add(paper);
+                }
+            } catch (Exception e) {
+                // Best effort: one malformed entry must not abort the whole page.
+                System.err.println("Springer Link: skipping unparsable result: " + e);
+            }
+        }
+
+        return papers;
+    }
+
+    /**
+     * Builds a paper from one result element.
+     *
+     * @param element a {@code .result-item} element
+     * @return the paper, or {@code null} when the entry has no usable title or link
+     */
+    private Paper parsePaper(Element element) {
+        Element titleElement = element.selectFirst("h2 a");
+        if (titleElement == null) {
+            return null;
+        }
+
+        String title = titleElement.text();
+        // Test the raw href: once the base URL is prepended the link is never empty.
+        String href = titleElement.attr("href");
+        if (title.length() < MIN_TITLE_LENGTH || href.isEmpty()) {
+            return null;
+        }
+
+        Element authorsElement = element.selectFirst(".authors");
+        String authors = authorsElement != null ? authorsElement.text() : "";
+
+        return new Paper(title, authors, "", toAbsoluteUrl(href), getPlatformName());
+    }
+
+    /** Prefixes links that do not start with "http" with the Springer Link base URL. */
+    private static String toAbsoluteUrl(String href) {
+        if (href.startsWith("http")) {
+            return href;
+        }
+        return href.startsWith("/") ? BASE_URL + href : BASE_URL + "/" + href;
+    }
+}
\ No newline at end of file
diff --git a/w10/StrategyFactory.java b/w10/StrategyFactory.java
new file mode 100644
index 0000000..d50d562
--- /dev/null
+++ b/w10/StrategyFactory.java
@@ -0,0 +1,50 @@
+package strategy;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Holds one instance of each known crawler strategy and picks the one that
+ * supports a given URL.
+ */
+public class StrategyFactory {
+    private final List<CrawlerStrategy> strategies = new ArrayList<>();
+
+    /** Registers the built-in strategies; lookup order is registration order. */
+    public StrategyFactory() {
+        strategies.add(new CNKIStrategy());
+        strategies.add(new IEEEStrategy());
+        strategies.add(new ACMDigitalLibraryStrategy());
+        strategies.add(new SpringerLinkStrategy());
+        strategies.add(new ScienceDirectStrategy());
+        strategies.add(new ArXivStrategy());
+        strategies.add(new SemanticScholarStrategy());
+    }
+
+    /**
+     * Finds the crawler strategy that handles the given URL.
+     *
+     * @param url URL to find a strategy for
+     * @return the first registered strategy whose {@code supportsUrl} accepts
+     *         {@code url}, or {@code null} when none does
+     */
+    public CrawlerStrategy createCrawlerByUrl(String url) {
+        for (CrawlerStrategy strategy : strategies) {
+            if (strategy.supportsUrl(url)) {
+                return strategy;
+            }
+        }
+        return null;
+    }
+
+    /** @return the number of registered strategies */
+    public int getPlatformCount() {
+        return strategies.size();
+    }
+
+    /** @return a read-only view of the registered strategies */
+    public List<CrawlerStrategy> getAllStrategies() {
+        return Collections.unmodifiableList(strategies);
+    }
+}
\ No newline at end of file