Browse Source

上传文件至 'w10'

main
Zhengjie 1 month ago
parent
commit
5da9fc0d12
  1. 64
      w10/SpringerLinkStrategy.java
  2. 40
      w10/StrategyFactory.java

64
w10/SpringerLinkStrategy.java

@ -0,0 +1,64 @@
package strategy;
import model.Paper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.Utils;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy for Springer Link result pages.
 *
 * <p>Accepts any URL containing {@code link.springer.com}, downloads the page through
 * {@code Utils.sendGetRequest}, and turns each {@code .result-item} element into a
 * {@link Paper} built from its {@code h2 a} title link and its {@code .authors} text.
 */
public class SpringerLinkStrategy extends AbstractCrawlerStrategy {

    /** Substring a URL must contain for this strategy to claim it. */
    private static final String HOST_MARKER = "link.springer.com";

    /** Origin prepended to relative paper links. */
    private static final String BASE_URL = "https://link.springer.com";

    /** Titles shorter than this are treated as noise and skipped. */
    private static final int MIN_TITLE_LENGTH = 5;

    /**
     * Returns the display name of the platform handled by this strategy.
     *
     * @return the literal {@code "Springer Link"}
     */
    @Override
    public String getPlatformName() {
        return "Springer Link";
    }

    /**
     * Tells whether the given URL points at Springer Link.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains {@code link.springer.com}
     */
    @Override
    public boolean supportsUrl(String url) {
        return url != null && url.contains(HOST_MARKER);
    }

    /**
     * Downloads {@code url} and extracts at most {@code count} papers from it.
     *
     * <p>Result items without a title link, with a title shorter than five characters,
     * or without a usable link are skipped. A failure while processing one item is
     * reported on standard error and does not abort the remaining items.
     *
     * @param url   the page to download
     * @param count the maximum number of papers to collect
     * @return the collected papers; empty when the response is null or empty,
     *         or when nothing matched
     * @throws Exception if the delay helper or the HTTP request fails
     */
    @Override
    protected List<Paper> fetchPapers(String url, int count) throws Exception {
        List<Paper> papers = new ArrayList<>();
        System.out.println("=== 开始使用Springer Link获取论文 ===");
        addDelay(2000, 3000);

        String html = Utils.sendGetRequest(url);
        // Guard against a null response as well as an empty one.
        if (html == null || html.isEmpty()) {
            return papers;
        }

        Document doc = Jsoup.parse(html);
        Elements paperElements = doc.select(".result-item");
        for (Element element : paperElements) {
            if (papers.size() >= count) {
                break;
            }
            try {
                Element titleElement = element.selectFirst("h2 a");
                if (titleElement == null) {
                    continue;
                }
                String title = titleElement.text();
                // Resolve the link BEFORE the emptiness check so that a missing href
                // is rejected instead of being turned into the bare site origin.
                String paperUrl = resolvePaperUrl(titleElement.attr("href"));
                if (title.length() < MIN_TITLE_LENGTH || paperUrl.isEmpty()) {
                    continue;
                }

                Element authorsElement = element.selectFirst(".authors");
                String authors = authorsElement != null ? authorsElement.text() : "";

                // The third constructor argument is left empty: no value is extracted for it here.
                papers.add(new Paper(title, authors, "", paperUrl, getPlatformName()));
            } catch (Exception e) {
                // Best effort: one malformed item must not discard the rest, but say so.
                System.err.println("Springer Link: skipping result item: " + e);
            }
        }
        return papers;
    }

    /**
     * Turns an {@code href} value into an absolute URL on Springer Link.
     *
     * @param href the raw attribute value; may be {@code null} or empty
     * @return the absolute URL, or an empty string when {@code href} is null or blank
     */
    private static String resolvePaperUrl(String href) {
        String trimmed = href == null ? "" : href.trim();
        if (trimmed.isEmpty()) {
            return "";
        }
        if (trimmed.startsWith("http")) {
            return trimmed;
        }
        if (trimmed.startsWith("//")) {
            // Protocol-relative link: it already names its own host.
            return "https:" + trimmed;
        }
        if (trimmed.startsWith("/")) {
            return BASE_URL + trimmed;
        }
        // Relative link without a leading slash: insert the missing separator.
        return BASE_URL + "/" + trimmed;
    }
}

40
w10/StrategyFactory.java

@ -0,0 +1,40 @@
package strategy;
import java.util.List;
import java.util.ArrayList;
/**
 * Registry of the available {@link CrawlerStrategy} implementations.
 *
 * <p>Each factory instance creates its own set of strategies in its constructor and
 * hands out the first one that claims a given URL.
 */
public class StrategyFactory {

    /** Registered strategies, in lookup order. */
    private final List<CrawlerStrategy> strategies = new ArrayList<>();

    /** Creates a factory and registers one instance of every known strategy. */
    public StrategyFactory() {
        strategies.add(new CNKIStrategy());
        strategies.add(new IEEEStrategy());
        strategies.add(new ACMDigitalLibraryStrategy());
        strategies.add(new SpringerLinkStrategy());
        strategies.add(new ScienceDirectStrategy());
        strategies.add(new ArXivStrategy());
        strategies.add(new SemanticScholarStrategy());
    }

    /**
     * Finds the crawler strategy responsible for the given URL.
     *
     * @param url the URL to find a strategy for; may be {@code null}
     * @return the first registered strategy whose {@code supportsUrl} accepts the URL,
     *         or {@code null} if {@code url} is null or no strategy supports it
     */
    public CrawlerStrategy createCrawlerByUrl(String url) {
        // Answer null input here rather than relying on every strategy to be null-safe.
        if (url == null) {
            return null;
        }
        for (CrawlerStrategy strategy : strategies) {
            if (strategy.supportsUrl(url)) {
                return strategy;
            }
        }
        return null;
    }

    /**
     * Returns how many strategies are registered.
     *
     * @return the number of registered strategies
     */
    public int getPlatformCount() {
        return strategies.size();
    }

    /**
     * Returns the registered strategies in lookup order.
     *
     * @return a new list holding the registered strategies; modifying it does not
     *         affect this factory
     */
    public List<CrawlerStrategy> getAllStrategies() {
        // Defensive copy so callers cannot alter the internal registry.
        return new ArrayList<>(strategies);
    }
}
Loading…
Cancel
Save