You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
64 lines
2.0 KiB
64 lines
2.0 KiB
package strategy;
|
|
|
|
import model.Paper;
|
|
import org.jsoup.Jsoup;
|
|
import org.jsoup.nodes.Document;
|
|
import org.jsoup.nodes.Element;
|
|
import org.jsoup.select.Elements;
|
|
import utils.Utils;
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
public class SpringerLinkStrategy extends AbstractCrawlerStrategy {
|
|
@Override
|
|
public String getPlatformName() {
|
|
return "Springer Link";
|
|
}
|
|
|
|
@Override
|
|
public boolean supportsUrl(String url) {
|
|
return url != null && url.contains("link.springer.com");
|
|
}
|
|
|
|
@Override
|
|
protected List<Paper> fetchPapers(String url, int count) throws Exception {
|
|
List<Paper> papers = new ArrayList<>();
|
|
System.out.println("=== 开始使用Springer Link获取论文 ===");
|
|
|
|
addDelay(2000, 3000);
|
|
|
|
String html = Utils.sendGetRequest(url);
|
|
if (html.isEmpty()) return papers;
|
|
|
|
Document doc = Jsoup.parse(html);
|
|
|
|
Elements paperElements = doc.select(".result-item");
|
|
|
|
int collected = 0;
|
|
for (Element element : paperElements) {
|
|
if (collected >= count) break;
|
|
|
|
try {
|
|
Element titleElement = element.selectFirst("h2 a");
|
|
String title = titleElement != null ? titleElement.text() : "";
|
|
|
|
String paperUrl = titleElement != null ? titleElement.attr("href") : "";
|
|
if (!paperUrl.startsWith("http")) {
|
|
paperUrl = "https://link.springer.com" + paperUrl;
|
|
}
|
|
|
|
Element authorsElement = element.selectFirst(".authors");
|
|
String authors = authorsElement != null ? authorsElement.text() : "";
|
|
|
|
if (title.length() < 5 || paperUrl.isEmpty()) continue;
|
|
|
|
papers.add(new Paper(title, authors, "", paperUrl, getPlatformName()));
|
|
collected++;
|
|
} catch (Exception e) {
|
|
continue;
|
|
}
|
|
}
|
|
|
|
return papers;
|
|
}
|
|
}
|