Browse Source

上传文件至 'w10'

main
Zhengjie 1 month ago
parent
commit
765f8861e9
  1. 82
      w10/CNKIStrategy.java
  2. 10
      w10/CrawlerStrategy.java
  3. 64
      w10/IEEEStrategy.java
  4. 64
      w10/ScienceDirectStrategy.java
  5. 102
      w10/SemanticScholarStrategy.java

82
w10/CNKIStrategy.java

@ -0,0 +1,82 @@
package strategy;
import model.Paper;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.Utils;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy for CNKI search/result pages.
 *
 * <p>The page is fetched through {@code Utils.sendGetRequest}, parsed with
 * {@code Utils.parseHtml}, and each result element is turned into a {@link Paper}.
 */
public class CNKIStrategy extends AbstractCrawlerStrategy {

    /** Origin used to turn non-absolute hrefs into absolute URLs. */
    private static final String BASE_URL = "https://kns.cnki.net";

    /** Link texts shorter than this are not treated as paper titles. */
    private static final int MIN_TITLE_LENGTH = 10;

    /** Candidate selectors for result items, tried in order; the first one that matches wins. */
    private static final String[] ITEM_SELECTORS = {
        ".list-item",
        ".article-item",
        ".result-item",
        "tr[class*='item']",
        "div[class*='result']",
        "li[class*='result']"
    };

    /**
     * Returns the display name of this platform; the same value is passed as the last
     * argument of every {@link Paper} created by this strategy.
     */
    @Override
    public String getPlatformName() {
        return "中国知网 (CNKI)";
    }

    /**
     * Reports whether the URL looks like a CNKI address.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains {@code cnki.net} or {@code cnki.cn}
     */
    @Override
    public boolean supportsUrl(String url) {
        return url != null && (url.contains("cnki.net") || url.contains("cnki.cn"));
    }

    /**
     * Fetches the page at {@code url} and extracts at most {@code count} papers from it.
     *
     * <p>Items whose first link has a too-short text or no href are skipped. An item that
     * fails to parse is reported on standard output and skipped; it does not abort the crawl.
     *
     * @param url the page to download
     * @param count the maximum number of papers to return
     * @return the extracted papers; empty when the response is null/empty or nothing matched
     * @throws Exception if the download or the HTML parsing helper fails
     */
    @Override
    protected List<Paper> fetchPapers(String url, int count) throws Exception {
        List<Paper> papers = new ArrayList<>();
        System.out.println("=== 开始使用中国知网获取论文 ===");

        String html = Utils.sendGetRequest(url);
        // Guard against a null response as well as an empty one.
        if (html == null || html.isEmpty()) {
            return papers;
        }

        Document doc = Utils.parseHtml(html);
        for (Element element : findPaperElements(doc)) {
            if (papers.size() >= count) {
                break;
            }
            try {
                // The first link inside the item is taken as the title link.
                Element titleElement = element.select("a").first();
                if (titleElement == null) {
                    continue;
                }
                String title = titleElement.text();
                String href = titleElement.attr("href");
                if (title.length() < MIN_TITLE_LENGTH || href.isEmpty()) {
                    continue;
                }

                String authors = firstText(element, ".author");
                String abstractText = firstText(element, ".abstract");
                papers.add(new Paper(title, authors, abstractText, toAbsoluteUrl(href), getPlatformName()));
            } catch (Exception e) {
                // Best effort: one malformed item must not abort the crawl, but make it visible.
                System.out.println("中国知网条目解析失败: " + e.getMessage());
            }
        }
        return papers;
    }

    /** Returns the matches of the first selector in {@link #ITEM_SELECTORS} that matches anything. */
    private static Elements findPaperElements(Document doc) {
        Elements matches = new Elements();
        for (String selector : ITEM_SELECTORS) {
            matches = doc.select(selector);
            if (!matches.isEmpty()) {
                break;
            }
        }
        return matches;
    }

    /** Returns the text of the first descendant matching {@code selector}, or {@code ""} if none. */
    private static String firstText(Element parent, String selector) {
        Element match = parent.select(selector).first();
        return match != null ? match.text() : "";
    }

    /**
     * Resolves an href against {@link #BASE_URL}.
     *
     * <p>Absolute {@code http...} links are returned unchanged, protocol-relative links
     * ({@code //host/path}) get an {@code https:} scheme, and path-relative links are joined
     * with exactly one slash.
     */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        if (href.startsWith("/")) {
            return BASE_URL + href;
        }
        return BASE_URL + "/" + href;
    }
}

10
w10/CrawlerStrategy.java

@ -0,0 +1,10 @@
package strategy;
import model.Paper;
import java.util.List;
/**
 * Contract implemented by each platform-specific paper crawler.
 */
public interface CrawlerStrategy {

    /**
     * Reports whether this strategy is able to handle the given URL.
     *
     * @param url the URL to test
     * @return {@code true} if this strategy applies to {@code url}
     */
    boolean supportsUrl(String url);

    /**
     * Returns the human-readable name of the platform this strategy targets.
     *
     * @return the platform name
     */
    String getPlatformName();

    /**
     * Crawls the given URL and returns the papers found there.
     *
     * @param url the URL to crawl
     * @param count the requested number of papers (presumably an upper bound; confirm against
     *     the implementing class)
     * @return the papers that were collected
     * @throws Exception if crawling fails
     */
    List<Paper> crawl(String url, int count) throws Exception;
}

64
w10/IEEEStrategy.java

@ -0,0 +1,64 @@
package strategy;
import model.Paper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.Utils;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy for IEEE Xplore result pages.
 *
 * <p>The page is fetched through {@code Utils.sendGetRequest}, parsed with jsoup, and each
 * result list item is turned into a {@link Paper} with an empty abstract.
 */
public class IEEEStrategy extends AbstractCrawlerStrategy {

    /** Origin used to turn non-absolute hrefs into absolute URLs. */
    private static final String BASE_URL = "https://ieeexplore.ieee.org";

    /** Link texts shorter than this are not treated as paper titles. */
    private static final int MIN_TITLE_LENGTH = 5;

    /**
     * Returns the display name of this platform; the same value is passed as the last
     * argument of every {@link Paper} created by this strategy.
     */
    @Override
    public String getPlatformName() {
        return "IEEE Xplore";
    }

    /**
     * Reports whether the URL looks like an IEEE Xplore address.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains {@code ieeexplore.ieee.org}
     */
    @Override
    public boolean supportsUrl(String url) {
        return url != null && url.contains("ieeexplore.ieee.org");
    }

    /**
     * Fetches the page at {@code url} and extracts at most {@code count} papers from it.
     *
     * <p>Items without a title link, with a too-short title, or with an empty href are
     * skipped. An item that fails to parse is reported on standard output and skipped.
     *
     * @param url the page to download
     * @param count the maximum number of papers to return
     * @return the extracted papers; empty when the response is null/empty or nothing matched
     * @throws Exception if the download fails
     */
    @Override
    protected List<Paper> fetchPapers(String url, int count) throws Exception {
        List<Paper> papers = new ArrayList<>();
        System.out.println("=== 开始使用IEEE Xplore获取论文 ===");

        // Inherited helper; presumably pauses for a random interval in this range — confirm in base class.
        addDelay(2000, 3000);

        String html = Utils.sendGetRequest(url);
        // Guard against a null response as well as an empty one.
        if (html == null || html.isEmpty()) {
            return papers;
        }

        Document doc = Jsoup.parse(html);
        for (Element element : doc.select(".List-results-items li")) {
            if (papers.size() >= count) {
                break;
            }
            try {
                Element titleElement = element.selectFirst("h2 a");
                if (titleElement == null) {
                    continue;
                }
                String title = titleElement.text();
                String href = titleElement.attr("href");
                // Validate the raw href BEFORE prefixing the base URL; once prefixed it is
                // never empty, so a missing href would otherwise slip through as the site root.
                if (title.length() < MIN_TITLE_LENGTH || href.isEmpty()) {
                    continue;
                }

                Element authorsElement = element.selectFirst(".authors");
                String authors = authorsElement != null ? authorsElement.text() : "";
                papers.add(new Paper(title, authors, "", toAbsoluteUrl(href), getPlatformName()));
            } catch (Exception e) {
                // Best effort: one malformed item must not abort the crawl, but make it visible.
                System.out.println("IEEE Xplore条目解析失败: " + e.getMessage());
            }
        }
        return papers;
    }

    /**
     * Resolves an href against {@link #BASE_URL}.
     *
     * <p>Absolute {@code http...} links are returned unchanged, protocol-relative links
     * ({@code //host/path}) get an {@code https:} scheme, and path-relative links are joined
     * with exactly one slash.
     */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        if (href.startsWith("/")) {
            return BASE_URL + href;
        }
        return BASE_URL + "/" + href;
    }
}

64
w10/ScienceDirectStrategy.java

@ -0,0 +1,64 @@
package strategy;
import model.Paper;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import utils.Utils;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy for ScienceDirect result pages.
 *
 * <p>The page is fetched through {@code Utils.sendGetRequest}, parsed with jsoup, and each
 * result element is turned into a {@link Paper} with an empty abstract.
 */
public class ScienceDirectStrategy extends AbstractCrawlerStrategy {

    /** Origin used to turn non-absolute hrefs into absolute URLs. */
    private static final String BASE_URL = "https://www.sciencedirect.com";

    /** Link texts shorter than this are not treated as paper titles. */
    private static final int MIN_TITLE_LENGTH = 5;

    /**
     * Returns the display name of this platform; the same value is passed as the last
     * argument of every {@link Paper} created by this strategy.
     */
    @Override
    public String getPlatformName() {
        return "ScienceDirect";
    }

    /**
     * Reports whether the URL looks like a ScienceDirect address.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains {@code sciencedirect.com}
     */
    @Override
    public boolean supportsUrl(String url) {
        return url != null && url.contains("sciencedirect.com");
    }

    /**
     * Fetches the page at {@code url} and extracts at most {@code count} papers from it.
     *
     * <p>Items without a title link, with a too-short title, or with an empty href are
     * skipped. An item that fails to parse is reported on standard output and skipped.
     *
     * @param url the page to download
     * @param count the maximum number of papers to return
     * @return the extracted papers; empty when the response is null/empty or nothing matched
     * @throws Exception if the download fails
     */
    @Override
    protected List<Paper> fetchPapers(String url, int count) throws Exception {
        List<Paper> papers = new ArrayList<>();
        System.out.println("=== 开始使用ScienceDirect获取论文 ===");

        // Inherited helper; presumably pauses for a random interval in this range — confirm in base class.
        addDelay(2000, 3000);

        String html = Utils.sendGetRequest(url);
        // Guard against a null response as well as an empty one.
        if (html == null || html.isEmpty()) {
            return papers;
        }

        Document doc = Jsoup.parse(html);
        for (Element element : doc.select(".result-item-content")) {
            if (papers.size() >= count) {
                break;
            }
            try {
                Element titleElement = element.selectFirst("h2 a");
                if (titleElement == null) {
                    continue;
                }
                String title = titleElement.text();
                String href = titleElement.attr("href");
                // Validate the raw href BEFORE prefixing the base URL; once prefixed it is
                // never empty, so a missing href would otherwise slip through as the site root.
                if (title.length() < MIN_TITLE_LENGTH || href.isEmpty()) {
                    continue;
                }

                Element authorsElement = element.selectFirst(".author-group");
                String authors = authorsElement != null ? authorsElement.text() : "";
                papers.add(new Paper(title, authors, "", toAbsoluteUrl(href), getPlatformName()));
            } catch (Exception e) {
                // Best effort: one malformed item must not abort the crawl, but make it visible.
                System.out.println("ScienceDirect条目解析失败: " + e.getMessage());
            }
        }
        return papers;
    }

    /**
     * Resolves an href against {@link #BASE_URL}.
     *
     * <p>Absolute {@code http...} links are returned unchanged, protocol-relative links
     * ({@code //host/path}) get an {@code https:} scheme, and path-relative links are joined
     * with exactly one slash.
     */
    private static String toAbsoluteUrl(String href) {
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        if (href.startsWith("/")) {
            return BASE_URL + href;
        }
        return BASE_URL + "/" + href;
    }
}

102
w10/SemanticScholarStrategy.java

@ -0,0 +1,102 @@
package strategy;
import model.Paper;
import com.fasterxml.jackson.databind.ObjectMapper;
import utils.Utils;
import java.util.ArrayList;
import java.util.List;
public class SemanticScholarStrategy extends AbstractCrawlerStrategy {
@Override
public String getPlatformName() {
return "Semantic Scholar";
}
@Override
public boolean supportsUrl(String url) {
return url != null && url.contains("semanticscholar.org");
}
@Override
protected List<Paper> fetchPapers(String url, int count) throws Exception {
List<Paper> papers = new ArrayList<>();
System.out.println("=== 开始使用Semantic Scholar获取论文 ===");
addDelay(1000, 1500);
try {
String response = Utils.sendGetRequest(url);
if (response.isEmpty()) return papers;
ObjectMapper objectMapper = new ObjectMapper();
SemanticScholarResponse apiResponse = objectMapper.readValue(response, SemanticScholarResponse.class);
if (apiResponse != null && apiResponse.getItems() != null) {
int collected = 0;
for (SemanticScholarPaper apiPaper : apiResponse.getItems()) {
if (collected >= count) break;
String title = apiPaper.getTitle();
String paperUrl = apiPaper.getUrl();
String abstractText = apiPaper.getAbstractText();
StringBuilder authorsBuilder = new StringBuilder();
if (apiPaper.getAuthors() != null) {
for (SemanticScholarAuthor author : apiPaper.getAuthors()) {
if (author != null && author.getName() != null) {
if (authorsBuilder.length() > 0) {
authorsBuilder.append(", ");
}
authorsBuilder.append(author.getName());
}
}
}
String authors = authorsBuilder.toString();
if (title == null || title.length() < 5 || paperUrl == null || paperUrl.isEmpty()) continue;
papers.add(new Paper(title, authors, abstractText != null ? abstractText : "", paperUrl, getPlatformName()));
collected++;
}
}
} catch (Exception e) {
System.out.println("Semantic Scholar解析失败: " + e.getMessage());
}
return papers;
}
private static class SemanticScholarResponse {
private List<SemanticScholarPaper> items;
public List<SemanticScholarPaper> getItems() { return items; }
@SuppressWarnings("unused")
public void setItems(List<SemanticScholarPaper> items) { this.items = items; }
}
private static class SemanticScholarPaper {
private String title;
private String url;
private List<SemanticScholarAuthor> authors;
private String abstractText;
public String getTitle() { return title; }
@SuppressWarnings("unused")
public void setTitle(String title) { this.title = title; }
public String getUrl() { return url; }
@SuppressWarnings("unused")
public void setUrl(String url) { this.url = url; }
public List<SemanticScholarAuthor> getAuthors() { return authors; }
@SuppressWarnings("unused")
public void setAuthors(List<SemanticScholarAuthor> authors) { this.authors = authors; }
public String getAbstractText() { return abstractText; }
@SuppressWarnings("unused")
public void setAbstractText(String abstractText) { this.abstractText = abstractText; }
}
private static class SemanticScholarAuthor {
private String name;
public String getName() { return name; }
@SuppressWarnings("unused")
public void setName(String name) { this.name = name; }
}
}
Loading…
Cancel
Save