上传文件至 'w10'

1 month ago · 5da9fc0d12
2 changed files with 104 additions and 0 deletions
--- a/w10/SpringerLinkStrategy.java
+++ b/w10/SpringerLinkStrategy.java
@ -0,0 +1,64 @@
+package strategy;
+
+import model.Paper;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import utils.Utils;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Crawler strategy that extracts papers from a Springer Link result page.
+ *
+ * <p>The page is downloaded with {@code Utils.sendGetRequest}, parsed with jsoup, and every
+ * {@code .result-item} element is turned into a {@link Paper} until the requested number of
+ * papers has been collected.
+ */
+public class SpringerLinkStrategy extends AbstractCrawlerStrategy {
+    /** Base URI against which relative result links are resolved. */
+    private static final String BASE_URL = "https://link.springer.com";
+
+    /** Result items whose title is shorter than this are skipped. */
+    private static final int MIN_TITLE_LENGTH = 5;
+
+    /**
+     * @return the display name of this platform, also stored on every {@link Paper} produced here
+     */
+    @Override
+    public String getPlatformName() {
+        return "Springer Link";
+    }
+
+    /**
+     * Tells whether this strategy can handle the given URL.
+     *
+     * @param url the URL to test; may be {@code null}
+     * @return {@code true} if {@code url} is non-null and contains {@code link.springer.com}
+     */
+    @Override
+    public boolean supportsUrl(String url) {
+        return url != null && url.contains("link.springer.com");
+    }
+
+    /**
+     * Downloads {@code url} and extracts up to {@code count} papers from it.
+     *
+     * @param url   the result page to download
+     * @param count the maximum number of papers to return
+     * @return the extracted papers; empty if the response is null or empty, or if no result item
+     *         carries both a usable title and a link
+     * @throws Exception if the delay or the HTTP request fails
+     */
+    @Override
+    protected List<Paper> fetchPapers(String url, int count) throws Exception {
+        List<Paper> papers = new ArrayList<>();
+        System.out.println("=== 开始使用Springer Link获取论文 ===");
+
+        // Pause before hitting the site; presumably a random 2000-3000 ms delay -- TODO confirm
+        // against the inherited addDelay implementation.
+        addDelay(2000, 3000);
+
+        String html = Utils.sendGetRequest(url);
+        // Guard against null as well as empty so a failed request cannot raise an NPE here.
+        if (html == null || html.isEmpty()) {
+            return papers;
+        }
+
+        // Parsing with a base URI lets jsoup resolve relative and protocol-relative hrefs.
+        Document doc = Jsoup.parse(html, BASE_URL);
+        Elements paperElements = doc.select(".result-item");
+
+        for (Element element : paperElements) {
+            if (papers.size() >= count) {
+                break;
+            }
+
+            try {
+                Element titleElement = element.selectFirst("h2 a");
+                if (titleElement == null) {
+                    continue;
+                }
+
+                String title = titleElement.text();
+                // absUrl yields "" when the href is missing or cannot be made absolute, so the
+                // emptiness check below really does filter out items without a link.
+                String paperUrl = titleElement.absUrl("href");
+                if (title.length() < MIN_TITLE_LENGTH || paperUrl.isEmpty()) {
+                    continue;
+                }
+
+                Element authorsElement = element.selectFirst(".authors");
+                String authors = authorsElement != null ? authorsElement.text() : "";
+
+                papers.add(new Paper(title, authors, "", paperUrl, getPlatformName()));
+            } catch (Exception e) {
+                // Best effort: one malformed item must not abort the crawl, but say so.
+                System.err.println("Springer Link: skipping result item: " + e);
+            }
+        }
+
+        return papers;
+    }
+}
--- a/w10/StrategyFactory.java
+++ b/w10/StrategyFactory.java
@ -0,0 +1,40 @@
+package strategy;
+
+import java.util.List;
+import java.util.ArrayList;
+
+/**
+ * Holds one instance of every known crawler strategy and selects the one matching a URL.
+ */
+public class StrategyFactory {
+    // Per-instance registry (not a static constant, hence lowerCamelCase). Lookup walks it in
+    // insertion order, so the first strategy that supports a URL wins.
+    private final List<CrawlerStrategy> strategies = new ArrayList<>();
+
+    /** Registers the built-in strategies. */
+    public StrategyFactory() {
+        strategies.add(new CNKIStrategy());
+        strategies.add(new IEEEStrategy());
+        strategies.add(new ACMDigitalLibraryStrategy());
+        strategies.add(new SpringerLinkStrategy());
+        strategies.add(new ScienceDirectStrategy());
+        strategies.add(new ArXivStrategy());
+        strategies.add(new SemanticScholarStrategy());
+    }
+
+    /**
+     * Finds the crawler strategy responsible for the given URL.
+     *
+     * <p>Despite the name, no new object is created: the instance registered in the constructor
+     * is returned.
+     *
+     * @param url the URL to find a strategy for; may be {@code null}
+     * @return the first registered strategy whose {@code supportsUrl} accepts {@code url}, or
+     *         {@code null} if {@code url} is {@code null} or no strategy supports it
+     */
+    public CrawlerStrategy createCrawlerByUrl(String url) {
+        // A null URL can never be supported; answer here rather than relying on every
+        // strategy implementation to be null-safe.
+        if (url == null) {
+            return null;
+        }
+        for (CrawlerStrategy strategy : strategies) {
+            if (strategy.supportsUrl(url)) {
+                return strategy;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * @return the number of registered strategies
+     */
+    public int getPlatformCount() {
+        return strategies.size();
+    }
+
+    /**
+     * @return a new list containing the registered strategies in registration order; changes
+     *         to the returned list do not affect this factory
+     */
+    public List<CrawlerStrategy> getAllStrategies() {
+        // Defensive copy so callers cannot add to or clear the internal registry.
+        return new ArrayList<>(strategies);
+    }
+}