删除 'project/java-cli/src/main/java/com/example/datacollect/strategy/ChinaStrategy.java'

3 weeks ago · bfc7b46296
1 changed files with 0 additions and 47 deletions
--- a/project/java-cli/src/main/java/com/example/datacollect/strategy/ChinaStrategy.java
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/ChinaStrategy.java
@@ -1,47 +0,0 @@
 package com.example.datacollect.strategy;
 import com.example.datacollect.exception.ParseException;
 import com.example.datacollect.model.Article;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
 import org.jsoup.select.Elements;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.List;
 /**
  * {@link CrawlStrategy} for list pages whose URL contains {@code china.com.cn}.
  *
  * <p>The strategy scans the list items of the page, takes the first anchor of
  * each item, and turns it into an {@link Article} made of the anchor text, an
  * absolute link, and an empty content string (this strategy never fills in the
  * content itself).
  */
 public class ChinaStrategy implements CrawlStrategy {
    /** Substring a URL must contain for this strategy to claim it. */
    private static final String SITE_MARKER = "china.com.cn";

    /** Origin used for relative links when the page URL cannot serve as a base. */
    private static final String FALLBACK_ORIGIN = "https://www.china.com.cn";

    /** Selects items of class-less lists and of {@code ul.chinaWorld_txt} lists. */
    private static final String LIST_ITEM_SELECTOR = "ul:not([class]) li, ul.chinaWorld_txt li";

    /**
     * Anchor texts of this length or shorter are skipped.
     * NOTE(review): presumably filters out navigation/menu links - confirm.
     */
    private static final int MAX_SKIPPED_TITLE_LENGTH = 15;

    /**
     * Tells whether this strategy handles the given URL.
     *
     * @param url the page URL; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains {@code china.com.cn}
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains(SITE_MARKER);
    }

    /**
     * Extracts article links from a list page.
     *
     * @param url the URL the document was loaded from; relative links are resolved
     *            against it, falling back to {@code https://www.china.com.cn} when it
     *            is {@code null} or not a usable absolute URL
     * @param doc the parsed page
     * @return the articles found, in document order; never {@code null}
     * @throws ParseException declared by the interface; not thrown by this implementation
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        List<Article> articles = new ArrayList<>();
        Elements listItems = doc.select(LIST_ITEM_SELECTOR);
        for (Element item : listItems) {
            Element link = item.selectFirst("a");
            if (link == null) {
                continue;
            }
            String title = link.text().trim();
            if (title.length() <= MAX_SKIPPED_TITLE_LENGTH) {
                continue;
            }
            String href = link.attr("href").trim();
            if (href.isEmpty()) {
                // An anchor without a target is not a link to an article.
                continue;
            }
            articles.add(new Article(title, toAbsoluteUrl(url, href), ""));
        }
        return articles;
    }

    /** Converts a non-empty href into an absolute URL, using the page URL as base when possible. */
    private static String toAbsoluteUrl(String pageUrl, String href) {
        if (href.startsWith("http://") || href.startsWith("https://")) {
            return href;
        }
        if (href.startsWith("//")) {
            // Protocol-relative link: always upgraded to https.
            return "https:" + href;
        }
        String resolved = resolveAgainstPage(pageUrl, href);
        if (resolved != null) {
            return resolved;
        }
        // No usable page URL (or an unparsable href): anchor the link at the site root.
        return href.startsWith("/") ? FALLBACK_ORIGIN + href : FALLBACK_ORIGIN + "/" + href;
    }

    /** Resolves a scheme-less href against the page URL; returns null when that is not possible. */
    private static String resolveAgainstPage(String pageUrl, String href) {
        if (pageUrl == null) {
            return null;
        }
        try {
            URI base = new URI(pageUrl.trim());
            URI reference = new URI(href);
            if (base.getHost() == null || reference.isAbsolute()) {
                // Base is not a hierarchical URL with a host, or the href carries its
                // own (non-http) scheme: leave it to the caller's fallback.
                return null;
            }
            String basePath = base.getRawPath();
            if (basePath == null || basePath.isEmpty()) {
                // java.net.URI glues a relative path straight onto a base with an
                // empty path ("http://hostfoo"), so give the base a root path first.
                base = base.resolve("/");
            }
            return base.resolve(reference).toString();
        } catch (URISyntaxException ignored) {
            // Malformed page URL or href (e.g. embedded spaces): caller falls back.
            return null;
        }
    }
 }