From 97f9d049bfd7ed1aa6c571a303ff467c62dc53a9 Mon Sep 17 00:00:00 2001 From: LeiJuntao <2606542098@qq.com> Date: Sun, 31 May 2026 14:45:47 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'project/java-cli/src/main?= =?UTF-8?q?/java/com/example/datacollect/strategy/GmwStrategy.java'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datacollect/strategy/GmwStrategy.java | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 project/java-cli/src/main/java/com/example/datacollect/strategy/GmwStrategy.java diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/GmwStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/GmwStrategy.java deleted file mode 100644 index b9e0231..0000000 --- a/project/java-cli/src/main/java/com/example/datacollect/strategy/GmwStrategy.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.example.datacollect.strategy; - -import java.util.ArrayList; -import java.util.List; - -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.model.Article; - -public class GmwStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("gmw.cn"); - } - - @Override - public List
parse(String url, Document doc) throws ParseException { - List
articles = new ArrayList<>(); - Elements listItems = doc.select("ul.m_ulList li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - if (articleUrl.startsWith("//")) { - articleUrl = "https:" + articleUrl; - } else if (articleUrl.startsWith("/")) { - articleUrl = "https://www.gmw.cn" + articleUrl; - } else { - articleUrl = "https://www.gmw.cn/" + articleUrl; - } - } - - String title = link.text().trim(); - String content = ""; - - if (!title.isEmpty() && title.length() > 10) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -} \ No newline at end of file