From 92307697fd8e4784ddb4ddf99f192d97e285e72c Mon Sep 17 00:00:00 2001 From: LeiJuntao <2606542098@qq.com> Date: Sun, 31 May 2026 15:14:33 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'project/java-cli/src/main?= =?UTF-8?q?/java/com/example/datacollect/strategy/CCTVStrategy.java'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datacollect/strategy/CCTVStrategy.java | 49 ------------------- 1 file changed, 49 deletions(-) delete mode 100644 project/java-cli/src/main/java/com/example/datacollect/strategy/CCTVStrategy.java diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/CCTVStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/CCTVStrategy.java deleted file mode 100644 index 23e17a0..0000000 --- a/project/java-cli/src/main/java/com/example/datacollect/strategy/CCTVStrategy.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.example.datacollect.strategy; - -import java.util.ArrayList; -import java.util.List; - -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.model.Article; - -public class CCTVStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("cctv.com"); - } - - @Override - public List
parse(String url, Document doc) throws ParseException { - List
articles = new ArrayList<>(); - Elements listItems = doc.select("ul:not([class]) li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - if (articleUrl.startsWith("//")) { - articleUrl = "https:" + articleUrl; - } else if (articleUrl.startsWith("/")) { - articleUrl = "https://www.cctv.com" + articleUrl; - } else { - articleUrl = "https://www.cctv.com/" + articleUrl; - } - } - - String title = link.text().trim(); - String content = ""; - - if (!title.isEmpty() && title.length() > 10) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -} \ No newline at end of file