From 0faf992175edc870aecfee700cf1cefc506702cd Mon Sep 17 00:00:00 2001 From: LeiJuntao <2606542098@qq.com> Date: Sun, 31 May 2026 15:14:49 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'project/java-cli/src/main?= =?UTF-8?q?/java/com/example/datacollect/strategy/HnuNewsStrategy.java'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datacollect/strategy/HnuNewsStrategy.java | 41 ------------------- 1 file changed, 41 deletions(-) delete mode 100644 project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java deleted file mode 100644 index 59526e8..0000000 --- a/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class HnuNewsStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("news.hnu.edu.cn"); - } - - @Override - public List
parse(String url, Document doc) throws ParseException { - List
articles = new ArrayList<>(); - Elements listItems = doc.select("ul.list3 li, ul.list6 li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); - } - - String title = link.text().trim(); - String content = ""; - - if (!title.isEmpty()) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -}