diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java deleted file mode 100644 index 59526e8..0000000 --- a/project/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class HnuNewsStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("news.hnu.edu.cn"); - } - - @Override - public List
parse(String url, Document doc) throws ParseException { - List
articles = new ArrayList<>(); - Elements listItems = doc.select("ul.list3 li, ul.list6 li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); - } - - String title = link.text().trim(); - String content = ""; - - if (!title.isEmpty()) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -}