From b76d506dec9ab836fcb47a7aee5050d82d69d3a7 Mon Sep 17 00:00:00 2001 From: LeiJuntao <2606542098@qq.com> Date: Sun, 31 May 2026 14:45:31 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'project/java-cli/src/main?= =?UTF-8?q?/java/com/example/datacollect/strategy/BlogStrategy.java'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../datacollect/strategy/BlogStrategy.java | 59 ------------------- 1 file changed, 59 deletions(-) delete mode 100644 project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java deleted file mode 100644 index da351f7..0000000 --- a/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java +++ /dev/null @@ -1,59 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class BlogStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("blog.example.com"); - } - - @Override - public List
<Article> parse(String url, Document doc) throws ParseException { - List<Article>
articles = new ArrayList<>(); - Elements listItems = doc.select("ul.blog-list li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - if (articleUrl.startsWith("//")) { - articleUrl = "https:" + articleUrl; - } else if (articleUrl.startsWith("/")) { - articleUrl = "https://blog.example.com" + articleUrl; - } else { - articleUrl = "https://blog.example.com/" + articleUrl; - } - } - - String title = ""; - Element titleEl = link.selectFirst("h3.post-title"); - if (titleEl != null) { - title = titleEl.text().trim(); - } - if (title.isEmpty()) { - title = link.text().trim(); - } - - String content = ""; - Element contentEl = li.selectFirst("p.post-excerpt"); - if (contentEl != null) { - content = contentEl.text().trim(); - } - - if (!title.isEmpty()) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -} \ No newline at end of file