From e300412c66c5f62269e287be8e8dd8c60ad98cd4 Mon Sep 17 00:00:00 2001
From: YuWeixia
Date: Tue, 12 May 2026 11:37:19 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'w10'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

思考题解答:两个策略都 supports 同一 URL 时怎么办?
解决方案:优先级机制
优势:
1.确定性:多个策略匹配时,总是选择优先级最高的
2.灵活性:可动态调整策略优先级
3.可扩展性:新增策略只需实现接口并设置合适的优先级
4.健壮性:总有默认策略兜底,不会返回 null
---
 w10/AnalyzeCommand.java    | 108 +++++++++++++++++++++++++++++++++++++
 w10/ArticleRepository.java |  77 ++++++++++++++++++++++++++
 w10/CrawlStrategy.java     |  37 +++++++++++++
 3 files changed, 222 insertions(+)
 create mode 100644 w10/AnalyzeCommand.java
 create mode 100644 w10/ArticleRepository.java
 create mode 100644 w10/CrawlStrategy.java

diff --git a/w10/AnalyzeCommand.java b/w10/AnalyzeCommand.java
new file mode 100644
index 0000000..f86efab
--- /dev/null
+++ b/w10/AnalyzeCommand.java
@@ -0,0 +1,108 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Console command that fetches a page, parses it with the matching
+ * {@link CrawlStrategy} and prints summary statistics.
+ *
+ * This is a dry run: parsed articles are only reported, never stored.
+ */
+public class AnalyzeCommand implements Command {
+    /** Maximum number of article titles echoed as a sample. */
+    private static final int SAMPLE_SIZE = 3;
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * Creates the command.
+     *
+     * @param view console used for all output
+     * @param strategyFactory source of the strategy for a given URL
+     */
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    /** Returns the name under which this command is invoked. */
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Analyzes the URL given in {@code args[1]} and prints the statistics.
+     *
+     * @param args command tokens; {@code args[1]} is the URL to analyze
+     * @param repository unused: analysis never stores what it parses
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args == null || args.length < 2) {
+            view.printError("Usage: analyze <url>");
+            return;
+        }
+        String url = args[1];
+
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        if (strategy == null) {
+            view.printError("No strategy found for: " + url);
+            return;
+        }
+        String strategyName = strategy.getClass().getSimpleName();
+
+        try {
+            view.printInfo("Analyzing: " + url);
+            view.printInfo("Using strategy: " + strategyName);
+
+            Document doc = Jsoup.connect(url).get();
+            List<Article> articles = strategy.parse(url, doc);
+            if (articles == null) {
+                // Treat "no result" like "no articles" instead of failing with an NPE.
+                articles = Collections.emptyList();
+            }
+
+            view.printSuccess("=== Analysis Statistics ===");
+            view.printSuccess("URL: " + url);
+            view.printSuccess("Strategy: " + strategyName);
+            view.printSuccess("Articles found: " + articles.size());
+            printSampleTitles(articles);
+            view.printSuccess("Note: Articles were NOT stored in repository");
+        } catch (Exception e) {
+            // Command boundary: report any fetch or parse failure, never crash the console.
+            view.printError("Failed to analyze: " + describe(e));
+        }
+    }
+
+    /** Prints up to {@link #SAMPLE_SIZE} titles and how many more were found. */
+    private void printSampleTitles(List<Article> articles) {
+        if (articles.isEmpty()) {
+            return;
+        }
+        view.printSuccess("Sample titles:");
+        int count = Math.min(SAMPLE_SIZE, articles.size());
+        for (int i = 0; i < count; i++) {
+            view.printSuccess(" - " + articles.get(i).getTitle());
+        }
+        if (articles.size() > SAMPLE_SIZE) {
+            view.printSuccess(" ... and " + (articles.size() - SAMPLE_SIZE) + " more");
+        }
+    }
+
+    /** Returns the exception message, or its class name when it has none. */
+    private static String describe(Exception e) {
+        String message = e.getMessage();
+        return message != null ? message : e.getClass().getSimpleName();
+    }
+}
diff --git a/w10/ArticleRepository.java b/w10/ArticleRepository.java
new file mode 100644
index 0000000..747444a
--- /dev/null
+++ b/w10/ArticleRepository.java
@@ -0,0 +1,77 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * In-memory, insertion-ordered store of {@link Article} objects.
+ *
+ * Null articles are rejected; the class does no synchronization of its own.
+ */
+public class ArticleRepository {
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Appends one article.
+     *
+     * @param article the article to store
+     * @throws IllegalArgumentException if {@code article} is null
+     */
+    public void add(Article article) {
+        requireArticle(article);
+        articles.add(article);
+    }
+
+    /**
+     * Appends every article of the list, or none of them if the list is invalid.
+     *
+     * @param articleList the articles to store
+     * @throws IllegalArgumentException if the list or any of its elements is null
+     */
+    public void addAll(List<Article> articleList) {
+        if (articleList == null) {
+            throw new IllegalArgumentException("Article list cannot be null");
+        }
+        // Copy before touching the store: the argument may be a view of this
+        // repository (see getAll()), and iterating it while appending would fail.
+        List<Article> incoming = new ArrayList<>(articleList);
+        // Validate everything first so that a bad element leaves the store unchanged.
+        for (Article article : incoming) {
+            requireArticle(article);
+        }
+        articles.addAll(incoming);
+    }
+
+    /**
+     * Returns a read-only view of the stored articles.
+     *
+     * @return an unmodifiable list backed by the store, so later additions show through
+     */
+    public List<Article> getAll() {
+        return Collections.unmodifiableList(articles);
+    }
+
+    /** Returns the number of stored articles. */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes every stored article. */
+    public void clear() {
+        articles.clear();
+    }
+
+    /** Returns {@code true} when no article is stored. */
+    public boolean isEmpty() {
+        return articles.isEmpty();
+    }
+
+    /** Rejects a null article with the repository's standard error. */
+    private static void requireArticle(Article article) {
+        if (article == null) {
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+    }
+}
diff --git a/w10/CrawlStrategy.java b/w10/CrawlStrategy.java
new file mode 100644
index 0000000..972b0fb
--- /dev/null
+++ b/w10/CrawlStrategy.java
@@ -0,0 +1,37 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/** Strategy for turning a fetched page into {@link Article} objects. */
+public interface CrawlStrategy {
+    /**
+     * Extracts the articles contained in a fetched page.
+     *
+     * @param url the address the document was loaded from
+     * @param doc the parsed page
+     * @return the articles found in the page
+     */
+    List<Article> parse(String url, Document doc);
+
+    /**
+     * Tells whether this strategy can handle the given URL.
+     *
+     * @param url the address to check
+     * @return {@code true} if this strategy applies to {@code url}
+     */
+    boolean supports(String url);
+
+    /**
+     * Returns the rank used to choose between strategies that support the same URL.
+     *
+     * The selection code is not part of this file; it is expected to pick the
+     * highest value among the strategies that match.
+     *
+     * @return the priority, 0 unless overridden
+     */
+    default int getPriority() {
+        return 0;
+    }
+}