diff --git a/w10-巫秋雨-202506050320/AnalyzeCommand.java b/w10-巫秋雨-202506050320/AnalyzeCommand.java
new file mode 100644
index 0000000..65d14b9
--- /dev/null
+++ b/w10-巫秋雨-202506050320/AnalyzeCommand.java
@@ -0,0 +1,116 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Command that fetches a web page, parses it with the matching crawl strategy and prints
+ * statistics about the parsed articles without storing anything in the repository.
+ */
+public class AnalyzeCommand implements Command {
+
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * Creates the command.
+     *
+     * @param strategyFactory factory used to look up the parsing strategy for a URL
+     */
+    public AnalyzeCommand(StrategyFactory strategyFactory) {
+        this.strategyFactory = strategyFactory;
+    }
+
+    /**
+     * Returns the name of this command.
+     *
+     * @return the literal {@code "analyze"}
+     */
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Fetches the URL given in {@code args[1]}, parses it and prints the analysis result.
+     * Errors (missing argument, unsupported URL, fetch failure) are reported on standard
+     * output and end the command without throwing.
+     *
+     * @param args command tokens; the URL to analyze is read from index 1
+     * @param repository article store required by the {@code Command} signature; it is
+     *     intentionally never written to by this command
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        // Argument check: the user must supply the URL to analyze.
+        if (args.length < 2) {
+            System.out.println("[错误] 请输入要分析的URL,格式:analyze <url>");
+            return;
+        }
+        String url = args[1];
+
+        // Ask the factory for the strategy that matches this URL.
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        if (strategy == null) {
+            System.out.println("[错误] 不支持解析该URL:" + url);
+            return;
+        }
+
+        // Download and parse the page source (5000 ms timeout).
+        Document doc;
+        try {
+            doc = Jsoup.connect(url)
+                    .timeout(5000)
+                    .get();
+        } catch (IOException e) {
+            System.out.println("[错误] 网页爬取失败:" + e.getMessage());
+            return;
+        }
+
+        // Reuse the strategy to parse the page, but do not store the result
+        // (repository.add / addAll are deliberately not called).
+        List<Article> articles = strategy.parse(url, doc);
+
+        printAnalysisResult(articles, url);
+    }
+
+    /**
+     * Prints the analysis result: article count, average title length and a preview of
+     * up to three titles.
+     *
+     * @param articles parsed articles to summarize
+     * @param url the URL that was analyzed
+     */
+    private void printAnalysisResult(List<Article> articles, String url) {
+        System.out.println("\n===== 网页分析结果(不存储数据) =====");
+        System.out.println("分析URL:" + url);
+        System.out.println("解析到文章数量:" + articles.size());
+
+        if (!articles.isEmpty()) {
+            // Average title length; a missing (null) title counts as length 0
+            // instead of aborting the whole report with a NullPointerException.
+            int totalTitleLength = 0;
+            for (Article article : articles) {
+                if (article.getTitle() != null) {
+                    totalTitleLength += article.getTitle().length();
+                }
+            }
+            double avgTitleLength = (double) totalTitleLength / articles.size();
+            System.out.printf("文章标题平均长度:%.2f 字符\n", avgTitleLength);
+
+            // Preview the titles of the first three articles.
+            System.out.println("\n前3篇文章标题预览:");
+            for (int i = 0; i < Math.min(3, articles.size()); i++) {
+                System.out.println((i + 1) + ". " + articles.get(i).getTitle());
+            }
+        } else {
+            System.out.println("未解析到任何文章内容");
+        }
+        System.out.println("=====================================\n");
+    }
+}
\ No newline at end of file
diff --git a/w10-巫秋雨-202506050320/ArticleRepository.java b/w10-巫秋雨-202506050320/ArticleRepository.java
new file mode 100644
index 0000000..2ea96b7
--- /dev/null
+++ b/w10-巫秋雨-202506050320/ArticleRepository.java
@@ -0,0 +1,68 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/** In-memory store of {@link Article} objects backed by an {@link ArrayList}. */
+public class ArticleRepository {
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Appends one article.
+     *
+     * @param article the article to store
+     * @throws IllegalArgumentException if {@code article} is {@code null}
+     */
+    public void add(Article article) {
+        if (article == null) {
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+        articles.add(article);
+    }
+
+    /**
+     * Appends every article of the given list, or none of them: all elements are
+     * validated before the first one is stored, so a {@code null} element cannot
+     * leave the repository partially updated.
+     *
+     * @param articleList the articles to store
+     * @throws IllegalArgumentException if the list or any of its elements is {@code null}
+     */
+    public void addAll(List<Article> articleList) {
+        if (articleList == null) {
+            throw new IllegalArgumentException("列表不能为 null");
+        }
+        for (Article article : articleList) {
+            if (article == null) {
+                throw new IllegalArgumentException("Article cannot be null");
+            }
+        }
+        articles.addAll(articleList);
+    }
+
+    /**
+     * Returns a read-only view of the stored articles.
+     *
+     * @return an unmodifiable list view; modification attempts through it throw
+     *     {@link UnsupportedOperationException}
+     */
+    public List<Article> getAll() {
+        return Collections.unmodifiableList(articles);
+    }
+
+    /**
+     * Returns the number of stored articles.
+     *
+     * @return the current article count
+     */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes all stored articles. */
+    public void clear() {
+        articles.clear();
+    }
+}