package com.example.datacollect.command; import com.example.datacollect.model.Article; import com.example.datacollect.repository.ArticleRepository; import com.example.datacollect.strategy.CrawlStrategy; import com.example.datacollect.strategy.StrategyFactory; import com.example.datacollect.view.ConsoleView; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.stream.Collectors; public class AnalyzeCommand implements Command { private final ConsoleView view; private final StrategyFactory strategyFactory; public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { this.view = view; this.strategyFactory = strategyFactory; } @Override public String getName() { return "analyze"; } @Override public String getDescription() { return "analyze - 分析URL页面内容,输出统计信息(不保存)"; } @Override public void execute(String[] args, ArticleRepository repository) { if (args.length < 2) { view.printError("Usage: analyze "); return; } String url = args[1]; CrawlStrategy strategy = strategyFactory.getStrategy(url); if (strategy == null) { view.printError("No strategy found for: " + url); return; } try { view.printInfo("Analyzing: " + url); Document doc = Jsoup.connect(url).get(); List
articles = strategy.parse(url, doc); printStatistics(articles, url); } catch (Exception e) { view.printError("Failed to analyze: " + e.getMessage()); } } private void printStatistics(List
articles, String url) { view.printSuccess("=== 分析报告 ==="); view.printInfo("解析策略: " + strategyFactory.getStrategy(url).getClass().getSimpleName()); view.printInfo("URL: " + url); view.printInfo("文章数量: " + articles.size()); if (articles.isEmpty()) { view.printInfo("未解析到任何文章"); return; } int emptyTitles = 0; int minTitleLength = Integer.MAX_VALUE; int maxTitleLength = 0; int totalTitleLength = 0; for (Article article : articles) { String title = article.getTitle(); if (title == null || title.trim().isEmpty()) { emptyTitles++; } else { int len = title.length(); minTitleLength = Math.min(minTitleLength, len); maxTitleLength = Math.max(maxTitleLength, len); totalTitleLength += len; } } view.printInfo("--- 标题统计 ---"); if (emptyTitles > 0) { view.printInfo("空标题数量: " + emptyTitles); } view.printInfo("最短标题长度: " + (minTitleLength == Integer.MAX_VALUE ? 0 : minTitleLength)); view.printInfo("最长标题长度: " + maxTitleLength); view.printInfo("平均标题长度: " + String.format("%.1f", (double) totalTitleLength / (articles.size() - emptyTitles))); String domain = extractDomain(url); Map domainDistribution = articles.stream() .map(a -> extractDomain(a.getUrl())) .collect(Collectors.groupingBy(d -> d, Collectors.counting())); view.printInfo("--- 来源域名分布 ---"); for (Map.Entry entry : domainDistribution.entrySet()) { view.printInfo(" " + entry.getKey() + ": " + entry.getValue() + " 篇"); } } private String extractDomain(String url) { try { int start = url.indexOf("://"); if (start == -1) return "unknown"; int end = url.indexOf("/", start + 3); if (end == -1) return url.substring(start + 3); return url.substring(start + 3, end); } catch (Exception e) { return "unknown"; } } }