import java.util.*; class CrawlerException extends RuntimeException { public CrawlerException(String message) { super(message); } public CrawlerException(String message, Throwable cause) { super(message, cause); } } class NetworkException extends CrawlerException { public NetworkException(String message) { super("网络请求失败: " + message); } } class ParseException extends CrawlerException { public ParseException(String message) { super("数据解析失败: " + message); } } class Article { private String title, author, publishDate, content, url; public Article(String t, String a, String pd, String c, String u) { title = t; author = a; publishDate = pd; content = c; url = u; } public String getTitle() { return title; } public String getAuthor() { return author; } public String getPublishDate() { return publishDate; } } class ArticleRepository { private List
articles = new ArrayList<>(); public void add(Article a) { if (a == null) throw new CrawlerException("Article cannot be null"); articles.add(a); } public void addAll(List
list) { if (list == null) throw new CrawlerException("List cannot be null"); articles.addAll(list); } public List
getAll() { return new ArrayList<>(articles); } public int size() { return articles.size(); } public void clear() { articles.clear(); } } interface CrawlStrategy { boolean supports(String url); List
parse(String html) throws ParseException; String getName(); } class BlogStrategy implements CrawlStrategy { public boolean supports(String url) { return url.contains("blog") || url.contains("csdn"); } public List
parse(String html) throws ParseException { List
list = new ArrayList<>(); for (int i = 1; i <= 3; i++) { list.add(new Article("博客文章"+i, "博主"+i, java.time.LocalDate.now().minusDays(i).toString(), "内容"+i, "http://blog.com/"+i)); } return list; } public String getName() { return "BlogStrategy"; } } class NewsStrategy implements CrawlStrategy { public boolean supports(String url) { return url.contains("news") || url.contains("sina"); } public List
parse(String html) throws ParseException { List
list = new ArrayList<>(); for (int i = 1; i <= 3; i++) { list.add(new Article("新闻标题"+i, "记者"+i, java.time.LocalDateTime.now().toString().substring(0,19), "内容"+i, "http://news.com/"+i)); } return list; } public String getName() { return "NewsStrategy"; } } class DefaultStrategy implements CrawlStrategy { public boolean supports(String url) { return true; } public List
parse(String html) throws ParseException { List
list = new ArrayList<>(); list.add(new Article("默认文章", "未知作者", java.time.LocalDate.now().toString(), "默认内容", "http://example.com")); return list; } public String getName() { return "DefaultStrategy"; } } class ScraperService { public String fetch(String url) throws NetworkException { int retries = 3; while (retries-- > 0) { try { System.out.println("[Scraper] 获取: " + url); if (Math.random() > 0.3) return "内容"; throw new RuntimeException("超时"); } catch (Exception e) { if (retries == 0) throw new NetworkException("重试3次失败"); } } throw new NetworkException("失败"); } } interface Command { void execute(String... args); String getName(); String getDescription(); } class CrawlCommand implements Command { private ArticleRepository repo; private List strategies; private ScraperService scraper; public CrawlCommand(ArticleRepository r, ScraperService s) { repo = r; scraper = s; strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy()); } public void execute(String... args) { if (args.length == 0) { System.out.println("请输入URL"); return; } String url = args[0]; if (!url.startsWith("http")) { System.out.println("无效URL"); return; } try { String html = scraper.fetch(url); CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElse(new DefaultStrategy()); List
articles = strategy.parse(html); repo.addAll(articles); System.out.println("抓取完成: " + articles.size() + "篇"); } catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); } } public String getName() { return "crawl"; } public String getDescription() { return "抓取URL,别名c"; } } class ListCommand implements Command { private ArticleRepository repo; public ListCommand(ArticleRepository r) { repo = r; } public void execute(String... args) { List
list = repo.getAll(); if (list.isEmpty()) { System.out.println("暂无文章"); return; } for (int i = 0; i < list.size(); i++) { Article a = list.get(i); System.out.printf("[%d] %s - %s (%s)%n", i+1, a.getTitle(), a.getAuthor(), a.getPublishDate()); } } public String getName() { return "list"; } public String getDescription() { return "列出文章"; } } class AnalyzeCommand implements Command { private List strategies; private ScraperService scraper; public AnalyzeCommand(ScraperService s) { scraper = s; strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy()); } public void execute(String... args) { if (args.length == 0) { System.out.println("请输入URL"); return; } String url = args[0]; if (!url.startsWith("http")) { System.out.println("无效URL"); return; } try { String html = scraper.fetch(url); CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElse(new DefaultStrategy()); List
articles = strategy.parse(html); System.out.println("=== 分析结果(不存储)==="); System.out.println("URL: " + url + " | 策略: " + strategy.getName() + " | 文章数: " + articles.size()); Map authors = new HashMap<>(); articles.forEach(a -> authors.merge(a.getAuthor(), 1, Integer::sum)); System.out.println("作者分布: " + authors); } catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); } } public String getName() { return "analyze"; } public String getDescription() { return "分析URL但不存储"; } } class HistoryCommand implements Command { private List history; public HistoryCommand(List h) { history = h; } public void execute(String... args) { if (history.isEmpty()) { System.out.println("暂无历史"); return; } System.out.println("命令历史:"); history.forEach(h -> System.out.println(" " + h)); System.out.println("\n=== AI审计 ==="); System.out.println("类名: Article, ArticleRepository, CrawlStrategy, CrawlCommand, Controller"); System.out.println("请检查MVC三层划分是否越权"); } public String getName() { return "history"; } public String getDescription() { return "查看命令历史"; } } class ClearCommand implements Command { private ArticleRepository repo; public ClearCommand(ArticleRepository r) { repo = r; } public void execute(String... args) { repo.clear(); System.out.println("已清空"); } public String getName() { return "clear"; } public String getDescription() { return "清空文章"; } } class ExitCommand implements Command { public void execute(String... args) { System.out.println("退出"); System.exit(0); } public String getName() { return "exit"; } public String getDescription() { return "退出程序"; } } class HelpCommand implements Command { private Map commands; public HelpCommand(Map c) { commands = c; } public void execute(String... args) { System.out.println("可用命令:"); commands.forEach((k, v) -> System.out.printf(" %s - %s%n", k, v.getDescription())); } public String getName() { return "help"; } public String getDescription() { return "显示帮助"; } } class CommandManager { private Map commands = new HashMap<>(); private Map aliases = new HashMap<>(); public void register(Command c) { commands.put(c.getName(), c); } public void alias(String a, String n) { aliases.put(a, n); } public Command get(String n) { return commands.get(aliases.getOrDefault(n, n)); } public boolean has(String n) { return commands.containsKey(aliases.getOrDefault(n, n)); } public Map getAll() { return commands; } } public class App { public static void main(String[] args) { ArticleRepository repo = new ArticleRepository(); ScraperService scraper = new ScraperService(); CommandManager cmdMgr = new CommandManager(); List history = new ArrayList<>(); cmdMgr.register(new CrawlCommand(repo, scraper)); cmdMgr.register(new ListCommand(repo)); cmdMgr.register(new AnalyzeCommand(scraper)); cmdMgr.register(new HistoryCommand(history)); cmdMgr.register(new ClearCommand(repo)); cmdMgr.register(new ExitCommand()); cmdMgr.register(new HelpCommand(cmdMgr.getAll())); cmdMgr.alias("c", "crawl"); Scanner scanner = new Scanner(System.in); System.out.println("========== 命令行工具 =========="); while (true) { System.out.print("> "); String input = scanner.nextLine().trim(); if (input.isEmpty()) continue; history.add(input); String[] parts = input.split("\\s+"); String cmdName = parts[0]; String[] cmdArgs = parts.length > 1 ? Arrays.copyOfRange(parts, 1, parts.length) : new String[0]; if (cmdMgr.has(cmdName)) { cmdMgr.get(cmdName).execute(cmdArgs); } else { System.out.println("未知命令: " + cmdName); } } } }