From 6ba9fcf0c5c07dd08182e32e958eac55b9d09650 Mon Sep 17 00:00:00 2001
From: JiangYouhan <3080587852@qq.com>
Date: Sat, 30 May 2026 11:36:23 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'w11'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 w11/App.java  | 292 ++++++++++++++++++++++++++++++++++++++++++++++++++
 w11/Main.java |  37 +++++++
 2 files changed, 329 insertions(+)
 create mode 100644 w11/App.java
 create mode 100644 w11/Main.java

diff --git a/w11/App.java b/w11/App.java
new file mode 100644
--- /dev/null
+++ b/w11/App.java
@@ -0,0 +1,292 @@
+import java.util.*;
+
+/** Base unchecked exception for every failure raised by the crawler. */
+class CrawlerException extends RuntimeException {
+    public CrawlerException(String message) { super(message); }
+    public CrawlerException(String message, Throwable cause) { super(message, cause); }
+}
+
+/** Raised when a page cannot be fetched; prefixes the message with "网络请求失败: ". */
+class NetworkException extends CrawlerException {
+    public NetworkException(String message) { super("网络请求失败: " + message); }
+    /** Same prefix as the one-argument form, and records the underlying failure as the cause. */
+    public NetworkException(String message, Throwable cause) { super("网络请求失败: " + message, cause); }
+}
+
+/** Raised when fetched content cannot be parsed; prefixes the message with "数据解析失败: ". */
+class ParseException extends CrawlerException {
+    public ParseException(String message) { super("数据解析失败: " + message); }
+}
+
+/** Immutable value object describing one crawled article. */
+class Article {
+    private final String title, author, publishDate, content, url;
+    public Article(String t, String a, String pd, String c, String u) {
+        title = t; author = a; publishDate = pd; content = c; url = u;
+    }
+    public String getTitle() { return title; }
+    public String getAuthor() { return author; }
+    public String getPublishDate() { return publishDate; }
+}
+
+/** In-memory store of crawled articles, kept in insertion order. */
+class ArticleRepository {
+    private final List<Article> articles = new ArrayList<>();
+    /** Appends one article; throws CrawlerException when it is null. */
+    public void add(Article a) { if (a == null) throw new CrawlerException("Article cannot be null"); articles.add(a); }
+    /** Appends every element of the list; throws CrawlerException when the list itself is null. */
+    public void addAll(List<Article> list) { if (list == null) throw new CrawlerException("List cannot be null"); articles.addAll(list); }
+    /** Returns a copy, so callers cannot modify the repository through the result. */
+    public List<Article> getAll() { return new ArrayList<>(articles); }
+    public int size() { return articles.size(); }
+    public void clear() { articles.clear(); }
+}
+
+/** Parsing strategy that is selected by URL. */
+interface CrawlStrategy {
+    /** Returns true when this strategy should handle the given URL. */
+    boolean supports(String url);
+    /** Turns fetched content into a list of articles. */
+    List<Article> parse(String html) throws ParseException;
+    String getName();
+}
+
+/** Handles URLs containing "blog" or "csdn"; parse() ignores its input and returns three placeholder articles. */
+class BlogStrategy implements CrawlStrategy {
+    public boolean supports(String url) { return url.contains("blog") || url.contains("csdn"); }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        for (int i = 1; i <= 3; i++) {
+            list.add(new Article("博客文章"+i, "博主"+i,
+                java.time.LocalDate.now().minusDays(i).toString(), "内容"+i, "http://blog.com/"+i));
+        }
+        return list;
+    }
+    public String getName() { return "BlogStrategy"; }
+}
+
+/** Handles URLs containing "news" or "sina"; parse() ignores its input and returns three placeholder articles. */
+class NewsStrategy implements CrawlStrategy {
+    // LocalDateTime.toString() omits zero seconds, so substring(0,19) on it could throw; format explicitly instead.
+    private static final java.time.format.DateTimeFormatter STAMP =
+        java.time.format.DateTimeFormatter.ofPattern("yyyy-MM-dd'T'HH:mm:ss");
+    public boolean supports(String url) { return url.contains("news") || url.contains("sina"); }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        for (int i = 1; i <= 3; i++) {
+            list.add(new Article("新闻标题"+i, "记者"+i,
+                java.time.LocalDateTime.now().format(STAMP), "内容"+i, "http://news.com/"+i));
+        }
+        return list;
+    }
+    public String getName() { return "NewsStrategy"; }
+}
+
+/** Catch-all strategy: supports every URL and returns a single placeholder article. */
+class DefaultStrategy implements CrawlStrategy {
+    public boolean supports(String url) { return true; }
+    public List<Article> parse(String html) throws ParseException {
+        List<Article> list = new ArrayList<>();
+        list.add(new Article("默认文章", "未知作者",
+            java.time.LocalDate.now().toString(), "默认内容", "http://example.com"));
+        return list;
+    }
+    public String getName() { return "DefaultStrategy"; }
+}
+
+/** Simulated page fetcher: each attempt fails at random and is retried. */
+class ScraperService {
+    private static final int MAX_ATTEMPTS = 3;
+
+    /** Returns the simulated page content, or throws NetworkException once MAX_ATTEMPTS attempts have failed. */
+    public String fetch(String url) throws NetworkException {
+        RuntimeException lastFailure = null;
+        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+            try {
+                System.out.println("[Scraper] 获取: " + url);
+                if (Math.random() > 0.3) return "内容";
+                throw new RuntimeException("超时");
+            } catch (RuntimeException e) {
+                lastFailure = e; // remember why this attempt failed, then retry
+            }
+        }
+        throw new NetworkException("重试3次失败", lastFailure);
+    }
+}
+
+/** A named action that can be run from the command prompt. */
+interface Command {
+    void execute(String... args);
+    String getName();
+    String getDescription();
+}
+
+/** "crawl": fetches a URL, parses it with the first matching strategy and stores the articles. */
+class CrawlCommand implements Command {
+    private final ArticleRepository repo;
+    private final List<CrawlStrategy> strategies;
+    private final ScraperService scraper;
+
+    public CrawlCommand(ArticleRepository r, ScraperService s) {
+        repo = r; scraper = s;
+        strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
+    }
+
+    public void execute(String... args) {
+        if (args.length == 0) { System.out.println("请输入URL"); return; }
+        String url = args[0];
+        if (!url.startsWith("http")) { System.out.println("无效URL"); return; }
+        try {
+            String html = scraper.fetch(url);
+            CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElseGet(DefaultStrategy::new);
+            List<Article> articles = strategy.parse(html);
+            repo.addAll(articles);
+            System.out.println("抓取完成: " + articles.size() + "篇");
+        } catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); }
+    }
+    public String getName() { return "crawl"; }
+    public String getDescription() { return "抓取URL,别名c"; }
+}
+
+/** "list": prints every stored article with a 1-based index. */
+class ListCommand implements Command {
+    private final ArticleRepository repo;
+    public ListCommand(ArticleRepository r) { repo = r; }
+    public void execute(String... args) {
+        List<Article> list = repo.getAll();
+        if (list.isEmpty()) { System.out.println("暂无文章"); return; }
+        for (int i = 0; i < list.size(); i++) {
+            Article a = list.get(i);
+            System.out.printf("[%d] %s - %s (%s)%n", i+1, a.getTitle(), a.getAuthor(), a.getPublishDate());
+        }
+    }
+    public String getName() { return "list"; }
+    public String getDescription() { return "列出文章"; }
+}
+
+/** "analyze": fetches and parses a URL like "crawl", but only prints statistics and stores nothing. */
+class AnalyzeCommand implements Command {
+    private final List<CrawlStrategy> strategies;
+    private final ScraperService scraper;
+
+    public AnalyzeCommand(ScraperService s) {
+        scraper = s;
+        strategies = List.of(new BlogStrategy(), new NewsStrategy(), new DefaultStrategy());
+    }
+
+    public void execute(String... args) {
+        if (args.length == 0) { System.out.println("请输入URL"); return; }
+        String url = args[0];
+        if (!url.startsWith("http")) { System.out.println("无效URL"); return; }
+        try {
+            String html = scraper.fetch(url);
+            CrawlStrategy strategy = strategies.stream().filter(s -> s.supports(url)).findFirst().orElseGet(DefaultStrategy::new);
+            List<Article> articles = strategy.parse(html);
+            System.out.println("=== 分析结果(不存储)===");
+            System.out.println("URL: " + url + " | 策略: " + strategy.getName() + " | 文章数: " + articles.size());
+            Map<String, Integer> authors = new HashMap<>();
+            articles.forEach(a -> authors.merge(a.getAuthor(), 1, Integer::sum));
+            System.out.println("作者分布: " + authors);
+        } catch (NetworkException | ParseException e) { System.out.println(e.getMessage()); }
+    }
+    public String getName() { return "analyze"; }
+    public String getDescription() { return "分析URL但不存储"; }
+}
+
+/** "history": prints every command line entered so far, followed by a fixed audit reminder. */
+class HistoryCommand implements Command {
+    private final List<String> history;
+    public HistoryCommand(List<String> h) { history = h; }
+    public void execute(String... args) {
+        if (history.isEmpty()) { System.out.println("暂无历史"); return; }
+        System.out.println("命令历史:");
+        history.forEach(h -> System.out.println("  " + h));
+        System.out.println("\n=== AI审计 ===");
+        System.out.println("类名: Article, ArticleRepository, CrawlStrategy, CrawlCommand, Controller");
+        System.out.println("请检查MVC三层划分是否越权");
+    }
+    public String getName() { return "history"; }
+    public String getDescription() { return "查看命令历史"; }
+}
+
+/** "clear": removes every stored article. */
+class ClearCommand implements Command {
+    private final ArticleRepository repo;
+    public ClearCommand(ArticleRepository r) { repo = r; }
+    public void execute(String... args) { repo.clear(); System.out.println("已清空"); }
+    public String getName() { return "clear"; }
+    public String getDescription() { return "清空文章"; }
+}
+
+/** "exit": terminates the JVM with status 0. */
+class ExitCommand implements Command {
+    public void execute(String... args) { System.out.println("退出"); System.exit(0); }
+    public String getName() { return "exit"; }
+    public String getDescription() { return "退出程序"; }
+}
+
+/** "help": prints the name and description of every command in the map it was given. */
+class HelpCommand implements Command {
+    private final Map<String, Command> commands;
+    public HelpCommand(Map<String, Command> c) { commands = c; }
+    public void execute(String... args) {
+        System.out.println("可用命令:");
+        commands.forEach((k, v) -> System.out.printf("  %s - %s%n", k, v.getDescription()));
+    }
+    public String getName() { return "help"; }
+    public String getDescription() { return "显示帮助"; }
+}
+
+/** Registry of commands by name, with optional aliases. */
+class CommandManager {
+    // LinkedHashMap keeps registration order, so the "help" listing is stable across runs.
+    private final Map<String, Command> commands = new LinkedHashMap<>();
+    private final Map<String, String> aliases = new HashMap<>();
+    public void register(Command c) { commands.put(c.getName(), c); }
+    public void alias(String a, String n) { aliases.put(a, n); }
+    public Command get(String n) { return commands.get(aliases.getOrDefault(n, n)); }
+    public boolean has(String n) { return commands.containsKey(aliases.getOrDefault(n, n)); }
+    /** Returns a read-only live view: later registrations are visible, but callers cannot modify the registry. */
+    public Map<String, Command> getAll() { return Collections.unmodifiableMap(commands); }
+}
+
+/** Entry point: wires the commands together and runs the interactive prompt loop. */
+public class App {
+    public static void main(String[] args) {
+        ArticleRepository repo = new ArticleRepository();
+        ScraperService scraper = new ScraperService();
+        CommandManager cmdMgr = new CommandManager();
+        List<String> history = new ArrayList<>();
+
+        cmdMgr.register(new CrawlCommand(repo, scraper));
+        cmdMgr.register(new ListCommand(repo));
+        cmdMgr.register(new AnalyzeCommand(scraper));
+        cmdMgr.register(new HistoryCommand(history));
+        cmdMgr.register(new ClearCommand(repo));
+        cmdMgr.register(new ExitCommand());
+        cmdMgr.register(new HelpCommand(cmdMgr.getAll()));
+        cmdMgr.alias("c", "crawl");
+
+        Scanner scanner = new Scanner(System.in);
+        System.out.println("========== 命令行工具 ==========");
+
+        while (true) {
+            System.out.print("> ");
+            // nextLine() throws NoSuchElementException once stdin is closed; leave the loop on EOF instead.
+            if (!scanner.hasNextLine()) break;
+            String input = scanner.nextLine().trim();
+            if (input.isEmpty()) continue;
+
+            history.add(input);
+            String[] parts = input.split("\\s+");
+            String cmdName = parts[0];
+            String[] cmdArgs = parts.length > 1 ? Arrays.copyOfRange(parts, 1, parts.length) : new String[0];
+
+            if (cmdMgr.has(cmdName)) {
+                cmdMgr.get(cmdName).execute(cmdArgs);
+            } else {
+                System.out.println("未知命令: " + cmdName);
+            }
+        }
+    }
+}
diff --git a/w11/Main.java b/w11/Main.java
new file mode 100644
--- /dev/null
+++ b/w11/Main.java
@@ -0,0 +1,37 @@
+import com.example.datacollect.*;
+
+// NOTE(review): w11/App.java declares CrawlStrategy, NetworkException and ParseException in this same
+// unnamed package, and same-package types take precedence over the on-demand import above. If both
+// files are compiled together, the names below would presumably bind to App.java's types rather than
+// to com.example.datacollect (not part of this patch) - confirm, and move one of the files if so.
+public class Main {
+    public static void main(String[] args) {
+        CrawlStrategy strategyA = new ASiteCrawlStrategyImpl();
+        CrawlStrategy strategyB = new BSiteCrawlStrategyImpl();
+        CrawlStrategy strategyC = new CSiteCrawlStrategyImpl();
+        CrawlStrategy strategyD = new DSiteCrawlStrategyImpl();
+
+        strategyA.crawl("http://www.example-a.com");
+        strategyB.crawl("http://www.example-b.com");
+        strategyC.crawl("http://www.example-c.com");
+        strategyD.crawl("http://www.example-d.com");
+
+        try {
+            throw new NetworkException("连接超时");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+
+        try {
+            throw new ParseException("HTML格式错误");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+
+        try {
+            throw new UnsupportedSiteException("UNKNOWN");
+        } catch (CrawlException e) {
+            System.out.println("捕获异常: " + e.getMessage());
+        }
+    }
+}