From 2ae3ae04ad9857901d2de4bf81b53d047ff38044 Mon Sep 17 00:00:00 2001
From: wanglixia <3035026499@qq.com>
Date: Sun, 31 May 2026 15:51:46 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BD=9C=E4=B8=9A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .idea/workspace.xml                  |  3 +-
 w10/AnalyzeCommand.java              | 88 ++++++++++++++++++++++++++++
 w10/ArticleRepository.java           | 61 +++++++++++++++++++
 w11/RetryUtils.java                  | 71 +++++++++++++++++++++++
 w11/UrlFormatException.java          | 24 ++++++++
 w9/Main.java                         | 38 ++++++++++++
 w9/command/Command.java              | 18 ++++++
 w9/command/CrawlCommand.java         | 64 ++++++++++++++++++++
 w9/command/ExitCommand.java          | 29 +++++++++
 w9/command/HelpCommand.java          | 33 ++++++++++
 w9/command/HistoryCommand.java       | 38 ++++++++++++
 w9/command/ListCommand.java          | 28 +++++++++
 w9/controller/CrawlerController.java | 72 +++++++++++++++++++++++
 w9/model/Article.java                | 47 +++++++++++++++
 w9/view/ConsoleView.java             | 58 ++++++++++++++++++
 15 files changed, 671 insertions(+), 1 deletion(-)
 create mode 100644 w10/AnalyzeCommand.java
 create mode 100644 w10/ArticleRepository.java
 create mode 100644 w11/RetryUtils.java
 create mode 100644 w11/UrlFormatException.java
 create mode 100644 w9/Main.java
 create mode 100644 w9/command/Command.java
 create mode 100644 w9/command/CrawlCommand.java
 create mode 100644 w9/command/ExitCommand.java
 create mode 100644 w9/command/HelpCommand.java
 create mode 100644 w9/command/HistoryCommand.java
 create mode 100644 w9/command/ListCommand.java
 create mode 100644 w9/controller/CrawlerController.java
 create mode 100644 w9/model/Article.java
 create mode 100644 w9/view/ConsoleView.java

Note: the XML content of the .idea/workspace.xml hunk below was lost when this
patch was extracted; only the bare -/+ markers survive, so that hunk cannot be
applied as is. The index lines still carry the blob hashes of the original
submission and do not describe the revised file contents.

diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 0d37eff..6cb3448 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -2,7 +2,7 @@
-
+
diff --git a/w10/AnalyzeCommand.java b/w10/AnalyzeCommand.java
new file mode 100644
index 0000000..db1a5f5
--- /dev/null
+++ b/w10/AnalyzeCommand.java
@@ -0,0 +1,88 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.factory.StrategyFactory;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.view.ConsoleView;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/**
+ * Read-only {@code analyze <url>} command: crawls a URL through the strategy supplied by
+ * {@link StrategyFactory} and prints summary statistics without storing any article.
+ */
+public class AnalyzeCommand implements Command {
+    // URL format check; kept identical to the pattern used by CrawlCommand.
+    private static final Pattern URL_PATTERN =
+            Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$");
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * Creates the command; it depends only on the view and the strategy factory.
+     *
+     * @param view            console used for every message this command prints
+     * @param strategyFactory source of the crawl strategy for a given URL
+     */
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Validates {@code args[1]} as a URL, crawls it and prints the statistics.
+     *
+     * @param args   tokenized command line; {@code args[0]} is the command name
+     * @param unused article list required by {@link Command}; never read or modified here
+     */
+    @Override
+    public void execute(String[] args, List<Article> unused) {
+        if (args.length < 2) {
+            view.printError("用法:analyze <url>");
+            return;
+        }
+
+        String url = args[1];
+        if (!isValidUrl(url)) {
+            view.printError("无效的 URL 格式:" + url);
+            return;
+        }
+
+        try {
+            List<Article> parsedArticles = strategyFactory.getStrategy(url).crawl(url);
+            // Report only: the parsed articles are deliberately not persisted anywhere.
+            printAnalysisResult(url, parsedArticles);
+        } catch (Exception e) {
+            view.printError("解析失败:" + e.getMessage());
+        }
+    }
+
+    /**
+     * Prints the article count and, when available, details of the first article.
+     *
+     * @param url      the URL that was analyzed
+     * @param articles crawl result; {@code null} is reported as zero articles
+     */
+    private void printAnalysisResult(String url, List<Article> articles) {
+        int count = articles == null ? 0 : articles.size();
+        view.printInfo("===== 解析统计结果 =====");
+        view.printInfo("目标 URL:" + url);
+        view.printInfo("解析到文章数量:" + count);
+
+        if (count > 0) {
+            Article first = articles.get(0);
+            view.printInfo("首篇文章标题:" + first.getTitle());
+            view.printInfo("首篇文章作者:" + first.getAuthor());
+            view.printInfo("首篇发布日期:" + first.getPublishDate());
+        }
+    }
+
+    private boolean isValidUrl(String url) {
+        return url != null && URL_PATTERN.matcher(url).matches();
+    }
+}
\ No newline at end of file
diff --git a/w10/ArticleRepository.java b/w10/ArticleRepository.java
new file mode 100644
index 0000000..a0f3d51
--- /dev/null
+++ b/w10/ArticleRepository.java
@@ -0,0 +1,61 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/** In-memory article store that exposes its contents only through a read-only view. */
+public class ArticleRepository {
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Appends a single article.
+     *
+     * @param article the article to store
+     * @throws IllegalArgumentException if {@code article} is {@code null}
+     */
+    public void add(Article article) {
+        if (article == null) {
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+        articles.add(article);
+    }
+
+    /**
+     * Appends every non-null element of {@code newArticles}; a {@code null} list is ignored.
+     *
+     * @param newArticles articles to store, may be {@code null} or contain {@code null}s
+     */
+    public void addAll(List<Article> newArticles) {
+        if (newArticles == null) {
+            return;
+        }
+        // Iterate over a snapshot: the argument may be the view returned by getAll(), and
+        // adding to the backing list while iterating that view would fail fast.
+        for (Article article : new ArrayList<>(newArticles)) {
+            if (article != null) {
+                articles.add(article);
+            }
+        }
+    }
+
+    /**
+     * Returns the stored articles as an unmodifiable view so callers cannot alter the store.
+     *
+     * @return read-only view that reflects later additions and removals
+     */
+    public List<Article> getAll() {
+        return Collections.unmodifiableList(articles);
+    }
+
+    /** Returns the number of stored articles. */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes every stored article. */
+    public void clear() {
+        articles.clear();
+    }
+}
diff --git a/w11/RetryUtils.java b/w11/RetryUtils.java
new file mode 100644
index 0000000..6d2eaea
--- /dev/null
+++ b/w11/RetryUtils.java
@@ -0,0 +1,71 @@
+package com.example.datacollect.utils;
+
+/**
+ * Retry helper with exponential backoff: the wait before retry {@code n} (counted from 0)
+ * is {@code 500 * 2^n} milliseconds.
+ */
+public class RetryUtils {
+
+    /** Delay before the first retry, in milliseconds. */
+    private static final long BASE_DELAY_MS = 500;
+
+    /** Largest shift for which {@code BASE_DELAY_MS << shift} still fits in a {@code long}. */
+    private static final int MAX_BACKOFF_SHIFT = 54;
+
+    /**
+     * A unit of work that produces a value and may fail with any exception.
+     *
+     * @param <T> type of the produced value
+     */
+    @FunctionalInterface
+    public interface RetryTask<T> {
+        T run() throws Exception;
+    }
+
+    /**
+     * Runs {@code task}, sleeping with exponential backoff between failed attempts.
+     *
+     * @param maxRetries maximum number of retries, not counting the first attempt; zero or
+     *                   a negative value runs the task exactly once
+     * @param task       the work to execute
+     * @param <T>        result type of the task
+     * @return the value returned by the first successful run
+     * @throws NullPointerException if {@code task} is {@code null}
+     * @throws InterruptedException if the task or the backoff sleep is interrupted; the
+     *                              interrupt status is restored and no further retry is made
+     * @throws Exception            the failure of the last attempt once retries are used up
+     */
+    public static <T> T retry(int maxRetries, RetryTask<T> task) throws Exception {
+        if (task == null) {
+            // Fail fast: inside the loop this NPE would be caught and pointlessly retried.
+            throw new NullPointerException("task");
+        }
+        int attempt = 0;
+        while (true) {
+            try {
+                return task.run();
+            } catch (InterruptedException e) {
+                // An interrupt is a request to stop, not a transient failure to retry.
+                Thread.currentThread().interrupt();
+                throw e;
+            } catch (Exception e) {
+                if (attempt >= maxRetries) {
+                    throw e;
+                }
+                try {
+                    Thread.sleep(backoffDelayMs(attempt));
+                } catch (InterruptedException interrupted) {
+                    Thread.currentThread().interrupt();
+                    interrupted.addSuppressed(e); // keep the task failure for diagnostics
+                    throw interrupted;
+                }
+                attempt++;
+            }
+        }
+    }
+
+    /** Returns {@code 500 * 2^attempt} ms, clamping the exponent so the value cannot overflow. */
+    private static long backoffDelayMs(int attempt) {
+        return BASE_DELAY_MS << Math.min(attempt, MAX_BACKOFF_SHIFT);
+    }
+}
\ No newline at end of file
diff --git a/w11/UrlFormatException.java b/w11/UrlFormatException.java
new file mode 100644
index 0000000..75ae802
--- /dev/null
+++ b/w11/UrlFormatException.java
@@ -0,0 +1,24 @@
+package com.example.datacollect.exception;
+
+/**
+ * Unchecked exception signalling that a URL string is malformed.
+ */
+public class UrlFormatException extends RuntimeException {
+
+    private static final long serialVersionUID = 1L;
+
+    /**
+     * @param message description of the malformed URL
+     */
+    public UrlFormatException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message description of the malformed URL
+     * @param cause   underlying failure that revealed the problem
+     */
+    public UrlFormatException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/w9/Main.java b/w9/Main.java
new file mode 100644
index 0000000..eb27a55
--- /dev/null
+++ b/w9/Main.java
@@ -0,0 +1,38 @@
+package com.example.datacollect.w9;
+
+import com.example.datacollect.w9.controller.CrawlerController;
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.NoSuchElementException;
+
+/** Entry point: wires the view, the article list and the controller, then runs the prompt loop. */
+public class Main {
+    /**
+     * Reads console lines and hands each one to the controller until input ends or a
+     * command terminates the JVM.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        ConsoleView view = new ConsoleView();
+        List<Article> articles = new ArrayList<>();
+        CrawlerController controller = new CrawlerController(view, articles);
+
+        view.printInfo("=== 文章爬虫系统已启动 ===");
+        view.printInfo("输入 help 查看命令");
+
+        while (true) {
+            String input;
+            try {
+                input = view.readLine();
+            } catch (NoSuchElementException endOfInput) {
+                // Scanner.nextLine() throws once stdin is exhausted (Ctrl-D, piped input);
+                // end the loop instead of crashing with a stack trace.
+                return;
+            }
+            controller.handle(input);
+        }
+    }
+}
diff --git a/w9/command/Command.java b/w9/command/Command.java
new file mode 100644
index 0000000..0f3005e
--- /dev/null
+++ b/w9/command/Command.java
@@ -0,0 +1,18 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import java.util.List;
+
+/** A console command that is looked up by name and executed against the article list. */
+public interface Command {
+    /** Returns the name under which the command is registered. */
+    String getName();
+
+    /**
+     * Runs the command.
+     *
+     * @param args     tokenized input line; {@code args[0]} is the command word as typed
+     * @param articles article list shared by the commands
+     */
+    void execute(String[] args, List<Article> articles);
+}
diff --git a/w9/command/CrawlCommand.java b/w9/command/CrawlCommand.java
new file mode 100644
index 0000000..1c0af15
--- /dev/null
+++ b/w9/command/CrawlCommand.java
@@ -0,0 +1,64 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.List;
+import java.util.regex.Pattern;
+
+/** {@code crawl <url>} command: validates the URL and stores a simulated article for it. */
+public class CrawlCommand implements Command {
+    private static final Pattern URL_PATTERN =
+            Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$");
+
+    private final ConsoleView view;
+    private final List<Article> articles;
+
+    /**
+     * @param view     console used for messages
+     * @param articles list used when {@link #execute} is called without one
+     */
+    public CrawlCommand(ConsoleView view, List<Article> articles) {
+        this.view = view;
+        this.articles = articles;
+    }
+
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    /**
+     * Appends a placeholder article for {@code args[1]} after validating it as a URL.
+     *
+     * @param args     tokenized input line; {@code args[1]} must be the URL
+     * @param articles list to append to; {@code null} falls back to the constructor's list
+     */
+    @Override
+    public void execute(String[] args, List<Article> articles) {
+        if (args.length < 2) {
+            view.printError("用法:crawl <url>");
+            return;
+        }
+        String url = args[1];
+        if (!isValidUrl(url)) {
+            view.printError("URL 格式不正确");
+            return;
+        }
+        // The parameter shadows the field; the field was previously never read at all.
+        List<Article> target = articles != null ? articles : this.articles;
+        // Simulated crawl: no network access, just a synthetic record.
+        Article art = new Article(
+                "模拟标题-" + (target.size() + 1),
+                url,
+                "模拟正文内容",
+                "模拟作者",
+                "2026-05-31"
+        );
+        target.add(art);
+        view.printSuccess("爬取成功:" + art.getTitle());
+    }
+
+    private boolean isValidUrl(String url) {
+        return url != null && URL_PATTERN.matcher(url).matches();
+    }
+}
\ No newline at end of file
diff --git a/w9/command/ExitCommand.java b/w9/command/ExitCommand.java
new file mode 100644
index 0000000..f151b2d
--- /dev/null
+++ b/w9/command/ExitCommand.java
@@ -0,0 +1,29 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.List;
+
+/** {@code exit} command: prints a farewell message and terminates the JVM. */
+public class ExitCommand implements Command {
+    private final ConsoleView view;
+
+    /**
+     * @param view console used for the exit message
+     */
+    public ExitCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    /** Prints the exit message and calls {@link System#exit(int)} with status 0. */
+    @Override
+    public void execute(String[] args, List<Article> articles) {
+        view.printInfo("程序退出");
+        System.exit(0);
+    }
+}
\ No newline at end of file
diff --git a/w9/command/HelpCommand.java b/w9/command/HelpCommand.java
new file mode 100644
index 0000000..4d97e89
--- /dev/null
+++ b/w9/command/HelpCommand.java
@@ -0,0 +1,33 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.List;
+
+/** {@code help} command: prints the list of available commands. */
+public class HelpCommand implements Command {
+    private final ConsoleView view;
+
+    /**
+     * @param view console used for the heading line
+     */
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /** Prints a heading through the view and one plain line per command to standard output. */
+    @Override
+    public void execute(String[] args, List<Article> articles) {
+        view.printInfo("=== 可用命令 ===");
+        System.out.println("help 显示帮助");
+        System.out.println("list 列出所有文章");
+        System.out.println("crawl <url> 爬取文章(或简写 c <url>)");
+        System.out.println("history 查看命令历史");
+        System.out.println("exit 退出程序");
+    }
+}
\ No newline at end of file
diff --git a/w9/command/HistoryCommand.java b/w9/command/HistoryCommand.java
new file mode 100644
index 0000000..9371eb5
--- /dev/null
+++ b/w9/command/HistoryCommand.java
@@ -0,0 +1,38 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.List;
+
+/** {@code history} command: prints every input line recorded so far, numbered from 1. */
+public class HistoryCommand implements Command {
+    private final ConsoleView view;
+    private final List<String> history;
+
+    /**
+     * @param view    console used for headings and the empty-history notice
+     * @param history live list of recorded input lines; read on every execution
+     */
+    public HistoryCommand(ConsoleView view, List<String> history) {
+        this.view = view;
+        this.history = history;
+    }
+
+    @Override
+    public String getName() {
+        return "history";
+    }
+
+    /** Prints the recorded lines, or a notice when nothing has been recorded. */
+    @Override
+    public void execute(String[] args, List<Article> articles) {
+        if (history.isEmpty()) {
+            view.printInfo("暂无历史记录");
+            return;
+        }
+        view.printInfo("=== 命令历史 ===");
+        for (int i = 0; i < history.size(); i++) {
+            System.out.println((i + 1) + ". " + history.get(i));
+        }
+    }
+}
\ No newline at end of file
diff --git a/w9/command/ListCommand.java b/w9/command/ListCommand.java
new file mode 100644
index 0000000..ce94ee3
--- /dev/null
+++ b/w9/command/ListCommand.java
@@ -0,0 +1,28 @@
+package com.example.datacollect.w9.command;
+
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.List;
+
+/** {@code list} command: shows the current articles through the view. */
+public class ListCommand implements Command {
+    private final ConsoleView view;
+
+    /**
+     * @param view console that renders the article list
+     */
+    public ListCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "list";
+    }
+
+    /** Delegates to {@link ConsoleView#display(List)}. */
+    @Override
+    public void execute(String[] args, List<Article> articles) {
+        view.display(articles);
+    }
+}
diff --git a/w9/controller/CrawlerController.java b/w9/controller/CrawlerController.java
new file mode 100644
index 0000000..5748334
--- /dev/null
+++ b/w9/controller/CrawlerController.java
@@ -0,0 +1,72 @@
+package com.example.datacollect.w9.controller;
+
+import com.example.datacollect.w9.command.*;
+import com.example.datacollect.w9.model.Article;
+import com.example.datacollect.w9.view.ConsoleView;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+/** Parses console input, resolves aliases and dispatches to the registered command. */
+public class CrawlerController {
+    private final Map<String, Command> commands = new HashMap<>();
+    private final Map<String, String> aliases = new HashMap<>();
+    private final ConsoleView view;
+    private final List<Article> articles;
+    private final List<String> history = new ArrayList<>();
+
+    /**
+     * Registers the built-in commands and the alias {@code c} for {@code crawl}.
+     *
+     * @param view     console shared with every command
+     * @param articles article list passed to every command execution
+     */
+    public CrawlerController(ConsoleView view, List<Article> articles) {
+        this.view = view;
+        this.articles = articles;
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view, articles));
+        register(new ExitCommand(view));
+        register(new HistoryCommand(view, history));
+        registerAlias("c", "crawl");
+    }
+
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+    }
+
+    private void registerAlias(String alias, String commandName) {
+        aliases.put(alias, commandName);
+    }
+
+    /**
+     * Executes one input line; blank or {@code null} input is ignored. Every non-blank line is
+     * appended to the history before lookup, so unknown commands are recorded as well.
+     *
+     * @param input raw line typed by the user, may be {@code null}
+     */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+
+        history.add(text);
+
+        String[] args = text.split("\\s+");
+        // Locale.ROOT keeps matching stable under locales with special casing rules
+        // (for example Turkish, where "LIST" would otherwise lower-case to "lıst").
+        String typed = args[0].toLowerCase(Locale.ROOT);
+        String cmdName = aliases.getOrDefault(typed, typed);
+
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            view.printError("Unknown command: " + cmdName);
+            return;
+        }
+        command.execute(args, articles);
+    }
+}
diff --git a/w9/model/Article.java b/w9/model/Article.java
new file mode 100644
index 0000000..090148e
--- /dev/null
+++ b/w9/model/Article.java
@@ -0,0 +1,47 @@
+package com.example.datacollect.w9.model;
+
+/** Mutable data holder for one article. */
+public class Article {
+    private String title;
+    private String url;
+    private String content;
+    private String author;
+    private String publishDate;
+
+    /** Creates an article whose author and publish date are left {@code null}. */
+    public Article(String title, String url, String content) {
+        this(title, url, content, null, null);
+    }
+
+    /** Creates an article with every field set. */
+    public Article(String title, String url, String content, String author, String publishDate) {
+        this.title = title;
+        this.url = url;
+        this.content = content;
+        this.author = author;
+        this.publishDate = publishDate;
+    }
+
+    public String getTitle() { return title; }
+    public String getUrl() { return url; }
+    public String getContent() { return content; }
+    public String getAuthor() { return author; }
+    public String getPublishDate() { return publishDate; }
+
+    public void setTitle(String title) { this.title = title; }
+    public void setUrl(String url) { this.url = url; }
+    public void setContent(String content) { this.content = content; }
+    public void setAuthor(String author) { this.author = author; }
+    public void setPublishDate(String publishDate) { this.publishDate = publishDate; }
+
+    /** Returns a debug string with title, url, author and publish date; content is omitted. */
+    @Override
+    public String toString() {
+        return "Article{"
+                + "title='" + title + '\''
+                + ", url='" + url + '\''
+                + ", author='" + author + '\''
+                + ", publishDate='" + publishDate + '\''
+                + '}';
+    }
+}
\ No newline at end of file
diff --git a/w9/view/ConsoleView.java b/w9/view/ConsoleView.java
new file mode 100644
index 0000000..140bd23
--- /dev/null
+++ b/w9/view/ConsoleView.java
@@ -0,0 +1,58 @@
+package com.example.datacollect.w9.view;
+
+import com.example.datacollect.w9.model.Article;
+import java.util.List;
+import java.util.Scanner;
+
+/** Console front end: reads input lines and prints ANSI-colored messages. */
+public class ConsoleView {
+    // ANSI escape sequences used to color the output.
+    private static final String ANSI_RESET = "\u001B[0m";
+    private static final String ANSI_GREEN = "\u001B[32m";
+    private static final String ANSI_RED = "\u001B[31m";
+    private static final String ANSI_BLUE = "\u001B[34m";
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /**
+     * Prints the {@code "> "} prompt and returns the next line from standard input.
+     *
+     * @return the line read, without its line terminator
+     * @throws java.util.NoSuchElementException if standard input has no further line
+     */
+    public String readLine() {
+        System.out.print("> ");
+        return scanner.nextLine();
+    }
+
+    /** Prints {@code msg} in green. */
+    public void printSuccess(String msg) {
+        System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in red. */
+    public void printError(String msg) {
+        System.out.println(ANSI_RED + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in blue. */
+    public void printInfo(String msg) {
+        System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+    }
+
+    /**
+     * Prints one numbered line per article, or a hint when there is nothing to show.
+     *
+     * @param articles articles to list; {@code null} is treated like an empty list
+     */
+    public void display(List<Article> articles) {
+        if (articles == null || articles.isEmpty()) {
+            printInfo("暂无文章,请先执行 crawl。");
+            return;
+        }
+        for (int i = 0; i < articles.size(); i++) {
+            Article a = articles.get(i);
+            System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+        }
+    }
+}