15 changed files with 512 additions and 1 deletions
@ -0,0 +1,76 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.factory.StrategyFactory; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class AnalyzeCommand implements Command { |
||||
|
// URL 格式校验正则(和 CrawlCommand 保持一致)
|
||||
|
private static final Pattern URL_PATTERN = |
||||
|
Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$"); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
// 构造方法:只依赖 View 和 StrategyFactory,不依赖 Repository
|
||||
|
public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
// 命令名:analyze
|
||||
|
return "analyze"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> unused) { |
||||
|
// 1. 校验参数
|
||||
|
if (args.length < 2) { |
||||
|
view.printError("用法:analyze <url>"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String url = args[1]; |
||||
|
|
||||
|
// 2. 校验 URL 格式
|
||||
|
if (!isValidUrl(url)) { |
||||
|
view.printError("无效的 URL 格式:" + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
// 3. 复用 StrategyFactory 获取策略,解析 URL
|
||||
|
List<Article> parsedArticles = strategyFactory.getStrategy(url).crawl(url); |
||||
|
|
||||
|
// 4. 关键:只输出统计信息,不存入 ArticleRepository
|
||||
|
printAnalysisResult(url, parsedArticles); |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
view.printError("解析失败:" + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 只输出解析结果,不修改任何数据存储 |
||||
|
*/ |
||||
|
private void printAnalysisResult(String url, List<Article> articles) { |
||||
|
view.printInfo("===== 解析统计结果 ====="); |
||||
|
view.printInfo("目标 URL:" + url); |
||||
|
view.printInfo("解析到文章数量:" + articles.size()); |
||||
|
|
||||
|
if (!articles.isEmpty()) { |
||||
|
Article first = articles.get(0); |
||||
|
view.printInfo("首篇文章标题:" + first.getTitle()); |
||||
|
view.printInfo("首篇文章作者:" + first.getAuthor()); |
||||
|
view.printInfo("首篇发布日期:" + first.getPublishDate()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private boolean isValidUrl(String url) { |
||||
|
return url != null && URL_PATTERN.matcher(url).matches(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,43 @@ |
|||||
|
package com.example.datacollect.repository; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article> newArticles) { |
||||
|
// 防御 null:传入的集合不能为 null
|
||||
|
if (newArticles == null) { |
||||
|
return; |
||||
|
} |
||||
|
// 遍历添加,同时防御集合中的 null 元素
|
||||
|
for (Article article : newArticles) { |
||||
|
if (article != null) { |
||||
|
articles.add(article); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
// 返回不可修改集合(作业要求:防止外部篡改)
|
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
articles.clear(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,40 @@ |
|||||
|
package com.example.datacollect.utils; |
||||
|
|
||||
|
/**
 * Retry helper with exponential backoff.
 *
 * <p>The wait before retry number {@code attempt} (0-based) is {@code 500 * 2^attempt}
 * milliseconds.
 */
public class RetryUtils {

    /** Delay before the first retry, in milliseconds. */
    private static final long BASE_DELAY_MS = 500;

    /**
     * A unit of work that produces a value and may fail with any exception.
     *
     * @param <T> result type
     */
    @FunctionalInterface
    public interface RetryTask<T> {
        T run() throws Exception;
    }

    /**
     * Runs {@code task}, retrying with exponential backoff when it fails.
     *
     * <p>An {@link InterruptedException} is never retried: whether it comes from the task or
     * from the backoff sleep, the thread's interrupt flag is restored and the exception is
     * propagated at once. When the sleep is interrupted, the task failure that triggered the
     * backoff is attached as a suppressed exception.
     *
     * @param maxRetries maximum number of retries, not counting the first attempt; a value
     *                   of zero or less means the task runs exactly once
     * @param task       the work to execute; must not be {@code null}
     * @param <T>        result type
     * @return the value returned by the first successful run
     * @throws NullPointerException if {@code task} is {@code null}
     * @throws InterruptedException if the calling thread is interrupted
     * @throws Exception            the failure of the last attempt once retries are exhausted
     */
    public static <T> T retry(int maxRetries, RetryTask<T> task) throws Exception {
        // Fail fast: a null task would otherwise raise an NPE inside the loop and be
        // "retried" with full backoff delays.
        if (task == null) {
            throw new NullPointerException("task");
        }

        int attempt = 0;
        while (true) {
            try {
                return task.run();
            } catch (InterruptedException e) {
                // Interruption is a request to stop, not a transient failure.
                Thread.currentThread().interrupt();
                throw e;
            } catch (Exception e) {
                if (attempt >= maxRetries) {
                    throw e; // retries exhausted
                }
                try {
                    Thread.sleep(backoffDelayMillis(attempt));
                } catch (InterruptedException interrupted) {
                    Thread.currentThread().interrupt();
                    interrupted.addSuppressed(e); // keep the reason we were backing off
                    throw interrupted;
                }
                attempt++;
            }
        }
    }

    /**
     * Computes {@code BASE_DELAY_MS * 2^attempt}, saturating at {@link Long#MAX_VALUE}
     * instead of overflowing into a negative or wrapped value.
     */
    private static long backoffDelayMillis(int attempt) {
        // The shifted value stays positive only while the shift is smaller than the number
        // of leading zero bits of the base delay.
        if (attempt < Long.numberOfLeadingZeros(BASE_DELAY_MS)) {
            return BASE_DELAY_MS << attempt;
        }
        return Long.MAX_VALUE;
    }
}
||||
@ -0,0 +1,15 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Unchecked exception signalling that a URL string is not in an acceptable format.
 */
public class UrlFormatException extends RuntimeException {

    /**
     * Creates the exception with a description only.
     *
     * @param message human-readable description of the problem
     */
    public UrlFormatException(String message) {
        super(message);
    }

    /**
     * Creates the exception with a description and the underlying cause.
     *
     * @param message human-readable description of the problem
     * @param cause   the exception that led to this one
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
    }
}
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.w9; |
||||
|
|
||||
|
import com.example.datacollect.w9.controller.CrawlerController; |
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class Main { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
|
||||
|
view.printInfo("=== 文章爬虫系统已启动 ==="); |
||||
|
view.printInfo("输入 help 查看命令"); |
||||
|
|
||||
|
while (true) { |
||||
|
String input = view.readLine(); |
||||
|
controller.handle(input); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,9 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
@ -0,0 +1,51 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private static final Pattern URL_PATTERN = |
||||
|
Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$"); |
||||
|
|
||||
|
private final ConsoleView view; |
||||
|
private final List<Article> articles; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.articles = articles; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("用法:crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
if (!isValidUrl(url)) { |
||||
|
view.printError("URL 格式不正确"); |
||||
|
return; |
||||
|
} |
||||
|
// 模拟爬取
|
||||
|
Article art = new Article( |
||||
|
"模拟标题-" + (articles.size() + 1), |
||||
|
url, |
||||
|
"模拟正文内容", |
||||
|
"模拟作者", |
||||
|
"2026-05-31" |
||||
|
); |
||||
|
articles.add(art); |
||||
|
view.printSuccess("爬取成功:" + art.getTitle()); |
||||
|
} |
||||
|
|
||||
|
private boolean isValidUrl(String url) { |
||||
|
return url != null && URL_PATTERN.matcher(url).matches(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,24 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("程序退出"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,28 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("=== 可用命令 ==="); |
||||
|
System.out.println("help 显示帮助"); |
||||
|
System.out.println("list 列出所有文章"); |
||||
|
System.out.println("crawl <url> 爬取文章(或简写 c <url>)"); |
||||
|
System.out.println("history 查看命令历史"); |
||||
|
System.out.println("exit 退出程序"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,32 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HistoryCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
private final List<String> history; |
||||
|
|
||||
|
public HistoryCommand(ConsoleView view, List<String> history) { |
||||
|
this.view = view; |
||||
|
this.history = history; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "history"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (history.isEmpty()) { |
||||
|
view.printInfo("暂无历史记录"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("=== 命令历史 ==="); |
||||
|
for (int i = 0; i < history.size(); i++) { |
||||
|
System.out.println((i + 1) + ". " + history.get(i)); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.w9.command; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.display(articles); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,57 @@ |
|||||
|
package com.example.datacollect.w9.controller; |
||||
|
|
||||
|
import com.example.datacollect.w9.command.*; |
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import com.example.datacollect.w9.view.ConsoleView; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
private final Map<String, String> aliases = new HashMap<>(); |
||||
|
private final ConsoleView view; |
||||
|
private final List<Article> articles; |
||||
|
private final List<String> history = new ArrayList<>(); |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.articles = articles; |
||||
|
register(new HelpCommand(view)); |
||||
|
register(new ListCommand(view)); |
||||
|
register(new CrawlCommand(view, articles)); |
||||
|
register(new ExitCommand(view)); |
||||
|
register(new HistoryCommand(view, history)); |
||||
|
registerAlias("c", "crawl"); // 别名 c = crawl
|
||||
|
} |
||||
|
|
||||
|
private void register(Command command) { |
||||
|
commands.put(command.getName(), command); |
||||
|
} |
||||
|
|
||||
|
private void registerAlias(String alias, String commandName) { |
||||
|
aliases.put(alias, commandName); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
String text = input == null ? "" : input.trim(); |
||||
|
if (text.isEmpty()) return; |
||||
|
|
||||
|
history.add(text); |
||||
|
|
||||
|
String[] args = text.split("\\s+"); |
||||
|
String cmdName = args[0].toLowerCase(); |
||||
|
|
||||
|
if (aliases.containsKey(cmdName)) { |
||||
|
cmdName = aliases.get(cmdName); |
||||
|
} |
||||
|
|
||||
|
Command command = commands.get(cmdName); |
||||
|
if (command == null) { |
||||
|
view.printError("Unknown command: " + cmdName); |
||||
|
return; |
||||
|
} |
||||
|
command.execute(args, articles); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,46 @@ |
|||||
|
package com.example.datacollect.w9.model; |
||||
|
|
||||
|
/**
 * Mutable data holder for one article: title, URL, content, author and publish date,
 * all stored as strings.
 */
public class Article {

    private String title;
    private String url;
    private String content;
    private String author;
    private String publishDate;

    /**
     * Creates an article without author and publish date (both stay {@code null}).
     *
     * @param title   article title
     * @param url     article URL
     * @param content article body
     */
    public Article(String title, String url, String content) {
        this(title, url, content, null, null);
    }

    /**
     * Creates a fully populated article.
     *
     * @param title       article title
     * @param url         article URL
     * @param content     article body
     * @param author      author name
     * @param publishDate publish date as text
     */
    public Article(String title, String url, String content, String author, String publishDate) {
        this.title = title;
        this.url = url;
        this.content = content;
        this.author = author;
        this.publishDate = publishDate;
    }

    // Accessors

    public String getTitle() {
        return title;
    }

    public String getUrl() {
        return url;
    }

    public String getContent() {
        return content;
    }

    public String getAuthor() {
        return author;
    }

    public String getPublishDate() {
        return publishDate;
    }

    // Mutators

    public void setTitle(String title) {
        this.title = title;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }

    /**
     * @return a one-line description containing title, URL, author and publish date;
     *         the content is not included
     */
    @Override
    public String toString() {
        return String.format(
                "Article{title='%s', url='%s', author='%s', publishDate='%s'}",
                title, url, author, publishDate);
    }
}
||||
@ -0,0 +1,43 @@ |
|||||
|
package com.example.datacollect.w9.view; |
||||
|
|
||||
|
import com.example.datacollect.w9.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
// 颜色
|
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
return scanner.nextLine(); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue