15 changed files with 512 additions and 1 deletions
@ -0,0 +1,76 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.factory.StrategyFactory; |
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import java.util.List; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class AnalyzeCommand implements Command { |
|||
// URL 格式校验正则(和 CrawlCommand 保持一致)
|
|||
private static final Pattern URL_PATTERN = |
|||
Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$"); |
|||
|
|||
private final ConsoleView view; |
|||
private final StrategyFactory strategyFactory; |
|||
|
|||
// 构造方法:只依赖 View 和 StrategyFactory,不依赖 Repository
|
|||
public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { |
|||
this.view = view; |
|||
this.strategyFactory = strategyFactory; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
// 命令名:analyze
|
|||
return "analyze"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> unused) { |
|||
// 1. 校验参数
|
|||
if (args.length < 2) { |
|||
view.printError("用法:analyze <url>"); |
|||
return; |
|||
} |
|||
|
|||
String url = args[1]; |
|||
|
|||
// 2. 校验 URL 格式
|
|||
if (!isValidUrl(url)) { |
|||
view.printError("无效的 URL 格式:" + url); |
|||
return; |
|||
} |
|||
|
|||
try { |
|||
// 3. 复用 StrategyFactory 获取策略,解析 URL
|
|||
List<Article> parsedArticles = strategyFactory.getStrategy(url).crawl(url); |
|||
|
|||
// 4. 关键:只输出统计信息,不存入 ArticleRepository
|
|||
printAnalysisResult(url, parsedArticles); |
|||
|
|||
} catch (Exception e) { |
|||
view.printError("解析失败:" + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 只输出解析结果,不修改任何数据存储 |
|||
*/ |
|||
private void printAnalysisResult(String url, List<Article> articles) { |
|||
view.printInfo("===== 解析统计结果 ====="); |
|||
view.printInfo("目标 URL:" + url); |
|||
view.printInfo("解析到文章数量:" + articles.size()); |
|||
|
|||
if (!articles.isEmpty()) { |
|||
Article first = articles.get(0); |
|||
view.printInfo("首篇文章标题:" + first.getTitle()); |
|||
view.printInfo("首篇文章作者:" + first.getAuthor()); |
|||
view.printInfo("首篇发布日期:" + first.getPublishDate()); |
|||
} |
|||
} |
|||
|
|||
private boolean isValidUrl(String url) { |
|||
return url != null && URL_PATTERN.matcher(url).matches(); |
|||
} |
|||
} |
|||
@ -0,0 +1,43 @@ |
|||
package com.example.datacollect.repository; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import java.util.ArrayList; |
|||
import java.util.Collections; |
|||
import java.util.List; |
|||
|
|||
public class ArticleRepository { |
|||
private final List<Article> articles = new ArrayList<>(); |
|||
|
|||
public void add(Article article) { |
|||
if (article == null) { |
|||
throw new IllegalArgumentException("Article cannot be null"); |
|||
} |
|||
articles.add(article); |
|||
} |
|||
|
|||
public void addAll(List<Article> newArticles) { |
|||
// 防御 null:传入的集合不能为 null
|
|||
if (newArticles == null) { |
|||
return; |
|||
} |
|||
// 遍历添加,同时防御集合中的 null 元素
|
|||
for (Article article : newArticles) { |
|||
if (article != null) { |
|||
articles.add(article); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public List<Article> getAll() { |
|||
// 返回不可修改集合(作业要求:防止外部篡改)
|
|||
return Collections.unmodifiableList(articles); |
|||
} |
|||
|
|||
public int size() { |
|||
return articles.size(); |
|||
} |
|||
|
|||
public void clear() { |
|||
articles.clear(); |
|||
} |
|||
} |
|||
@ -0,0 +1,40 @@ |
|||
package com.example.datacollect.utils; |
|||
|
|||
/**
 * Retry helper with exponential backoff.
 *
 * <p>The wait before retry number {@code attempt} (0-based) is
 * {@code 500 * 2^attempt} milliseconds.
 */
public class RetryUtils {

    /** Base delay in milliseconds; doubled for every further retry. */
    private static final long BASE_DELAY_MS = 500;

    /** Utility class: only static members, not meant to be instantiated. */
    private RetryUtils() {
    }

    /**
     * A unit of work that returns a value and may fail with any exception.
     *
     * @param <T> the result type
     */
    @FunctionalInterface
    public interface RetryTask<T> {
        T run() throws Exception;
    }

    /**
     * Runs the task, retrying with exponential backoff when it throws.
     *
     * <p>An {@link InterruptedException} is never retried: whether it comes from the
     * task or from the backoff sleep, the thread's interrupt flag is restored and the
     * exception is rethrown immediately.
     *
     * @param maxRetries maximum number of retries (not counting the first attempt);
     *                   zero or a negative value means a single attempt
     * @param task       the task to execute
     * @param <T>        the result type
     * @return the value returned by the first successful run
     * @throws NullPointerException if {@code task} is {@code null}
     * @throws InterruptedException if the calling thread is interrupted
     * @throws Exception            the failure of the last attempt once retries are used up
     */
    public static <T> T retry(int maxRetries, RetryTask<T> task) throws Exception {
        // Reject a null task up front; inside the loop the resulting NPE would be
        // caught and pointlessly retried with sleeps.
        if (task == null) {
            throw new NullPointerException("task");
        }
        int attempt = 0;
        while (true) {
            try {
                return task.run();
            } catch (InterruptedException e) {
                // Interruption is a request to stop, not a transient failure.
                Thread.currentThread().interrupt();
                throw e;
            } catch (Exception e) {
                if (attempt >= maxRetries) {
                    throw e; // retries used up: surface the last failure
                }
                try {
                    Thread.sleep(backoffDelayMs(attempt));
                } catch (InterruptedException interrupted) {
                    Thread.currentThread().interrupt();
                    interrupted.addSuppressed(e); // keep the task failure for diagnostics
                    throw interrupted;
                }
                attempt++;
            }
        }
    }

    /**
     * Computes {@code 500 * 2^attempt} without overflowing.
     *
     * @param attempt 0-based retry index; negative values are treated as 0
     * @return the delay in milliseconds, saturating at {@link Long#MAX_VALUE} when the
     *         exact value would not fit in a {@code long}
     */
    static long backoffDelayMs(int attempt) {
        // Largest shift that keeps BASE_DELAY_MS << shift below the sign bit.
        int maxShift = Long.numberOfLeadingZeros(BASE_DELAY_MS) - 1;
        if (attempt > maxShift) {
            return Long.MAX_VALUE;
        }
        return BASE_DELAY_MS << Math.max(attempt, 0);
    }
}
|||
@ -0,0 +1,15 @@ |
|||
package com.example.datacollect.exception; |
|||
|
|||
/**
 * Unchecked exception signalling that a URL string is malformed.
 */
public class UrlFormatException extends RuntimeException {

    /**
     * Creates the exception with a detail message and the underlying cause.
     *
     * @param message description of the formatting problem
     * @param cause   the exception that triggered this one
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates the exception with a detail message only.
     *
     * @param message description of the formatting problem
     */
    public UrlFormatException(String message) {
        super(message);
    }
}
|||
@ -0,0 +1,23 @@ |
|||
package com.example.datacollect.w9; |
|||
|
|||
import com.example.datacollect.w9.controller.CrawlerController; |
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class Main { |
|||
public static void main(String[] args) { |
|||
ConsoleView view = new ConsoleView(); |
|||
List<Article> articles = new ArrayList<>(); |
|||
CrawlerController controller = new CrawlerController(view, articles); |
|||
|
|||
view.printInfo("=== 文章爬虫系统已启动 ==="); |
|||
view.printInfo("输入 help 查看命令"); |
|||
|
|||
while (true) { |
|||
String input = view.readLine(); |
|||
controller.handle(input); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,9 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import java.util.List; |
|||
|
|||
public interface Command { |
|||
String getName(); |
|||
void execute(String[] args, List<Article> articles); |
|||
} |
|||
@ -0,0 +1,51 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.List; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class CrawlCommand implements Command { |
|||
private static final Pattern URL_PATTERN = |
|||
Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)*$"); |
|||
|
|||
private final ConsoleView view; |
|||
private final List<Article> articles; |
|||
|
|||
public CrawlCommand(ConsoleView view, List<Article> articles) { |
|||
this.view = view; |
|||
this.articles = articles; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "crawl"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
if (args.length < 2) { |
|||
view.printError("用法:crawl <url>"); |
|||
return; |
|||
} |
|||
String url = args[1]; |
|||
if (!isValidUrl(url)) { |
|||
view.printError("URL 格式不正确"); |
|||
return; |
|||
} |
|||
// 模拟爬取
|
|||
Article art = new Article( |
|||
"模拟标题-" + (articles.size() + 1), |
|||
url, |
|||
"模拟正文内容", |
|||
"模拟作者", |
|||
"2026-05-31" |
|||
); |
|||
articles.add(art); |
|||
view.printSuccess("爬取成功:" + art.getTitle()); |
|||
} |
|||
|
|||
private boolean isValidUrl(String url) { |
|||
return url != null && URL_PATTERN.matcher(url).matches(); |
|||
} |
|||
} |
|||
@ -0,0 +1,24 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.List; |
|||
|
|||
public class ExitCommand implements Command { |
|||
private final ConsoleView view; |
|||
|
|||
public ExitCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "exit"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
view.printInfo("程序退出"); |
|||
System.exit(0); |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.List; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private final ConsoleView view; |
|||
|
|||
public HelpCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
view.printInfo("=== 可用命令 ==="); |
|||
System.out.println("help 显示帮助"); |
|||
System.out.println("list 列出所有文章"); |
|||
System.out.println("crawl <url> 爬取文章(或简写 c <url>)"); |
|||
System.out.println("history 查看命令历史"); |
|||
System.out.println("exit 退出程序"); |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.List; |
|||
|
|||
public class HistoryCommand implements Command { |
|||
private final ConsoleView view; |
|||
private final List<String> history; |
|||
|
|||
public HistoryCommand(ConsoleView view, List<String> history) { |
|||
this.view = view; |
|||
this.history = history; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "history"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
if (history.isEmpty()) { |
|||
view.printInfo("暂无历史记录"); |
|||
return; |
|||
} |
|||
view.printInfo("=== 命令历史 ==="); |
|||
for (int i = 0; i < history.size(); i++) { |
|||
System.out.println((i + 1) + ". " + history.get(i)); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,23 @@ |
|||
package com.example.datacollect.w9.command; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.List; |
|||
|
|||
public class ListCommand implements Command { |
|||
private final ConsoleView view; |
|||
|
|||
public ListCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "list"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
view.display(articles); |
|||
} |
|||
} |
|||
@ -0,0 +1,57 @@ |
|||
package com.example.datacollect.w9.controller; |
|||
|
|||
import com.example.datacollect.w9.command.*; |
|||
import com.example.datacollect.w9.model.Article; |
|||
import com.example.datacollect.w9.view.ConsoleView; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
public class CrawlerController { |
|||
private final Map<String, Command> commands = new HashMap<>(); |
|||
private final Map<String, String> aliases = new HashMap<>(); |
|||
private final ConsoleView view; |
|||
private final List<Article> articles; |
|||
private final List<String> history = new ArrayList<>(); |
|||
|
|||
public CrawlerController(ConsoleView view, List<Article> articles) { |
|||
this.view = view; |
|||
this.articles = articles; |
|||
register(new HelpCommand(view)); |
|||
register(new ListCommand(view)); |
|||
register(new CrawlCommand(view, articles)); |
|||
register(new ExitCommand(view)); |
|||
register(new HistoryCommand(view, history)); |
|||
registerAlias("c", "crawl"); // 别名 c = crawl
|
|||
} |
|||
|
|||
private void register(Command command) { |
|||
commands.put(command.getName(), command); |
|||
} |
|||
|
|||
private void registerAlias(String alias, String commandName) { |
|||
aliases.put(alias, commandName); |
|||
} |
|||
|
|||
public void handle(String input) { |
|||
String text = input == null ? "" : input.trim(); |
|||
if (text.isEmpty()) return; |
|||
|
|||
history.add(text); |
|||
|
|||
String[] args = text.split("\\s+"); |
|||
String cmdName = args[0].toLowerCase(); |
|||
|
|||
if (aliases.containsKey(cmdName)) { |
|||
cmdName = aliases.get(cmdName); |
|||
} |
|||
|
|||
Command command = commands.get(cmdName); |
|||
if (command == null) { |
|||
view.printError("Unknown command: " + cmdName); |
|||
return; |
|||
} |
|||
command.execute(args, articles); |
|||
} |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
package com.example.datacollect.w9.model; |
|||
|
|||
/**
 * Mutable value holder for a crawled article: title, URL, content, author and
 * publish date, all kept as plain strings.
 */
public class Article {

    private String title;
    private String url;
    private String content;
    private String author;
    private String publishDate;

    /**
     * Creates an article without author or publish date (both stay {@code null}).
     *
     * @param title   article title
     * @param url     article URL
     * @param content article body
     */
    public Article(String title, String url, String content) {
        this(title, url, content, null, null);
    }

    /**
     * Creates a fully populated article.
     *
     * @param title       article title
     * @param url         article URL
     * @param content     article body
     * @param author      author name
     * @param publishDate publish date as text
     */
    public Article(String title, String url, String content, String author, String publishDate) {
        this.title = title;
        this.url = url;
        this.content = content;
        this.author = author;
        this.publishDate = publishDate;
    }

    // ---- accessors ----

    public String getTitle() {
        return title;
    }

    public String getUrl() {
        return url;
    }

    public String getContent() {
        return content;
    }

    public String getAuthor() {
        return author;
    }

    public String getPublishDate() {
        return publishDate;
    }

    // ---- mutators ----

    public void setTitle(String title) {
        this.title = title;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }

    /**
     * @return a one-line description with title, URL, author and publish date
     *         (the content is left out)
     */
    @Override
    public String toString() {
        return String.format(
                "Article{title='%s', url='%s', author='%s', publishDate='%s'}",
                title, url, author, publishDate);
    }
}
|||
@ -0,0 +1,43 @@ |
|||
package com.example.datacollect.w9.view; |
|||
|
|||
import com.example.datacollect.w9.model.Article; |
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
public class ConsoleView { |
|||
// 颜色
|
|||
private static final String ANSI_RESET = "\u001B[0m"; |
|||
private static final String ANSI_GREEN = "\u001B[32m"; |
|||
private static final String ANSI_RED = "\u001B[31m"; |
|||
private static final String ANSI_BLUE = "\u001B[34m"; |
|||
|
|||
private final Scanner scanner = new Scanner(System.in); |
|||
|
|||
public String readLine() { |
|||
System.out.print("> "); |
|||
return scanner.nextLine(); |
|||
} |
|||
|
|||
public void printSuccess(String msg) { |
|||
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void printError(String msg) { |
|||
System.out.println(ANSI_RED + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void printInfo(String msg) { |
|||
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void display(List<Article> articles) { |
|||
if (articles.isEmpty()) { |
|||
printInfo("暂无文章,请先执行 crawl。"); |
|||
return; |
|||
} |
|||
for (int i = 0; i < articles.size(); i++) { |
|||
Article a = articles.get(i); |
|||
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
|||
} |
|||
} |
|||
} |
|||
Loading…
Reference in new issue