Browse Source

作业

main
wanglixia 3 weeks ago
parent
commit
2ae3ae04ad
  1. 3
      .idea/workspace.xml
  2. 76
      w10/AnalyzeCommand.java
  3. 43
      w10/ArticleRepository.java
  4. 40
      w11/RetryUtils.java
  5. 15
      w11/UrlFormatException.java
  6. 23
      w9/Main.java
  7. 9
      w9/command/Command.java
  8. 51
      w9/command/CrawlCommand.java
  9. 24
      w9/command/ExitCommand.java
  10. 28
      w9/command/HelpCommand.java
  11. 32
      w9/command/HistoryCommand.java
  12. 23
      w9/command/ListCommand.java
  13. 57
      w9/controller/CrawlerController.java
  14. 46
      w9/model/Article.java
  15. 43
      w9/view/ConsoleView.java

3
.idea/workspace.xml

@ -2,7 +2,7 @@
<project version="4">
<component name="ChangeListManager">
<list default="true" id="8e3e51eb-8beb-4b33-b423-c0d05e3bc0cd" name="Changes" comment="">
<change afterPath="$PROJECT_DIR$/w5/VehicleTest.java" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@ -52,6 +52,7 @@
<workItem from="1775042617092" duration="882000" />
<workItem from="1775104128296" duration="4129000" />
<workItem from="1775653218714" duration="2004000" />
<workItem from="1779354682588" duration="1252000" />
</task>
<servers />
</component>

76
w10/AnalyzeCommand.java

@ -0,0 +1,76 @@
package com.example.datacollect.command;
import com.example.datacollect.factory.StrategyFactory;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Console command {@code analyze <url>}: runs the crawl strategy chosen for a URL and prints
 * summary statistics about the parsed articles. Nothing is written to any article store.
 */
public class AnalyzeCommand implements Command {
    /**
     * URL shape check: optional http(s) scheme, dot-separated host labels, optional path/query.
     *
     * <p>The path part is a single optional group. The previous form repeated a group whose inner
     * character class also contained the slash, so the same path could be split in exponentially
     * many ways and a non-matching input with many slashes could cause heavy backtracking. This
     * form accepts exactly the same strings without that ambiguity.
     */
    private static final Pattern URL_PATTERN =
            Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w./?%&=-]*)?$");

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * Creates the command. It depends only on the view and the strategy factory.
     *
     * @param view            console used for all output
     * @param strategyFactory source of the crawl strategy for a given URL
     */
    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    /** @return the command name, {@code "analyze"} */
    @Override
    public String getName() {
        return "analyze";
    }

    /**
     * Validates the arguments, crawls the URL and prints statistics.
     *
     * @param args   command tokens; {@code args[1]} must be the URL
     * @param unused article list required by the {@link Command} signature; never read or modified
     */
    @Override
    public void execute(String[] args, List<Article> unused) {
        // 1. Argument check (a null array is treated like a missing argument).
        if (args == null || args.length < 2) {
            view.printError("用法:analyze <url>");
            return;
        }
        String url = args[1];
        // 2. URL format check.
        if (!isValidUrl(url)) {
            view.printError("无效的 URL 格式:" + url);
            return;
        }
        try {
            // 3. Reuse the strategy factory to parse the URL.
            List<Article> parsedArticles = strategyFactory.getStrategy(url).crawl(url);
            // 4. Only print statistics; the parsed articles are discarded afterwards.
            printAnalysisResult(url, parsedArticles);
        } catch (Exception e) {
            // Command boundary: report the failure instead of crashing the console loop.
            view.printError("解析失败:" + describe(e));
        }
    }

    /**
     * Prints the statistics block. Reads the list only; a {@code null} list is reported as
     * zero articles instead of failing.
     */
    private void printAnalysisResult(String url, List<Article> articles) {
        int count = (articles == null) ? 0 : articles.size();
        view.printInfo("===== 解析统计结果 =====");
        view.printInfo("目标 URL:" + url);
        view.printInfo("解析到文章数量:" + count);
        if (count > 0) {
            Article first = articles.get(0);
            view.printInfo("首篇文章标题:" + first.getTitle());
            view.printInfo("首篇文章作者:" + first.getAuthor());
            view.printInfo("首篇发布日期:" + first.getPublishDate());
        }
    }

    /** Returns the exception message, or the exception class name when there is no message. */
    private static String describe(Exception e) {
        String message = e.getMessage();
        return (message == null || message.isEmpty()) ? e.getClass().getSimpleName() : message;
    }

    /** @return {@code true} when {@code url} is non-null and matches {@link #URL_PATTERN} */
    private boolean isValidUrl(String url) {
        return url != null && URL_PATTERN.matcher(url).matches();
    }
}

43
w10/ArticleRepository.java

@ -0,0 +1,43 @@
package com.example.datacollect.repository;
import com.example.datacollect.model.Article;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * In-memory collection of {@link Article} objects backed by an {@link ArrayList}.
 * No synchronization is performed.
 */
public class ArticleRepository {
    private final List<Article> store = new ArrayList<>();

    /**
     * Appends a single article.
     *
     * @param article the article to store
     * @throws IllegalArgumentException if {@code article} is {@code null}
     */
    public void add(Article article) {
        if (article != null) {
            store.add(article);
            return;
        }
        throw new IllegalArgumentException("Article cannot be null");
    }

    /**
     * Appends every non-null element of the given list, in order.
     * A {@code null} list is ignored, and {@code null} elements are skipped silently.
     *
     * @param newArticles articles to append; may be {@code null}
     */
    public void addAll(List<Article> newArticles) {
        if (newArticles != null) {
            for (Article candidate : newArticles) {
                if (candidate == null) {
                    continue;
                }
                store.add(candidate);
            }
        }
    }

    /**
     * @return a read-only view of the stored articles; callers cannot modify the repository
     *         through it
     */
    public List<Article> getAll() {
        List<Article> readOnlyView = Collections.unmodifiableList(store);
        return readOnlyView;
    }

    /** @return the number of stored articles */
    public int size() {
        return store.size();
    }

    /** Removes every stored article. */
    public void clear() {
        store.clear();
    }
}

40
w11/RetryUtils.java

@ -0,0 +1,40 @@
package com.example.datacollect.utils;
/**
 * Retry helper with exponential backoff.
 *
 * <p>The wait before retry number {@code attempt} (0-based) is {@code 500 * 2^attempt} ms.
 */
public class RetryUtils {
    /** Base delay in milliseconds; doubled after every failed attempt. */
    private static final long BASE_DELAY_MS = 500;

    /** Largest left shift of {@link #BASE_DELAY_MS} that still yields a positive {@code long}. */
    private static final int MAX_SHIFT = Long.numberOfLeadingZeros(BASE_DELAY_MS) - 1;

    /** A unit of work that may fail with a checked exception. */
    @FunctionalInterface
    public interface RetryTask<T> {
        T run() throws Exception;
    }

    /**
     * Runs {@code task}, retrying with exponential backoff when it throws.
     *
     * <p>Interruption is never retried. If the task throws {@link InterruptedException}, or the
     * thread is interrupted while waiting between attempts, the interrupt flag is restored and
     * the {@code InterruptedException} is thrown at once.
     *
     * @param maxRetries maximum number of retries, not counting the first attempt; values
     *                   {@code <= 0} mean the task is run exactly once
     * @param task       the work to execute
     * @return the value returned by the first successful run
     * @throws NullPointerException if {@code task} is {@code null}
     * @throws Exception            the failure of the last attempt once retries are exhausted
     */
    public static <T> T retry(int maxRetries, RetryTask<T> task) throws Exception {
        // Check up front: inside the loop a null task would raise an NPE that the generic
        // catch below would treat as a retryable failure and sleep on.
        if (task == null) {
            throw new NullPointerException("task");
        }
        int attempt = 0;
        while (true) {
            try {
                return task.run();
            } catch (InterruptedException e) {
                // Cancellation request, not a transient failure: stop immediately.
                Thread.currentThread().interrupt();
                throw e;
            } catch (Exception e) {
                if (attempt >= maxRetries) {
                    throw e; // retries exhausted: surface the last failure
                }
                try {
                    Thread.sleep(backoffDelay(attempt));
                } catch (InterruptedException interrupted) {
                    // Keep the failure that triggered this wait visible to the caller.
                    interrupted.addSuppressed(e);
                    Thread.currentThread().interrupt();
                    throw interrupted;
                }
                attempt++;
            }
        }
    }

    /**
     * Computes {@code 500 * 2^attempt} ms, saturating at {@link Long#MAX_VALUE} instead of
     * overflowing into a negative value (which {@link Thread#sleep(long)} rejects).
     */
    private static long backoffDelay(int attempt) {
        if (attempt > MAX_SHIFT) {
            return Long.MAX_VALUE;
        }
        return BASE_DELAY_MS << attempt;
    }
}

15
w11/UrlFormatException.java

@ -0,0 +1,15 @@
package com.example.datacollect.exception;
/**
 * Unchecked exception for a URL whose format is wrong.
 */
public class UrlFormatException extends RuntimeException {

    /**
     * Creates the exception with a detail message and the underlying cause.
     *
     * @param message description of the format problem
     * @param cause   the exception that led to this one
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates the exception with a detail message only.
     *
     * @param message description of the format problem
     */
    public UrlFormatException(String message) {
        super(message);
    }
}

23
w9/Main.java

@ -0,0 +1,23 @@
package com.example.datacollect.w9;
import com.example.datacollect.w9.controller.CrawlerController;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.ArrayList;
import java.util.List;
/**
 * Entry point of the console crawler: wires the view, the shared article list and the
 * controller, then feeds each input line to the controller.
 */
public class Main {
    /**
     * Runs the read-dispatch loop until input ends or a command terminates the JVM.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        ConsoleView view = new ConsoleView();
        List<Article> articles = new ArrayList<>();
        CrawlerController controller = new CrawlerController(view, articles);
        view.printInfo("=== 文章爬虫系统已启动 ===");
        view.printInfo("输入 help 查看命令");
        while (true) {
            String input;
            try {
                input = view.readLine();
            } catch (java.util.NoSuchElementException | IllegalStateException endOfInput) {
                // Scanner.nextLine() throws when stdin reaches EOF (Ctrl-D, piped input) or the
                // scanner is closed; leave the loop instead of crashing with a stack trace.
                break;
            }
            controller.handle(input);
        }
    }
}

9
w9/command/Command.java

@ -0,0 +1,9 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import java.util.List;
/**
 * A console command that is registered under a name and executed on demand.
 */
public interface Command {
/**
 * @return the name this command is registered under (e.g. "crawl", "list")
 */
String getName();
/**
 * Executes the command.
 *
 * @param args     the tokens of the input line; the implementations in this package read the
 *                 first real argument from {@code args[1]}
 * @param articles the article list handed in by the caller; implementations may read it or
 *                 add to it
 */
void execute(String[] args, List<Article> articles);
}

51
w9/command/CrawlCommand.java

@ -0,0 +1,51 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Console command {@code crawl <url>}: validates the URL and appends a simulated article for it.
 */
public class CrawlCommand implements Command {
    /**
     * URL shape check: optional http(s) scheme, dot-separated host labels, optional path/query.
     *
     * <p>The path part is a single optional group. The previous form repeated a group whose inner
     * character class also contained the slash, so the same path could be split in exponentially
     * many ways and a non-matching input with many slashes could cause heavy backtracking. This
     * form accepts exactly the same strings without that ambiguity.
     */
    private static final Pattern URL_PATTERN =
            Pattern.compile("^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w./?%&=-]*)?$");

    private final ConsoleView view;
    /** List injected at construction; used when {@code execute} receives no list. */
    private final List<Article> articles;

    /**
     * @param view     console used for all output
     * @param articles default target list for crawled articles
     */
    public CrawlCommand(ConsoleView view, List<Article> articles) {
        this.view = view;
        this.articles = articles;
    }

    /** @return the command name, {@code "crawl"} */
    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Validates {@code args[1]} as a URL and appends one simulated article to the target list.
     *
     * @param args     command tokens; {@code args[1]} must be the URL
     * @param articles list to append to; when {@code null}, the list given to the constructor
     *                 is used instead
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        if (args == null || args.length < 2) {
            view.printError("用法:crawl <url>");
            return;
        }
        String url = args[1];
        if (!isValidUrl(url)) {
            view.printError("URL 格式不正确");
            return;
        }
        // The parameter shadows the field; prefer it, and fall back to the injected list so the
        // field is not dead state.
        List<Article> target = (articles != null) ? articles : this.articles;
        // Simulated crawl: no network access happens here.
        Article art = new Article(
                "模拟标题-" + (target.size() + 1),
                url,
                "模拟正文内容",
                "模拟作者",
                "2026-05-31"
        );
        target.add(art);
        view.printSuccess("爬取成功:" + art.getTitle());
    }

    /** @return {@code true} when {@code url} is non-null and matches {@link #URL_PATTERN} */
    private boolean isValidUrl(String url) {
        return url != null && URL_PATTERN.matcher(url).matches();
    }
}

24
w9/command/ExitCommand.java

@ -0,0 +1,24 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.List;
/**
 * Console command {@code exit}: prints a farewell line and terminates the JVM.
 */
public class ExitCommand implements Command {
    private static final String NAME = "exit";
    private static final int EXIT_STATUS_OK = 0;

    private final ConsoleView view;

    /**
     * @param view console used to print the exit message
     */
    public ExitCommand(ConsoleView view) {
        this.view = view;
    }

    /** @return the command name, {@code "exit"} */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Prints the exit message and calls {@link System#exit(int)} with status 0.
     * Neither argument is read.
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        this.view.printInfo("程序退出");
        System.exit(EXIT_STATUS_OK);
    }
}

28
w9/command/HelpCommand.java

@ -0,0 +1,28 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.List;
/**
 * Console command {@code help}: prints the list of available commands.
 */
public class HelpCommand implements Command {
    /** Help lines, written to standard output in this order. */
    private static final String[] HELP_LINES = {
        "help 显示帮助",
        "list 列出所有文章",
        "crawl <url> 爬取文章(或简写 c <url>)",
        "history 查看命令历史",
        "exit 退出程序",
    };

    private final ConsoleView view;

    /**
     * @param view console used to print the heading
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    /** @return the command name, {@code "help"} */
    @Override
    public String getName() {
        return "help";
    }

    /**
     * Prints the heading through the view, then each help line directly to
     * {@code System.out}. Neither argument is read.
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        view.printInfo("=== 可用命令 ===");
        for (String line : HELP_LINES) {
            System.out.println(line);
        }
    }
}

32
w9/command/HistoryCommand.java

@ -0,0 +1,32 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.List;
/**
 * Console command {@code history}: prints the recorded input lines, numbered from 1.
 */
public class HistoryCommand implements Command {
    private final ConsoleView view;
    private final List<String> history;

    /**
     * @param view    console used for the heading and the empty-history message
     * @param history list of recorded inputs; read at execution time
     */
    public HistoryCommand(ConsoleView view, List<String> history) {
        this.view = view;
        this.history = history;
    }

    /** @return the command name, {@code "history"} */
    @Override
    public String getName() {
        return "history";
    }

    /**
     * Prints a notice when the history is empty; otherwise prints a heading followed by one
     * numbered line per entry on {@code System.out}. Neither argument is read.
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        if (!history.isEmpty()) {
            view.printInfo("=== 命令历史 ===");
            int position = 0;
            for (String entry : history) {
                position++;
                System.out.println(position + ". " + entry);
            }
        } else {
            view.printInfo("暂无历史记录");
        }
    }
}

23
w9/command/ListCommand.java

@ -0,0 +1,23 @@
package com.example.datacollect.w9.command;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.List;
/**
 * Console command {@code list}: shows the articles it is given through the view.
 */
public class ListCommand implements Command {
    private static final String NAME = "list";

    private final ConsoleView view;

    /**
     * @param view console that renders the article list
     */
    public ListCommand(ConsoleView view) {
        this.view = view;
    }

    /** @return the command name, {@code "list"} */
    @Override
    public String getName() {
        return NAME;
    }

    /**
     * Hands the article list to {@link ConsoleView#display(List)}.
     *
     * @param args     ignored
     * @param articles the articles to show
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        this.view.display(articles);
    }
}

57
w9/controller/CrawlerController.java

@ -0,0 +1,57 @@
package com.example.datacollect.w9.controller;
import com.example.datacollect.w9.command.*;
import com.example.datacollect.w9.model.Article;
import com.example.datacollect.w9.view.ConsoleView;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Parses console input lines and dispatches them to the registered {@link Command}s.
 * Keeps the command registry, the alias table and the input history.
 */
public class CrawlerController {
    private final Map<String, Command> commands = new HashMap<>();
    /** Maps an alias to the name of the command it stands for. */
    private final Map<String, String> aliases = new HashMap<>();
    private final ConsoleView view;
    private final List<Article> articles;
    private final List<String> history = new ArrayList<>();

    /**
     * Registers the built-in commands and the alias {@code c} for {@code crawl}.
     *
     * @param view     console used for error output and passed to the commands
     * @param articles article list handed to every executed command
     */
    public CrawlerController(ConsoleView view, List<Article> articles) {
        this.view = view;
        this.articles = articles;
        register(new HelpCommand(view));
        register(new ListCommand(view));
        register(new CrawlCommand(view, articles));
        register(new ExitCommand(view));
        register(new HistoryCommand(view, history));
        registerAlias("c", "crawl"); // alias: c = crawl
    }

    /** Stores the command under the name it reports. */
    private void register(Command command) {
        commands.put(command.getName(), command);
    }

    /** Makes {@code alias} resolve to {@code commandName}. */
    private void registerAlias(String alias, String commandName) {
        aliases.put(alias, commandName);
    }

    /**
     * Handles one line of input: trims it, records it in the history, resolves the command
     * (case-insensitively, aliases included) and executes it. Blank or {@code null} input is
     * ignored. Unknown commands are reported through the view but still recorded.
     *
     * @param input the raw input line; may be {@code null}
     */
    public void handle(String input) {
        String text = input == null ? "" : input.trim();
        if (text.isEmpty()) {
            return;
        }
        history.add(text);
        String[] args = text.split("\\s+");
        // Locale.ROOT keeps the lookup independent of the default locale; with a Turkish
        // default locale, "LIST".toLowerCase() yields a dotless i and would not match "list".
        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
        cmdName = aliases.getOrDefault(cmdName, cmdName);
        Command command = commands.get(cmdName);
        if (command == null) {
            view.printError("Unknown command: " + cmdName);
            return;
        }
        command.execute(args, articles);
    }
}

46
w9/model/Article.java

@ -0,0 +1,46 @@
package com.example.datacollect.w9.model;
/**
 * Mutable data holder for one article: title, URL, content, author and publish date,
 * all kept as plain strings.
 */
public class Article {
    private String title;
    private String url;
    private String content;
    private String author;
    private String publishDate;

    /**
     * Creates an article without author and publish date (both stay {@code null}).
     */
    public Article(String title, String url, String content) {
        this(title, url, content, null, null);
    }

    /**
     * Creates an article with every field set.
     */
    public Article(String title, String url, String content, String author, String publishDate) {
        this.title = title;
        this.url = url;
        this.content = content;
        this.author = author;
        this.publishDate = publishDate;
    }

    // ---- accessors ----

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public String getPublishDate() {
        return publishDate;
    }

    public void setPublishDate(String publishDate) {
        this.publishDate = publishDate;
    }

    /**
     * @return a one-line description with title, url, author and publish date; the content
     *         is not included
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Article{");
        text.append("title='").append(title).append('\'');
        text.append(", url='").append(url).append('\'');
        text.append(", author='").append(author).append('\'');
        text.append(", publishDate='").append(publishDate).append('\'');
        text.append('}');
        return text.toString();
    }
}

43
w9/view/ConsoleView.java

@ -0,0 +1,43 @@
package com.example.datacollect.w9.view;
import com.example.datacollect.w9.model.Article;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end: reads lines from standard input and writes ANSI-coloured messages and
 * article listings to standard output.
 */
public class ConsoleView {
    // ANSI escape sequences used to colour output.
    private static final String ANSI_RESET = "\u001B[0m";
    private static final String ANSI_GREEN = "\u001B[32m";
    private static final String ANSI_RED = "\u001B[31m";
    private static final String ANSI_BLUE = "\u001B[34m";

    private final Scanner scanner = new Scanner(System.in);

    /**
     * Prints the {@code "> "} prompt and reads the next line from standard input.
     *
     * @return the line read, as returned by {@link Scanner#nextLine()}
     */
    public String readLine() {
        System.out.print("> ");
        String line = scanner.nextLine();
        return line;
    }

    /** Prints {@code msg} in green. */
    public void printSuccess(String msg) {
        printColoured(ANSI_GREEN, msg);
    }

    /** Prints {@code msg} in red. */
    public void printError(String msg) {
        printColoured(ANSI_RED, msg);
    }

    /** Prints {@code msg} in blue. */
    public void printInfo(String msg) {
        printColoured(ANSI_BLUE, msg);
    }

    /**
     * Prints one numbered line per article ({@code index. title | url}), or an informational
     * notice when the list is empty.
     *
     * @param articles the articles to list
     */
    public void display(List<Article> articles) {
        if (articles.isEmpty()) {
            printInfo("暂无文章,请先执行 crawl。");
            return;
        }
        int number = 0;
        for (Article item : articles) {
            number++;
            System.out.println(number + ". " + item.getTitle() + " | " + item.getUrl());
        }
    }

    /** Writes the message wrapped in the given colour code and the reset code. */
    private void printColoured(String colourCode, String msg) {
        System.out.println(colourCode + msg + ANSI_RESET);
    }
}
Loading…
Cancel
Save