diff --git a/w11/java-cli-w11/.gitignore b/w11/java-cli-w11/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/w11/java-cli-w11/.gitignore
@@ -0,0 +1,4 @@
+*.jar
+target/
+*.class
+*.log
\ No newline at end of file
diff --git a/w11/java-cli-w11/pom.xml b/w11/java-cli-w11/pom.xml
new file mode 100644
index 0000000..9987b1c
--- /dev/null
+++ b/w11/java-cli-w11/pom.xml
@@ -0,0 +1,62 @@
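+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.example</groupId>
+  <artifactId>datacollect-cli</artifactId>
+  <version>0.1.0</version>
+  <properties>
+    <maven.compiler.source>11</maven.compiler.source>
+    <maven.compiler.target>11</maven.compiler.target>
+  </properties>
+  <dependencies>
+    <dependency>
+      <groupId>org.jsoup</groupId>
+      <artifactId>jsoup</artifactId>
+      <version>1.17.2</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>2.0.9</version>
+    </dependency>
+    <dependency>
+      <groupId>ch.qos.logback</groupId>
+      <artifactId>logback-classic</artifactId>
+      <version>1.4.14</version>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.8.1</version>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>3.3.0</version>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>com.example.datacollect.Main</mainClass>
+            </manifest>
+          </archive>
+          <descriptorRefs>
+            <descriptorRef>jar-with-dependencies</descriptorRef>
+          </descriptorRefs>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-assembly</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>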
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/Main.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..ea9d151
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,41 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+/**
+ * Entry point of the CLI crawler.
+ *
+ * <p>Builds the view, repository, strategy factory and controller, then passes
+ * every line read from the console to the controller. An exception raised while
+ * handling one line is reported and the loop continues; an exception raised
+ * during start-up is logged and ends the JVM with exit code 1.
+ */
+public class Main {
+    private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Starts the interactive loop.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        try {
+            logger.info("Starting CLI Crawler application");
+
+            ConsoleView view = new ConsoleView();
+            ArticleRepository repository = new ArticleRepository();
+            StrategyFactory strategyFactory = new StrategyFactory();
+            CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
+
+            view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
+            logger.info("Application initialized successfully");
+
+            while (true) {
+                try {
+                    String line = view.readLine();
+                    if (line == null) {
+                        // Without this check a closed stdin makes the loop spin forever,
+                        // because the controller silently ignores null input.
+                        // NOTE(review): assumes ConsoleView.readLine() returns null at end of input - confirm.
+                        logger.info("End of input reached, shutting down");
+                        break;
+                    }
+                    controller.handle(line);
+                } catch (Exception e) {
+                    view.printError("Error: " + e.getMessage());
+                    logger.error("Error in main loop: {}", e.getMessage(), e);
+                }
+            }
+        } catch (Exception e) {
+            logger.error("Fatal error in application: {}", e.getMessage(), e);
+            System.err.println("Fatal error: " + e.getMessage());
+            System.exit(1);
+        }
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
new file mode 100644
index 0000000..ec9bcc3
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
@@ -0,0 +1,103 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.RetryUtils;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+/**
+ * {@code analyze <url>} command: downloads a page, parses it with the strategy
+ * that supports the URL and prints title/content length statistics. Nothing is
+ * written to the repository.
+ */
+public class AnalyzeCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * @param view            console used for all user-facing output
+     * @param strategyFactory source of the parsing strategy for a given URL
+     */
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Fetches and analyzes the page named by {@code args[1]}.
+     *
+     * @param args       tokenized command line; {@code args[0]} is the command word, {@code args[1]} the URL
+     * @param repository not used by this command
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            view.printError("Usage: analyze <url>");
+            logger.warn("Invalid command: missing URL argument");
+            return;
+        }
+        String url = args[1];
+        logger.info("Analyze command executed for URL: {}", url);
+
+        try {
+            CrawlStrategy strategy = strategyFactory.getStrategy(url);
+            if (strategy == null) {
+                view.printError("No strategy found for: " + url);
+                logger.error("No strategy found for URL: {}", url);
+                return;
+            }
+
+            // The typed Callable is what lets executeWithRetry hand back a Document.
+            Callable<Document> fetchTask = () -> {
+                logger.debug("Fetching document from: {}", url);
+                try {
+                    return Jsoup.connect(url)
+                            .userAgent("Mozilla/5.0")
+                            .timeout(5000)
+                            .get();
+                } catch (IOException e) {
+                    throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e);
+                }
+            };
+
+            Document doc = RetryUtils.executeWithRetry(fetchTask);
+            logger.info("Successfully fetched document from: {}", url);
+
+            List<Article> articles = strategy.parse(url, doc);
+            logger.info("Parsed {} articles for analysis", articles.size());
+
+            int total = articles.size();
+            int totalTitleLen = 0;
+            int totalContentLen = 0;
+
+            for (Article a : articles) {
+                totalTitleLen += a.getTitle() == null ? 0 : a.getTitle().length();
+                totalContentLen += a.getContent() == null ? 0 : a.getContent().length();
+            }
+
+            view.printInfo("===== 分析统计结果 =====");
+            view.printInfo("文章总数:" + total + " 篇");
+            view.printInfo("标题总长度:" + totalTitleLen);
+            view.printInfo("内容总长度:" + totalContentLen);
+            if (total > 0) {
+                // Averages use integer division; skipped entirely when nothing was parsed.
+                view.printInfo("平均标题长度:" + (totalTitleLen / total));
+                view.printInfo("平均内容长度:" + (totalContentLen / total));
+            }
+            view.printInfo("======================");
+            view.printSuccess("分析完成(数据未保存)");
+
+            logger.info("Analysis completed: {} articles analyzed", total);
+        } catch (NetworkException e) {
+            view.printError("Network error: " + e.getMessage());
+            logger.error("Network error while analyzing {}: {}", url, e.getMessage(), e);
+        } catch (ParseException e) {
+            view.printError("Parse error: " + e.getMessage());
+            logger.error("Parse error while analyzing {}: {}", url, e.getMessage(), e);
+        } catch (Exception e) {
+            view.printError("分析失败:" + e.getMessage());
+            logger.error("Unexpected error while analyzing {}: {}", url, e.getMessage(), e);
+        }
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/Command.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..029cadc
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,8 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+
+public interface Command { // One console command; CrawlerController keeps implementations in a map and dispatches to them by name.
+ String getName(); // Keyword the user types; CrawlerController uses it as the lookup key after lower-casing the input.
+ void execute(String[] args, ArticleRepository repository); // args is the whitespace-split input line (args[0] is the command word); repository is the shared article store.
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..dd63594
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,87 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.RetryUtils;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.concurrent.Callable;
+
+public class CrawlCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+ private final ConsoleView view;
+ private final StrategyFactory strategyFactory;
+
+ public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+ this.view = view;
+ this.strategyFactory = strategyFactory;
+ }
+
+ @Override
+ public String getName() {
+ return "crawl";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ if (args.length < 2) {
+ view.printError("Usage: crawl ");
+ logger.warn("Invalid command: missing URL argument");
+ return;
+ }
+ String url = args[1];
+ logger.info("Crawl started for: {}", url);
+
+ CrawlStrategy strategy = strategyFactory.getStrategy(url);
+ if (strategy == null) {
+ view.printError("No strategy found for: " + url);
+ logger.error("No strategy found for URL: {}", url);
+ return;
+ }
+
+ try {
+ view.printInfo("Crawling: " + url);
+
+ Callable fetchTask = () -> {
+ logger.debug("Fetching document from: {}", url);
+ try {
+ return Jsoup.connect(url)
+ .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
+ .timeout(10000)
+ .get();
+ } catch (IOException e) {
+ throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e);
+ }
+ };
+
+ Document doc = RetryUtils.executeWithRetry(fetchTask);
+ logger.info("Successfully fetched document from: {}", url);
+
+ var articles = strategy.parse(url, doc);
+ logger.info("Parsed {} articles", articles.size());
+
+ repository.addAll(articles);
+ logger.info("Successfully added {} articles to repository", articles.size());
+
+ view.printSuccess("Crawled " + articles.size() + " articles.");
+ logger.info("Successfully crawled {} articles from {}", articles.size(), url);
+ } catch (NetworkException e) {
+ view.printError("Network error: " + e.getMessage());
+ logger.error("Network error while crawling {}: {}", url, e.getMessage(), e);
+ } catch (ParseException e) {
+ view.printError("Parse error: " + e.getMessage());
+ logger.error("Parse error while crawling {}: {}", url, e.getMessage(), e);
+ } catch (Exception e) {
+ view.printError("Failed to crawl: " + e.getMessage());
+ logger.error("Unexpected error while crawling {}: {}", url, e.getMessage(), e);
+ }
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..0f1d7fd
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,27 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ExitCommand implements Command { // "exit" command: says goodbye and terminates the JVM.
+ private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+ private final ConsoleView view; // console used to print the farewell message
+
+ public ExitCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "exit";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) { // neither args nor repository is read
+ logger.info("Exit command executed, shutting down");
+ view.printSuccess("Bye!");
+ System.exit(0);/* terminate the JVM with status 0; this call does not return */
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..2087695
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,26 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * {@code help} command: prints the list of available commands.
+ */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+    private final ConsoleView view;
+
+    /**
+     * @param view console used to print the command list
+     */
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /**
+     * Prints the command summary.
+     *
+     * @param args       not used
+     * @param repository not used
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("Help command executed");
+        // crawl and analyze both reject input without a URL, so the summary names the argument.
+        view.printInfo("Commands: crawl <url>, list, help, exit, analyze <url>");
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..9261a3d
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,26 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ListCommand implements Command { // "list" command: shows every article currently held by the repository.
+ private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+ private final ConsoleView view; // console that renders the article list
+
+ public ListCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "list";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) { // args is not read
+ logger.info("List command executed, showing {} articles", repository.size());
+ view.display(repository.getAll()); // getAll() returns an unmodifiable view of the stored articles
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..5ef370a
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,64 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.AnalyzeCommand;
+import com.example.datacollect.command.Command;
+import com.example.datacollect.command.CrawlCommand;
+import com.example.datacollect.command.ExitCommand;
+import com.example.datacollect.command.HelpCommand;
+import com.example.datacollect.command.ListCommand;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Dispatches console input to the registered {@link Command} implementations.
+ *
+ * <p>Commands are looked up by the lower-cased first token of the input line.
+ */
+public class CrawlerController {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);
+    private final Map<String, Command> commands = new HashMap<>();
+    private final ConsoleView view;
+    private final ArticleRepository repository;
+
+    /**
+     * Creates the controller and registers the built-in commands
+     * (help, list, crawl, exit, analyze).
+     *
+     * @param view            console used for error output and handed to every command
+     * @param repository      article store passed to every executed command
+     * @param strategyFactory strategy source handed to the crawl and analyze commands
+     */
+    public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.repository = repository;
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view, strategyFactory));
+        register(new ExitCommand(view));
+        register(new AnalyzeCommand(view, strategyFactory));
+        logger.info("CrawlerController initialized with {} commands", commands.size());
+    }
+
+    /** Stores {@code command} under its own name, replacing any earlier command with that name. */
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+        logger.debug("Registered command: {}", command.getName());
+    }
+
+    /**
+     * Parses one line of user input and runs the matching command.
+     *
+     * <p>Null or blank input is ignored. An unknown command name and any exception
+     * thrown by a command are reported through the view instead of being propagated.
+     *
+     * @param input raw line typed by the user; may be {@code null}
+     */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+
+        String[] args = text.split("\\s+");
+        // Locale.ROOT keeps the lookup independent of the default locale
+        // (e.g. "LIST" lower-cases to a dotless-i form under a Turkish locale).
+        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
+
+        logger.debug("Processing command: {}", cmdName);
+
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            view.printError("Unknown command: " + cmdName);
+            logger.warn("Unknown command attempted: {}", cmdName);
+            return;
+        }
+
+        try {
+            command.execute(args, repository);
+        } catch (Exception e) {
+            view.printError("Command execution failed: " + e.getMessage());
+            logger.error("Error executing command {}: {}", cmdName, e.getMessage(), e);
+        }
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/CrawlerException.java
new file mode 100644
index 0000000..230adb3
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class CrawlerException extends Exception { // Checked base type; NetworkException and ParseException extend it.
+ public CrawlerException(String message) { // message: human-readable description of the failure
+ super(message);
+ }
+ public CrawlerException(String message, Throwable cause) { // cause: underlying exception, kept for the stack trace
+ super(message, cause);
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..3a24c92
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class NetworkException extends CrawlerException { // Thrown by the crawl/analyze fetch tasks when the Jsoup request fails with an IOException.
+ public NetworkException(String message) { // message: human-readable description of the failure
+ super(message);
+ }
+ public NetworkException(String message, Throwable cause) { // cause: the underlying I/O failure
+ super(message, cause);
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..09f9f20
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class ParseException extends CrawlerException { // Declared by CrawlStrategy.parse; thrown when a page cannot be turned into articles.
+ public ParseException(String message) { // message: human-readable description of the failure
+ super(message);
+ }
+ public ParseException(String message, Throwable cause) { // cause: the exception that interrupted parsing
+ super(message, cause);
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/model/Article.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/model/Article.java
new file mode 100644
index 0000000..53b138b
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/model/Article.java
@@ -0,0 +1,72 @@
+package com.example.datacollect.model;
+/*- 文章模型类
+- 添加字段验证
+- 添加 toString() 方法(已有)
+- 考虑添加 equals() 和 hashCode() */
+public class Article {
+ private String title;
+ private String url;
+ private String content;
+
+ public Article(String title, String url, String content) {
+ setTitle(title);
+ setUrl(url);
+ setContent(content);
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ if (title == null) {
+ throw new IllegalArgumentException("Title cannot be null");
+ }
+ if (title.trim().isEmpty()) {
+ throw new IllegalArgumentException("Title cannot be empty");
+ }
+ if (title.length() > 500) {
+ throw new IllegalArgumentException("Title cannot exceed 500 characters");
+ }
+ this.title = title.trim();
+ }
+
+ public String getUrl() {
+ return url;
+ }
+
+ public void setUrl(String url) {
+ if (url == null) {
+ throw new IllegalArgumentException("URL cannot be null");
+ }
+ if (url.trim().isEmpty()) {
+ throw new IllegalArgumentException("URL cannot be empty");
+ }
+ if (!url.startsWith("http://") && !url.startsWith("https://")) {
+ throw new IllegalArgumentException("URL must start with http:// or https://");
+ }
+ this.url = url.trim();
+ }
+
+ public String getContent() {
+ return content;
+ }
+
+ public void setContent(String content) {
+ if (content == null) {
+ this.content = "";
+ } else if (content.length() > 10000) {
+ this.content = content.substring(0, 10000);/* 截断内容到 10000 个字符 */
+ } else {
+ this.content = content;
+ }
+ }
+
+ @Override
+ public String toString() {
+ return "Article{"
+ + "title='" + title + '\''
+ + ", url='" + url + '\''
+ + '}';
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..8994efa
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,113 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+/**
+ * In-memory store of crawled articles, de-duplicated by URL.
+ *
+ * <p>Every article is re-validated before it is stored; a copy built from the
+ * validated fields is kept rather than the caller's instance.
+ */
+public class ArticleRepository {
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    private static final int MAX_TITLE_LENGTH = 500;
+    private static final int MAX_CONTENT_LENGTH = 10000;
+
+    private final List<Article> articles = new ArrayList<>();
+    private final Set<String> urlSet = new HashSet<>();
+
+    /**
+     * Validates and stores one article. An article whose URL is already present
+     * is skipped without an error.
+     *
+     * @param article article to store
+     * @throws IllegalArgumentException if the article is null, its title or URL is
+     *                                  blank, the title is too long, or the URL does
+     *                                  not start with http:// or https://
+     */
+    public void add(Article article) {
+        if (article == null) {
+            logger.error("Attempted to add null article");
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+
+        String title = article.getTitle();
+        String url = article.getUrl();
+        String content = article.getContent();
+
+        if (title == null || title.trim().isEmpty()) {
+            logger.warn("Attempted to add article with empty title");
+            throw new IllegalArgumentException("Article title cannot be null or empty");
+        }
+
+        if (url == null || url.trim().isEmpty()) {
+            logger.warn("Attempted to add article with empty URL");
+            throw new IllegalArgumentException("Article URL cannot be null or empty");
+        }
+
+        if (title.length() > MAX_TITLE_LENGTH) {
+            logger.warn("Article title too long: {} characters (max: {})", title.length(), MAX_TITLE_LENGTH);
+            throw new IllegalArgumentException("Article title exceeds maximum length of " + MAX_TITLE_LENGTH);
+        }
+
+        if (content != null && content.length() > MAX_CONTENT_LENGTH) {
+            logger.warn("Article content too long: {} characters (max: {})", content.length(), MAX_CONTENT_LENGTH);
+            content = content.substring(0, MAX_CONTENT_LENGTH);
+        }
+
+        // Use one normalized form for the scheme check, the duplicate check and storage,
+        // so the key in urlSet always matches the URL of the stored article.
+        String normalizedUrl = url.trim();
+
+        if (!normalizedUrl.startsWith("http://") && !normalizedUrl.startsWith("https://")) {
+            logger.warn("Invalid URL format: {}", url);
+            throw new IllegalArgumentException("Article URL must start with http:// or https://");
+        }
+
+        if (urlSet.contains(normalizedUrl)) {
+            logger.warn("Duplicate article URL detected: {}", url);
+            return; // duplicates are skipped, not treated as errors
+        }
+
+        Article validatedArticle = new Article(title.trim(), normalizedUrl, content != null ? content.trim() : "");
+        articles.add(validatedArticle);
+        urlSet.add(normalizedUrl);
+        logger.debug("Added article: {}", title);
+    }
+
+    /**
+     * Adds every valid, not yet stored article from {@code articleList}. Null
+     * entries, invalid articles and duplicates are skipped and counted in the log.
+     *
+     * @param articleList articles to add
+     * @throws IllegalArgumentException if {@code articleList} is null
+     */
+    public void addAll(List<Article> articleList) {
+        if (articleList == null) {
+            logger.error("Attempted to add null article list");
+            throw new IllegalArgumentException("Article list cannot be null");
+        }
+
+        int addedCount = 0;
+        int duplicateCount = 0;
+        int invalidCount = 0;
+
+        for (Article article : articleList) {
+            if (article == null) {
+                logger.warn("Skipped null article in list");
+                invalidCount++;
+                continue;
+            }
+            int sizeBefore = articles.size();
+            try {
+                add(article);
+            } catch (IllegalArgumentException e) {
+                logger.warn("Skipped invalid article: {}", e.getMessage());
+                invalidCount++;
+                continue;
+            }
+            // add() returns normally for duplicates too, so only a size change proves a real insert.
+            if (articles.size() > sizeBefore) {
+                addedCount++;
+            } else {
+                duplicateCount++;
+            }
+        }
+
+        logger.info("Added {} articles, skipped {} duplicates and {} invalid articles",
+                addedCount, duplicateCount, invalidCount);
+    }
+
+    /**
+     * @return read-only view of the stored articles in insertion order
+     */
+    public List<Article> getAll() {
+        logger.debug("Retrieving all articles, total: {}", articles.size());
+        return Collections.unmodifiableList(articles);
+    }
+
+    /**
+     * @return number of stored articles
+     */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes every article and forgets all known URLs. */
+    public void clear() {
+        int count = articles.size();
+        articles.clear();
+        urlSet.clear();
+        logger.info("Cleared repository, removed {} articles", count);
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
new file mode 100644
index 0000000..1e23b2b
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
@@ -0,0 +1,25 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Strategy for pages on blog.example.com: every {@code .post-title} element
+ * becomes an article whose URL is the page URL and whose content is empty.
+ */
+public class BlogStrategy implements CrawlStrategy {
+    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(BlogStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url.contains("blog.example.com");
+    }
+
+    /**
+     * @param url address of the crawled page; used as the URL of every article
+     * @param doc parsed page
+     * @return one article per usable {@code .post-title} element, in document order
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) {
+        List<Article> articles = new ArrayList<>();
+        Elements titles = doc.select(".post-title");
+        for (Element e : titles) {
+            try {
+                articles.add(new Article(e.text(), url, ""));
+            } catch (IllegalArgumentException ex) {
+                // Article rejects blank or over-long titles; one bad element must
+                // not discard the rest of the page.
+                logger.warn("Skipped element with invalid title: {}", ex.getMessage());
+            }
+        }
+        return articles;
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..ed69e19
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/**
+ * Site-specific page parser. {@code StrategyFactory} returns the first
+ * registered strategy whose {@link #supports(String)} accepts the URL.
+ */
+public interface CrawlStrategy {
+    /**
+     * Extracts the articles found on a downloaded page.
+     *
+     * @param url address the page was fetched from
+     * @param doc parsed page
+     * @return the articles found
+     * @throws ParseException if the page cannot be parsed
+     */
+    List<Article> parse(String url, Document doc) throws ParseException;
+
+    /**
+     * @param url address the user asked to crawl
+     * @return {@code true} if this strategy handles {@code url}
+     */
+    boolean supports(String url);
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
new file mode 100644
index 0000000..6892510
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
@@ -0,0 +1,77 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/* HNU News 策略
+- 添加 logger 成员
+- 添加异常处理
+- 实现防御性编程 */
+public class HnuNewsStrategy implements CrawlStrategy {
+ private static final Logger logger = LoggerFactory.getLogger(HnuNewsStrategy.class);
+
+ @Override
+ public boolean supports(String url) {
+ return url.contains("news.hnu.edu.cn");/* 支持 HNU News 网站 */
+ }
+
+ @Override
+ public List parse(String url, Document doc) throws ParseException {
+ logger.info("Starting to parse HNU News: {}", url);
+ List articles = new ArrayList<>();/* 存储储解析后的文章 */
+
+ try {
+ Elements listItems = doc.select("ul.list11 li");/* 选择文章列表项 */
+ logger.debug("Found {} list items", listItems.size());/* 记录找到的列表项数量 */
+
+ for (Element li : listItems) {
+ try {
+ Element link = li.selectFirst("a");/* 选择列表项中的链接 */
+ if (link == null) {
+ logger.warn("No link found in list item");/* 记录未找到链接 */
+ continue;
+ }
+
+ String articleUrl = link.attr("href");/* 获取链接的 href 属性值 */
+ if (!articleUrl.startsWith("http")) {
+ articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", "");/* 补全相对路径 */
+ }
+
+ String title = "";/* 存储文章标题 */
+ Element titleEl = link.selectFirst("h4.l2.h4s2");/* 选择标题元素 */
+ if (titleEl != null) {
+ title = titleEl.text().trim();/* 提取标题文本并移除首尾空格 */
+ }
+
+ String content = "";/* 存储文章内容 */
+ Element contentEl = link.selectFirst("p.l3.ps3");/* 选择内容元素 */
+ if (contentEl != null) {
+ content = contentEl.text().trim();/* 提取内容文本并移除首尾空格 */
+ }
+
+ if (!title.isEmpty()) {
+ Article article = new Article(title, articleUrl, content);/* 创建文章对象 */
+ articles.add(article);/* 将文章添加到列表 */
+ } else {
+ logger.warn("Empty title found, skipping article");
+ }
+ } catch (Exception e) {
+ logger.error("Error parsing individual article: {}", e.getMessage());
+ }
+ }
+
+ logger.info("Successfully parsed {} articles from HNU News", articles.size());
+ return articles;
+ } catch (Exception e) {
+ logger.error("Failed to parse HNU News page: {}", e.getMessage(), e);
+ throw new ParseException("Failed to parse HNU News: " + e.getMessage(), e);
+ }
+ }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
new file mode 100644
index 0000000..f6eb4bd
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
@@ -0,0 +1,25 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Strategy for pages on news.example.com: every {@code .article-headline}
+ * element becomes an article whose URL is the page URL and whose content is empty.
+ */
+public class NewsStrategy implements CrawlStrategy {
+    private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(NewsStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url.contains("news.example.com");
+    }
+
+    /**
+     * @param url address of the crawled page; used as the URL of every article
+     * @param doc parsed page
+     * @return one article per usable {@code .article-headline} element, in document order
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) {
+        List<Article> articles = new ArrayList<>();
+        Elements items = doc.select(".article-headline");
+        for (Element e : items) {
+            try {
+                articles.add(new Article(e.text(), url, ""));
+            } catch (IllegalArgumentException ex) {
+                // Article rejects blank or over-long titles; one bad element must
+                // not discard the rest of the page.
+                logger.warn("Skipped element with invalid title: {}", ex.getMessage());
+            }
+        }
+        return articles;
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java
new file mode 100644
index 0000000..eb25935
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java
@@ -0,0 +1,83 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+/**
+ * Strategy for pages on people.com.cn.
+ *
+ * <p>News containers are selected with
+ * {@code div.w1000, div.news-item, li.list_item}; if none match, every link whose
+ * href contains {@code /n1/} is used instead. The link text is the title and the
+ * first {@code p}, {@code div.ed} or {@code div.summary} inside the container is
+ * the content. A failure on one item does not abort the page.
+ */
+public class PeopleStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(PeopleStrategy.class);
+    private static final String BASE_URL = "https://www.people.com.cn";
+    /** A title must be longer than this many characters to be accepted. */
+    private static final int MIN_TITLE_LENGTH = 5;
+
+    @Override
+    public boolean supports(String url) {
+        return url.contains("people.com.cn");
+    }
+
+    /**
+     * @param url address of the crawled page; used for logging only
+     * @param doc parsed page
+     * @return the articles found, in document order
+     * @throws ParseException if the page as a whole cannot be processed
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse People's Daily News: {}", url);
+        List<Article> articles = new ArrayList<>();
+
+        try {
+            Elements newsItems = doc.select("div.w1000, div.news-item, li.list_item");
+            logger.debug("Found {} news containers", newsItems.size());
+
+            if (newsItems.isEmpty()) {
+                newsItems = doc.select("a[href*='/n1/']");
+                logger.debug("Trying alternative selector, found {} items", newsItems.size());
+            }
+
+            for (Element item : newsItems) {
+                try {
+                    Element link = item.selectFirst("a");
+                    if (link == null) {
+                        // Kept from the original as a safety net in case the item itself is the link.
+                        link = item.tagName().equals("a") ? item : null;
+                    }
+
+                    if (link == null) {
+                        logger.warn("No link found in news item");
+                        continue;
+                    }
+
+                    String articleUrl = resolveUrl(link);
+                    String title = link.text().trim();
+
+                    String content = "";
+                    Element contentEl = item.selectFirst("p, div.ed, div.summary");
+                    if (contentEl != null) {
+                        content = contentEl.text().trim();
+                    }
+
+                    if (title.length() > MIN_TITLE_LENGTH) {
+                        articles.add(new Article(title, articleUrl, content));
+                        logger.debug("Parsed article: {}", title);
+                    } else {
+                        logger.warn("Invalid title found, skipping article");
+                    }
+                } catch (Exception e) {
+                    // Covers, among others, Article rejecting the title or URL; skip only this item.
+                    logger.error("Error parsing individual article: {}", e.getMessage());
+                }
+            }
+
+            logger.info("Successfully parsed {} articles from People's Daily News", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse People's Daily News page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse People's Daily News: " + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Turns the link's {@code href} into an absolute URL.
+     *
+     * <p>Absolute hrefs are returned unchanged. Otherwise the href is resolved
+     * against the document's base URI, which keeps links on the host they were
+     * found on and handles protocol-relative ({@code //host/path}) hrefs. Only
+     * when there is no base URI is the href appended to the main site address.
+     */
+    private static String resolveUrl(Element link) {
+        String href = link.attr("href");
+        if (href.startsWith("http")) {
+            return href;
+        }
+        String absolute = link.absUrl("href"); // empty when no base URI is available
+        if (!absolute.isEmpty()) {
+            return absolute;
+        }
+        return href.startsWith("/") ? BASE_URL + href : BASE_URL + "/" + href;
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
new file mode 100644
index 0000000..e28aaac
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
@@ -0,0 +1,36 @@
+package com.example.datacollect.strategy;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Registry that selects the {@link CrawlStrategy} responsible for a given URL.
+ *
+ * <p>Strategies are consulted in registration order and the first one whose
+ * {@code supports(url)} returns {@code true} wins, so the order of the built-in
+ * registrations in the constructor is significant.
+ */
+public class StrategyFactory {
+    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);
+
+    /** Registered strategies, in lookup order. */
+    private final List<CrawlStrategy> strategies = new ArrayList<>();
+
+    /** Creates a factory pre-populated with the built-in strategies. */
+    public StrategyFactory() {
+        strategies.add(new HnuNewsStrategy());
+        strategies.add(new YouthStrategy());
+        strategies.add(new PeopleStrategy());
+        strategies.add(new BlogStrategy());
+        strategies.add(new NewsStrategy());
+        logger.info("Initialized StrategyFactory with {} strategies", strategies.size());
+    }
+
+    /**
+     * Finds the first registered strategy that supports the given URL.
+     *
+     * @param url the URL to be crawled; may be {@code null}
+     * @return the matching strategy, or {@code null} when {@code url} is {@code null}
+     *         or no registered strategy supports it
+     */
+    public CrawlStrategy getStrategy(String url) {
+        // A null URL cannot match anything; skip the lookup so that strategies which
+        // dereference the URL in supports() are never handed null.
+        if (url != null) {
+            for (CrawlStrategy s : strategies) {
+                if (s.supports(url)) {
+                    logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url);
+                    return s;
+                }
+            }
+        }
+        logger.warn("No strategy found for URL: {}", url);
+        return null;
+    }
+
+    /**
+     * Appends a strategy to the end of the lookup order.
+     *
+     * @param strategy the strategy to register; must not be {@code null}
+     * @throws NullPointerException if {@code strategy} is {@code null}
+     */
+    public void register(CrawlStrategy strategy) {
+        // Reject null before touching the list: a stored null would make every later
+        // getStrategy() call fail when the loop reaches it.
+        java.util.Objects.requireNonNull(strategy, "strategy");
+        strategies.add(strategy);
+        logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName());
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/YouthStrategy.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/YouthStrategy.java
new file mode 100644
index 0000000..2bdb8d1
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/strategy/YouthStrategy.java
@@ -0,0 +1,87 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+/**
+ * Parsing strategy for China Youth Network (youth.cn) news listing pages.
+ *
+ * <p>Each matched listing entry becomes an {@link Article} built from the link text
+ * (title), the resolved link target (URL) and an optional summary element (content).
+ */
+public class YouthStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(YouthStrategy.class);
+
+    /** Site root used to absolutize links when the document has no usable base URI. */
+    private static final String SITE_ROOT = "https://www.youth.cn";
+
+    /** Link texts shorter than this are rejected as titles. */
+    private static final int MIN_TITLE_LENGTH = 6;
+
+    /**
+     * Reports whether this strategy handles the given URL.
+     *
+     * @param url the URL to test; may be {@code null}
+     * @return {@code true} if the URL contains the youth.cn domain name
+     */
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("youth.cn");
+    }
+
+    /**
+     * Extracts articles from an already-fetched youth.cn listing page.
+     *
+     * @param url the page URL; used for logging only
+     * @param doc the parsed page
+     * @return the articles found, possibly empty; entries that fail individually are
+     *         logged and skipped
+     * @throws ParseException if the page as a whole cannot be processed
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse Youth News: {}", url);
+        List<Article> articles = new ArrayList<>();
+
+        try {
+            Elements newsItems = doc.select("div.news-item, div.article-item, li.news-list-item");
+            logger.debug("Found {} news items", newsItems.size());
+
+            if (newsItems.isEmpty()) {
+                // NOTE(review): this fallback selector is identical to the one used by the
+                // People's Daily parser; confirm that youth.cn article links really contain "/n1/".
+                newsItems = doc.select("a[href*='/n1/']");
+                logger.debug("Trying alternative selector, found {} items", newsItems.size());
+            }
+
+            for (Element item : newsItems) {
+                try {
+                    Element link = findLink(item);
+                    if (link == null) {
+                        logger.warn("No link found in news item");
+                        continue;
+                    }
+
+                    String title = link.text().trim();
+                    if (title.isEmpty()) {
+                        // Anchors without text (e.g. image links) are skipped silently.
+                        continue;
+                    }
+                    if (title.length() < MIN_TITLE_LENGTH) {
+                        logger.warn("Invalid title found, skipping article");
+                        continue;
+                    }
+
+                    String content = "";
+                    Element contentEl = item.selectFirst("p.summary, p.desc, div.brief");
+                    if (contentEl != null) {
+                        content = contentEl.text().trim();
+                    }
+
+                    articles.add(new Article(title, resolveUrl(link), content));
+                    logger.debug("Parsed article: {}", title);
+                } catch (Exception e) {
+                    // One malformed entry must not abort the whole page; keep the stack trace
+                    // so the failure can be diagnosed.
+                    logger.error("Error parsing individual article: {}", e.getMessage(), e);
+                }
+            }
+
+            logger.info("Successfully parsed {} articles from Youth News", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse Youth News page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse Youth News: " + e.getMessage(), e);
+        }
+    }
+
+    /** Returns the anchor for a listing entry: a matching {@code a} element, or the entry itself if it is one. */
+    private static Element findLink(Element item) {
+        Element link = item.selectFirst("a");
+        if (link == null && item.tagName().equals("a")) {
+            link = item;
+        }
+        return link;
+    }
+
+    /**
+     * Resolves the link's {@code href} to an absolute URL.
+     *
+     * <p>Resolution against the document's base URI is preferred because it keeps the
+     * page's own host and path. When the document has no base URI the site root is used
+     * instead, with protocol-relative links ({@code //host/path}) given an https scheme
+     * rather than being glued onto the site root.
+     */
+    private static String resolveUrl(Element link) {
+        String resolved = link.absUrl("href");
+        if (!resolved.isEmpty()) {
+            return resolved;
+        }
+
+        String href = link.attr("href").trim();
+        if (href.startsWith("http://") || href.startsWith("https://")) {
+            return href;
+        }
+        if (href.startsWith("//")) {
+            return "https:" + href;
+        }
+        if (href.startsWith("/")) {
+            return SITE_ROOT + href;
+        }
+        return SITE_ROOT + "/" + href;
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/util/RetryUtils.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/util/RetryUtils.java
new file mode 100644
index 0000000..96aee20
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/util/RetryUtils.java
@@ -0,0 +1,49 @@
+package com.example.datacollect.util;
+
+import com.example.datacollect.exception.NetworkException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.concurrent.Callable;
+
+/**
+ * Helper for re-running a task after {@link NetworkException} failures.
+ *
+ * <p>Only {@link NetworkException} is considered retryable; any other exception thrown
+ * by the task is rethrown immediately.
+ */
+public class RetryUtils {
+    private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class);
+
+    private static final int DEFAULT_MAX_RETRIES = 3;
+    private static final long DEFAULT_RETRY_DELAY_MS = 1000;
+
+    /**
+     * Runs the task with the default policy (3 retries, 1000 ms between attempts).
+     *
+     * @param task the work to perform
+     * @param <T> the task's result type
+     * @return the result of the first successful attempt
+     * @throws Exception the last {@link NetworkException} once retries are exhausted, or
+     *         the first non-retryable exception thrown by the task
+     */
+    public static <T> T executeWithRetry(Callable<T> task) throws Exception {
+        return executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_DELAY_MS);
+    }
+
+    /**
+     * Runs the task, retrying after each {@link NetworkException}.
+     *
+     * @param task the work to perform; must not be {@code null}
+     * @param maxRetries number of retries after the initial attempt; must be {@code >= 0}
+     * @param retryDelayMs pause before each retry in milliseconds; must be {@code >= 0}
+     * @param <T> the task's result type
+     * @return the result of the first successful attempt
+     * @throws IllegalArgumentException if {@code maxRetries} or {@code retryDelayMs} is negative
+     * @throws NullPointerException if {@code task} is {@code null}
+     * @throws InterruptedException if the thread is interrupted while waiting to retry
+     * @throws Exception the last {@link NetworkException} once retries are exhausted, or
+     *         the first non-retryable exception thrown by the task
+     */
+    public static <T> T executeWithRetry(Callable<T> task, int maxRetries, long retryDelayMs) throws Exception {
+        java.util.Objects.requireNonNull(task, "task");
+        // Without this check a negative maxRetries skips the loop entirely and the method
+        // would end up executing "throw null".
+        if (maxRetries < 0) {
+            throw new IllegalArgumentException("maxRetries must be >= 0, got " + maxRetries);
+        }
+        if (retryDelayMs < 0) {
+            throw new IllegalArgumentException("retryDelayMs must be >= 0, got " + retryDelayMs);
+        }
+
+        Exception lastException = null;
+
+        for (int attempt = 0; attempt <= maxRetries; attempt++) {
+            if (attempt > 0) {
+                logger.info("Retry attempt {}/{} for task", attempt, maxRetries);
+                // The pause sits outside the task's try block so an interrupt is not
+                // misreported as a task failure.
+                try {
+                    Thread.sleep(retryDelayMs);
+                } catch (InterruptedException e) {
+                    // Callers typically catch the broad Exception declared here, so restore
+                    // the flag to keep the interruption observable.
+                    Thread.currentThread().interrupt();
+                    throw e;
+                }
+            }
+
+            try {
+                return task.call();
+            } catch (NetworkException e) {
+                lastException = e;
+                logger.warn("Network error on attempt {}: {}", attempt, e.getMessage());
+                if (attempt < maxRetries) {
+                    logger.info("Will retry in {} ms...", retryDelayMs);
+                }
+            } catch (Exception e) {
+                logger.error("Non-retryable error: {}", e.getMessage());
+                throw e;
+            }
+        }
+
+        logger.error("All {} retry attempts failed", maxRetries + 1);
+        throw lastException;
+    }
+}
diff --git a/w11/java-cli-w11/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/java-cli-w11/src/main/java/com/example/datacollect/view/ConsoleView.java
new file mode 100644
index 0000000..4665db0
--- /dev/null
+++ b/w11/java-cli-w11/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -0,0 +1,46 @@
+package com.example.datacollect.view;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Console front-end: reads command lines from standard input and writes
+ * ANSI-colored messages and article listings to standard output.
+ */
+public class ConsoleView {
+    private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
+
+    // ANSI escape sequences for colored terminal output.
+    private static final String ANSI_RESET = "\u001B[0m";
+    private static final String ANSI_GREEN = "\u001B[32m";
+    private static final String ANSI_RED = "\u001B[31m";
+    private static final String ANSI_BLUE = "\u001B[34m";
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /**
+     * Prints the {@code "> "} prompt and reads one line from standard input.
+     *
+     * @return the line entered by the user, without its line terminator
+     * @throws java.util.NoSuchElementException if standard input has no more lines
+     */
+    public String readLine() {
+        System.out.print("> ");
+        return scanner.nextLine();
+    }
+
+    /** Prints a message in green. */
+    public void printSuccess(String msg) {
+        System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+    }
+
+    /** Prints a message in red. */
+    public void printError(String msg) {
+        System.out.println(ANSI_RED + msg + ANSI_RESET);
+    }
+
+    /** Prints a message in blue. */
+    public void printInfo(String msg) {
+        System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+    }
+
+    /**
+     * Prints a numbered list of articles as {@code "<n>. <title> | <url>"}.
+     *
+     * @param articles the articles to show; {@code null} is treated like an empty list
+     *        and produces the "no articles yet" hint
+     */
+    public void display(List<Article> articles) {
+        if (articles == null || articles.isEmpty()) {
+            printInfo("暂无文章,请先执行 crawl。");
+            return;
+        }
+        for (int i = 0; i < articles.size(); i++) {
+            Article a = articles.get(i);
+            System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+        }
+    }
+}
diff --git a/w11/java-cli-w11/src/main/resources/logback.xml b/w11/java-cli-w11/src/main/resources/logback.xml
new file mode 100644
index 0000000..aa0a06b
--- /dev/null
+++ b/w11/java-cli-w11/src/main/resources/logback.xml
@@ -0,0 +1,24 @@
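+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>logs/crawler.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
+            <fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
+            <maxHistory>30</maxHistory>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="CONSOLE" />
+        <appender-ref ref="FILE" />
+    </root>
+</configuration>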
diff --git a/w11/java-cli-w11/target/classes/logback.xml b/w11/java-cli-w11/target/classes/logback.xml
new file mode 100644
index 0000000..aa0a06b
--- /dev/null
+++ b/w11/java-cli-w11/target/classes/logback.xml
@@ -0,0 +1,24 @@
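+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
+        <file>logs/crawler.log</file>
+        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
+            <fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
+            <maxHistory>30</maxHistory>
+        </rollingPolicy>
+        <encoder>
+            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
+        </encoder>
+    </appender>
+
+    <root level="INFO">
+        <appender-ref ref="CONSOLE" />
+        <appender-ref ref="FILE" />
+    </root>
+</configuration>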
diff --git a/w11/java-cli-w11/target/maven-archiver/pom.properties b/w11/java-cli-w11/target/maven-archiver/pom.properties
new file mode 100644
index 0000000..5c1de34
--- /dev/null
+++ b/w11/java-cli-w11/target/maven-archiver/pom.properties
@@ -0,0 +1,3 @@
+artifactId=datacollect-cli
+groupId=com.example
+version=0.1.0
diff --git a/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..1ead6c5
--- /dev/null
+++ b/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1,22 @@
+com\example\datacollect\command\ListCommand.class
+com\example\datacollect\strategy\PeopleStrategy.class
+com\example\datacollect\command\CrawlCommand.class
+com\example\datacollect\strategy\BlogStrategy.class
+com\example\datacollect\repository\ArticleRepository.class
+com\example\datacollect\Main.class
+com\example\datacollect\view\ConsoleView.class
+com\example\datacollect\command\ExitCommand.class
+com\example\datacollect\command\HelpCommand.class
+com\example\datacollect\util\RetryUtils.class
+com\example\datacollect\strategy\NewsStrategy.class
+com\example\datacollect\command\Command.class
+com\example\datacollect\controller\CrawlerController.class
+com\example\datacollect\exception\CrawlerException.class
+com\example\datacollect\exception\NetworkException.class
+com\example\datacollect\command\AnalyzeCommand.class
+com\example\datacollect\strategy\StrategyFactory.class
+com\example\datacollect\strategy\HnuNewsStrategy.class
+com\example\datacollect\strategy\YouthStrategy.class
+com\example\datacollect\exception\ParseException.class
+com\example\datacollect\strategy\CrawlStrategy.class
+com\example\datacollect\model\Article.class
diff --git a/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..937e5d7
--- /dev/null
+++ b/w11/java-cli-w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,22 @@
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\NewsStrategy.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\controller\CrawlerController.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\repository\ArticleRepository.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\ExitCommand.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\Command.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\Main.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\CrawlCommand.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\NetworkException.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\StrategyFactory.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\BlogStrategy.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\util\RetryUtils.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\HelpCommand.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\CrawlerException.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\ParseException.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\model\Article.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\view\ConsoleView.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\AnalyzeCommand.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\YouthStrategy.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\ListCommand.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
+C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\PeopleStrategy.java