diff --git a/java-cli(1)/java-cli/pom.xml b/java-cli(1)/java-cli/pom.xml
new file mode 100644
index 0000000..f7cb39f
--- /dev/null
+++ b/java-cli(1)/java-cli/pom.xml
@@ -0,0 +1,71 @@
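+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>datacollect-cli</artifactId>
+    <version>0.1.0</version>
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <logback.version>1.4.11</logback.version>
+        <slf4j.version>2.0.9</slf4j.version>
+        <jsoup.version>1.17.2</jsoup.version>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>${logback.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>${slf4j.version}</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>${jsoup.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+                <configuration>
+                    <encoding>UTF-8</encoding>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.datacollect.Main</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>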
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java
index 2bac9a1..7331c40 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java
@@ -1,19 +1,42 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
-import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+/**
+ * Entry point of the CLI crawler: builds the console view and the command
+ * controller, then feeds every line the user types to the controller.
+ */
public class Main {
+ private static final Logger logger = LoggerFactory.getLogger(Main.class);
+ /**
+ * Runs the interactive session.
+ *
+ * <p>The read-dispatch loop has no exit condition of its own; it ends only
+ * when an exception escapes it, which is then logged and shown to the user
+ * before {@code main} returns.
+ *
+ * @param args command-line arguments; not read by this method
+ */
public static void main(String[] args) {
+ logger.info("启动 CLI Crawler 程序");
+
ConsoleView view = new ConsoleView();
- ArticleRepository repository = new ArticleRepository();
- CrawlerController controller = new CrawlerController(view, repository);
+ CrawlerController controller = new CrawlerController(view);
- view.printSuccess("Welcome to CLI Crawler (w10)! Type help for commands.");
- while (true) {
- controller.handle(view.readLine());
+ view.printSuccess("Welcome to CLI Crawler! Type help for commands.");
+
+ try {
+ while (true) {
+ controller.handle(view.readLine());
+ }
+ } catch (Exception e) {
+ // Log with the stack trace, then repeat the message on the console for the user.
+ logger.error("程序异常退出: {}", e.getMessage(), e);
+ view.printError("程序异常退出: " + e.getMessage());
}
}
}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java
index 029cadc..4027014 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java
@@ -1,8 +1,20 @@
package com.example.datacollect.command;
-import com.example.datacollect.repository.ArticleRepository;
+import java.util.List;
+
+import com.example.datacollect.model.Article;
+/**
+ * A named console command that can be run against the collected articles.
+ */
public interface Command {
String getName();
- void execute(String[] args, ArticleRepository repository);
-}
+
+    /**
+     * Runs the command.
+     *
+     * @param args     tokens of the input line (presumably the command name first, then its arguments)
+     * @param articles list of collected articles the command may read or append to
+     */
+    void execute(String[] args, List<Article> articles);
+}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
index 5f58a48..4ee8af5 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -1,20 +1,41 @@
package com.example.datacollect.command;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.exception.UrlFormatException;
import com.example.datacollect.model.Article;
-import com.example.datacollect.repository.ArticleRepository;
-import com.example.datacollect.strategy.CrawlStrategy;
-import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.RetryUtils;
import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.ArrayList;
import java.util.List;
+/**
+ * {@code crawl <url>} command: downloads a page, extracts article-like
+ * elements from it and appends them to the shared article list.
+ */
public class CrawlCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
private final ConsoleView view;
- private final StrategyFactory strategyFactory;
-
- public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+    /** Number of retries granted to the page download. */
+    private static final int MAX_RETRIES = 3;
+    /** Upper bound on the number of articles taken from one page. */
+    private static final int MAX_ARTICLES_PER_PAGE = 10;
+
+    /**
+     * Creates the command.
+     *
+     * @param view console used for all user-facing output
+     */
+    public CrawlCommand(ConsoleView view) {
this.view = view;
- this.strategyFactory = strategyFactory;
}
@Override
@@ -23,20 +44,202 @@ public class CrawlCommand implements Command {
}
+    /**
+     * Validates the URL argument, downloads the page (with retries) and stores the
+     * articles parsed from it. Bad input, network errors and parse errors are all
+     * reported through the view rather than thrown to the caller.
+     *
+     * @param args     input tokens; {@code args[1]} is the URL to crawl
+     * @param articles shared list that receives the crawled articles
+     */
@Override
- public void execute(String[] args, ArticleRepository repository) {
+    public void execute(String[] args, List<Article> articles) {
if (args.length < 2) {
+            logger.warn("缺少URL参数");
view.printError("Usage: crawl <url>");
return;
}
- String url = args[1];
- CrawlStrategy strategy = strategyFactory.getStrategy(url);
- view.printInfo("Crawling " + url + " using " + strategy.getName() + " strategy...");
- List articles = strategy.crawl(url);
- repository.addAll(articles);
- view.printSuccess("Crawl completed! Added " + articles.size() + " articles.");
+        String url = args[1].trim();
+
+        // Reject malformed input before touching the network.
+        try {
+            validateUrl(url);
+        } catch (UrlFormatException e) {
+            logger.warn("不支持的URL: {}, 原因: {}", url, e.getMessage());
+            view.printError("不支持的URL: " + e.getMessage());
+            return;
+        }
+
+        logger.info("开始爬取 URL: {}", url);
+        view.printInfo("Crawling " + url + "...");
+
+        List<Article> crawledArticles;
+        try {
+            // Only the download is retried: a page that fails to parse will not parse
+            // any better on a second attempt.
+            Document page = RetryUtils.execute(() -> fetchDocument(url), "爬取网页: " + url, MAX_RETRIES);
+            crawledArticles = parseArticles(page, url);
+        } catch (ParseException e) {
+            logger.error("解析失败: {}", e.getMessage(), e);
+            view.printError("解析失败: " + e.getMessage());
+            return;
+        } catch (RuntimeException e) {
+            logger.error("爬取失败: {}", e.getMessage(), e);
+            view.printError("网络错误,已重试 " + MAX_RETRIES + " 次");
+            return;
+        }
+
+        if (crawledArticles.isEmpty()) {
+            logger.warn("爬取结果为空");
+            view.printWarning("No articles found on this page.");
+            return;
+        }
+
+        articles.addAll(crawledArticles);
+        logger.info("爬取完成,成功添加 {} 篇文章", crawledArticles.size());
+        view.printSuccess("Crawl completed! Added " + crawledArticles.size() + " articles.");
+    }
+
+    /**
+     * Checks that {@code url} is a non-empty http(s) URL with a host part.
+     *
+     * @throws UrlFormatException if any of those conditions is not met
+     */
+    private void validateUrl(String url) {
+        if (url == null || url.trim().isEmpty()) {
+            throw new UrlFormatException("URL不能为空");
+        }
+        if (!url.startsWith("http://") && !url.startsWith("https://")) {
+            throw new UrlFormatException("URL必须以 http:// 或 https:// 开头", url);
+        }
+        try {
+            String host = new java.net.URL(url).getHost();
+            if (host == null || host.isEmpty()) {
+                throw new UrlFormatException("URL主机名无效", url);
+            }
+        } catch (java.net.MalformedURLException e) {
+            throw new UrlFormatException("URL格式错误: " + e.getMessage(), url, e);
+        }
+    }
+
+    /**
+     * Downloads and parses the HTML at {@code url} into a jsoup document.
+     *
+     * @throws NetworkException if the request fails with an {@link IOException}
+     */
+    private Document fetchDocument(String url) {
+        try {
+            logger.debug("正在连接到: {}", url);
+            Document doc = Jsoup.connect(url)
+                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
+                    .timeout(10000) // milliseconds
+                    .get();
+            logger.debug("成功获取网页内容");
+            return doc;
+        } catch (IOException e) {
+            logger.error("网络请求失败: {}", e.getMessage());
+            throw new NetworkException("网络请求失败: " + e.getMessage(), url, e);
+        }
+    }
+
+    /**
+     * Extracts up to {@link #MAX_ARTICLES_PER_PAGE} articles from a downloaded page.
+     * Falls back to generated placeholder articles when nothing with a title is found.
+     *
+     * @throws ParseException if element selection or extraction fails unexpectedly
+     */
+    private List<Article> parseArticles(Document doc, String url) {
+        try {
+            // Prefer article-like containers; fall back to bare headings.
+            Elements items = doc.select("article, .article, .post, .entry, div[class*=article], div[class*=post]");
+            if (items.isEmpty()) {
+                items = doc.select("h1, h2, h3, .title, .heading");
+            }
+            logger.info("找到 {} 个潜在的文章项", items.size());
+
+            List<Article> parsed = new ArrayList<>();
+            for (Element item : items) {
+                if (parsed.size() >= MAX_ARTICLES_PER_PAGE) {
+                    break;
+                }
+                String title = extractTitle(item);
+                if (title == null || title.trim().isEmpty()) {
+                    continue;
+                }
+                parsed.add(new Article(title.trim(), url, extractContent(item),
+                        extractAuthor(item), extractDate(item)));
+                logger.debug("解析到文章: {}", title);
+            }
+
+            if (parsed.isEmpty()) {
+                logger.warn("未能从网页解析到文章,生成模拟数据");
+                return generateMockArticles(url);
+            }
+            return parsed;
+        } catch (RuntimeException e) {
+            logger.error("解析网页失败: {}", e.getMessage());
+            throw new ParseException("解析网页失败: " + e.getMessage(), url, e);
+        }
+    }
+
+    /** Returns the text of the first heading/title match, or the element's own text. */
+    private String extractTitle(Element element) {
+        Element titleElement = element.selectFirst("h1, h2, h3, .title, [class*=title], .headline");
+        return titleElement != null ? titleElement.text() : element.text();
+    }
+
+    /** Returns at most the first 200 characters of the first paragraph/content match, or a placeholder. */
+    private String extractContent(Element element) {
+        Element contentElement = element.selectFirst("p, .content, [class*=content], .body");
+        if (contentElement == null) {
+            return "Content from " + extractDomain(element.baseUri());
+        }
+        String text = contentElement.text();
+        return text.substring(0, Math.min(200, text.length()));
+    }
+
+    /** Returns the text of the first author match, or {@code "Unknown"}. */
+    private String extractAuthor(Element element) {
+        Element authorElement = element.selectFirst(".author, [class*=author], [rel=author]");
+        return authorElement != null ? authorElement.text() : "Unknown";
+    }
+
+    /** Returns the text of the first date match, or today's date in ISO format. */
+    private String extractDate(Element element) {
+        Element dateElement = element.selectFirst(".date, [class*=date], time");
+        return dateElement != null ? dateElement.text() : java.time.LocalDate.now().toString();
+    }
+
+    /** Builds three placeholder articles labelled with the domain of {@code url}. */
+    private List<Article> generateMockArticles(String url) {
+        List<Article> mocks = new ArrayList<>();
+        String domain = extractDomain(url);
+        for (int i = 1; i <= 3; i++) {
+            mocks.add(new Article(
+                    "Article " + i + " from " + domain,
+                    url,
+                    "Content " + i + " from " + domain,
+                    "Author " + i,
+                    java.time.LocalDate.now().minusDays(i).toString()));
+        }
+        return mocks;
+    }
+
+    /** Returns the part of an http(s) URL between {@code "://"} and the next {@code '/'}; other input is returned as is. */
+    private String extractDomain(String url) {
+        try {
+            if (url.startsWith("http://") || url.startsWith("https://")) {
+                int start = url.indexOf("://") + 3;
+                int end = url.indexOf('/', start);
+                return end > start ? url.substring(start, end) : url.substring(start);
+            }
+            return url;
+        } catch (Exception e) {
+            // Best effort: fall back to the raw input (e.g. when url is null).
+            return url;
+        }
}
}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
index 15c2f00..b6fe151 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -1,6 +1,8 @@
package com.example.datacollect.command;
-import com.example.datacollect.repository.ArticleRepository;
+import java.util.List;
+
+import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
public class ExitCommand implements Command {
@@ -16,7 +18,13 @@ public class ExitCommand implements Command {
}
+    /**
+     * Says goodbye and terminates the JVM with exit status 0.
+     *
+     * @param args     ignored
+     * @param articles ignored
+     */
@Override
- public void execute(String[] args, ArticleRepository repository) {
+    public void execute(String[] args, List<Article> articles) {
view.printSuccess("Bye!");
System.exit(0);
}
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
index 7f943f9..91bed14 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -1,8 +1,10 @@
package com.example.datacollect.command;
-import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
+import java.util.List;
+
public class HelpCommand implements Command {
private final ConsoleView view;
@@ -16,7 +18,13 @@ public class HelpCommand implements Command {
}
+    /**
+     * Prints the list of available commands.
+     *
+     * @param args     ignored
+     * @param articles ignored
+     */
@Override
- public void execute(String[] args, ArticleRepository repository) {
+    public void execute(String[] args, List<Article> articles) {
view.printInfo("Commands: crawl <url>, list, history, help, exit");
}
}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
index b93b902..fa58eb9 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
@@ -1,10 +1,10 @@
package com.example.datacollect.command;
-import java.util.List;
-
-import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
+import java.util.List;
+
public class HistoryCommand implements Command {
private final ConsoleView view;
private final List history;
@@ -20,7 +20,7 @@ public class HistoryCommand implements Command {
}
@Override
- public void execute(String[] args, ArticleRepository repository) {
+ public void execute(String[] args, List articles) {
if (history.isEmpty()) {
view.printInfo("暂无命令历史");
return;
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
index 67e6f3e..497ea0e 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -1,12 +1,14 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
-import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.List;
public class ListCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
private final ConsoleView view;
public ListCommand(ConsoleView view) {
@@ -19,15 +21,25 @@ public class ListCommand implements Command {
}
+    /**
+     * Prints one numbered line per article (title, URL, author, publish date).
+     *
+     * @param args     ignored
+     * @param articles the articles to list; an empty list prints a hint to run crawl first
+     */
@Override
- public void execute(String[] args, ArticleRepository repository) {
- List articles = repository.getAll();
+    public void execute(String[] args, List<Article> articles) {
+        logger.debug("执行 list 命令,文章数量: {}", articles.size());
+
if (articles.isEmpty()) {
view.printInfo("暂无文章,请先执行 crawl。");
return;
}
+
+        logger.info("显示 {} 篇文章", articles.size());
+
for (int i = 0; i < articles.size(); i++) {
Article a = articles.get(i);
- System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl() + " | " + a.getAuthor() + " | " + a.getPublishDate());
+            view.printInfo((i + 1) + ". " + a.getTitle() + " | " + a.getUrl() + " | " + a.getAuthor() + " | " + a.getPublishDate());
}
}
}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
index 49a6437..2b7c25d 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -1,36 +1,42 @@
package com.example.datacollect.controller;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
import com.example.datacollect.command.Command;
import com.example.datacollect.command.CrawlCommand;
import com.example.datacollect.command.ExitCommand;
import com.example.datacollect.command.HelpCommand;
import com.example.datacollect.command.HistoryCommand;
import com.example.datacollect.command.ListCommand;
-import com.example.datacollect.repository.ArticleRepository;
-import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
+/**
+ * Registers the available commands and dispatches user input to them.
+ */
public class CrawlerController {
private final Map commands = new HashMap<>();
private final ConsoleView view;
- private final ArticleRepository repository;
+    /** Articles collected so far; the same list instance is handed to every command. */
+    private final List<Article> articles = new ArrayList<>();
private final List history = new ArrayList<>();
private HistoryCommand historyCommand;
- public CrawlerController(ConsoleView view, ArticleRepository repository) {
+    /**
+     * Creates the controller and registers the help, list, crawl, exit and history commands.
+     *
+     * @param view console shared by the controller and all commands
+     */
+    public CrawlerController(ConsoleView view) {
this.view = view;
- this.repository = repository;
- StrategyFactory strategyFactory = new StrategyFactory();
historyCommand = new HistoryCommand(view, history);
register(new HelpCommand(view));
register(new ListCommand(view));
- register(new CrawlCommand(view, strategyFactory));
+        register(new CrawlCommand(view));
register(new ExitCommand(view));
register(historyCommand);
}
@@ -54,6 +60,6 @@ public class CrawlerController {
view.printError("Unknown command: " + cmdName);
return;
}
- command.execute(args, repository);
+        command.execute(args, articles);
}
}
\ No newline at end of file
diff --git a/project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
similarity index 64%
rename from project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java
rename to java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
index 1bfce36..a1548c3 100644
--- a/project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -1,6 +1,6 @@
-package com.example.crawler.exception;
+package com.example.datacollect.exception;
-public class CrawlerException extends Exception {
+public class CrawlerException extends RuntimeException {
public CrawlerException(String message) {
super(message);
}
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..032be2f
--- /dev/null
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,39 @@
+package com.example.datacollect.exception;
+
+/**
+ * Crawler failure for network-level errors, optionally carrying the URL that was
+ * being requested.
+ */
+public class NetworkException extends CrawlerException {
+ /** URL associated with the failure, or {@code null} if none was supplied. */
+ private final String url;
+
+ /** Creates an exception with a message only; no URL is recorded. */
+ public NetworkException(String message) {
+ super(message);
+ this.url = null;
+ }
+
+ /** Creates an exception with a message and the URL involved. */
+ public NetworkException(String message, String url) {
+ super(message);
+ this.url = url;
+ }
+
+ /** Creates an exception with a message and its cause; no URL is recorded. */
+ public NetworkException(String message, Throwable cause) {
+ super(message, cause);
+ this.url = null;
+ }
+
+ /** Creates an exception with a message, the URL involved and its cause. */
+ public NetworkException(String message, String url, Throwable cause) {
+ super(message, cause);
+ this.url = url;
+ }
+
+ /** Returns the recorded URL, or {@code null} if there is none. */
+ public String getUrl() {
+ return url;
+ }
+}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..e919f89
--- /dev/null
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,39 @@
+package com.example.datacollect.exception;
+
+/**
+ * Crawler failure for content that could not be parsed, optionally carrying an
+ * identifier of that content.
+ */
+public class ParseException extends CrawlerException {
+ /** Identifier of the content that failed to parse, or {@code null} if none was supplied. */
+ private final String source;
+
+ /** Creates an exception with a message only; no source is recorded. */
+ public ParseException(String message) {
+ super(message);
+ this.source = null;
+ }
+
+ /** Creates an exception with a message and the source involved. */
+ public ParseException(String message, String source) {
+ super(message);
+ this.source = source;
+ }
+
+ /** Creates an exception with a message and its cause; no source is recorded. */
+ public ParseException(String message, Throwable cause) {
+ super(message, cause);
+ this.source = null;
+ }
+
+ /** Creates an exception with a message, the source involved and its cause. */
+ public ParseException(String message, String source, Throwable cause) {
+ super(message, cause);
+ this.source = source;
+ }
+
+ /** Returns the recorded source, or {@code null} if there is none. */
+ public String getSource() {
+ return source;
+ }
+}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java
new file mode 100644
index 0000000..50b2fd3
--- /dev/null
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java
@@ -0,0 +1,41 @@
+package com.example.datacollect.exception;
+
+/**
+ * Thrown when a URL handed to the crawler is empty or not a well-formed http(s) URL.
+ *
+ * <p>Extends {@link CrawlerException} like the other crawler failures in this package,
+ * so one {@code catch (CrawlerException e)} covers all of them.
+ */
+public class UrlFormatException extends CrawlerException {
+    /** The rejected URL, or {@code null} when the thrower did not supply it. */
+    private final String url;
+
+    /** Creates an exception with a message only; {@link #getUrl()} returns {@code null}. */
+    public UrlFormatException(String message) {
+        super(message);
+        this.url = null;
+    }
+
+    /** Creates an exception with a message and the rejected URL. */
+    public UrlFormatException(String message, String url) {
+        super(message);
+        this.url = url;
+    }
+
+    /** Creates an exception with a message and its cause; no URL is recorded. */
+    public UrlFormatException(String message, Throwable cause) {
+        super(message, cause);
+        this.url = null;
+    }
+
+    /** Creates an exception with a message, the rejected URL and its cause. */
+    public UrlFormatException(String message, String url, Throwable cause) {
+        super(message, cause);
+        this.url = url;
+    }
+
+    /** Returns the rejected URL, or {@code null} if none was recorded. */
+    public String getUrl() {
+        return url;
+    }
+}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
index f2b70a5..f7be358 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -1,42 +1,131 @@
package com.example.datacollect.repository;
+import com.example.datacollect.exception.UrlFormatException;
import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
-import java.util.Optional;
+/**
+ * In-memory store of articles that validates title and URL before accepting an entry.
+ */
public class ArticleRepository {
- private final List articles = new ArrayList<>();
-
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    private final List<Article> articles;
+
+    /** Creates an empty repository. */
+    public ArticleRepository() {
+        this.articles = new ArrayList<>();
+    }
+
+    /**
+     * Validates and stores one article; a {@code null} article is ignored.
+     *
+     * @param article the article to store
+     * @throws IllegalArgumentException if the title or URL is null or blank
+     * @throws UrlFormatException if the URL is not a well-formed http(s) URL with a host
+     */
public void add(Article article) {
- if (article != null) {
- articles.add(article);
+        // Ignore null as the repository did before validation was added, rather than failing with an NPE.
+        if (article == null) {
+            logger.warn("忽略为 null 的文章");
+            return;
}
+        validate(article);
+        articles.add(article);
+        logger.info("成功添加文章: {}", article.getTitle());
}
-
- public void addAll(List newArticles) {
- if (newArticles != null && !newArticles.isEmpty()) {
- articles.addAll(newArticles);
+
+    /**
+     * Checks the title and URL of a non-null article.
+     *
+     * @throws IllegalArgumentException if the title or URL is null or blank
+     * @throws UrlFormatException if the URL is not a well-formed http(s) URL with a host
+     */
+    private void validate(Article article) {
+        String title = article.getTitle();
+        if (title == null || title.trim().isEmpty()) {
+            logger.error("文章标题不能为空");
+            throw new IllegalArgumentException("文章标题不能为空");
+        }
+
+        String rawUrl = article.getUrl();
+        if (rawUrl == null || rawUrl.trim().isEmpty()) {
+            logger.error("文章URL不能为空");
+            throw new IllegalArgumentException("文章URL不能为空");
+        }
+
+        String url = rawUrl.trim();
+        if (!url.startsWith("http://") && !url.startsWith("https://")) {
+            logger.warn("URL格式不正确,缺少协议头: {}", url);
+            throw new UrlFormatException("URL格式不正确,必须以 http:// 或 https:// 开头", url);
+        }
+
+        try {
+            String host = new java.net.URL(url).getHost();
+            if (host == null || host.isEmpty()) {
+                logger.error("URL主机名无效: {}", url);
+                throw new UrlFormatException("URL主机名无效", url);
+            }
+        } catch (java.net.MalformedURLException e) {
+            logger.error("URL格式错误: {}", url);
+            throw new UrlFormatException("URL格式错误: " + e.getMessage(), url, e);
+        }
+    }
+
+    /**
+     * Validates and stores several articles; either all valid entries are stored or none are.
+     *
+     * @param articles articles to store; a {@code null} list is a no-op and {@code null} elements are skipped
+     * @throws IllegalArgumentException if any title or URL is null or blank
+     * @throws UrlFormatException if any URL is not a well-formed http(s) URL with a host
+     */
+    public void addAll(List<Article> articles) {
+        if (articles == null) {
+            return;
+        }
+        // Validate everything first so a bad entry cannot leave the store half-updated.
+        List<Article> accepted = new ArrayList<>(articles.size());
+        for (Article candidate : articles) {
+            if (candidate != null) {
+                validate(candidate);
+                accepted.add(candidate);
+            }
+        }
+        for (Article article : accepted) {
+            this.articles.add(article);
+            logger.info("成功添加文章: {}", article.getTitle());
}
}
-
+
+    /** Returns a copy of the stored articles; changes to it do not affect the repository. */
public List<Article> getAll() {
- return Collections.unmodifiableList(articles);
+        return new ArrayList<>(articles);
}
-
- public Optional findByTitle(String title) {
- return articles.stream()
- .filter(a -> a.getTitle().equals(title))
- .findFirst();
+
+    /** Returns the article at {@code index}; throws {@link IndexOutOfBoundsException} when out of range. */
+    public Article get(int index) {
+        if (index < 0 || index >= articles.size()) {
+            logger.error("索引越界: {}", index);
+            throw new IndexOutOfBoundsException("索引越界: " + index);
+        }
+        return articles.get(index);
}
-
- public int count() {
+
+    /** Returns the number of stored articles. */
+    public int size() {
return articles.size();
}
-
+
+    /** Returns {@code true} when no article is stored. */
+    public boolean isEmpty() {
+        return articles.isEmpty();
+    }
+
+    /** Removes every stored article. */
public void clear() {
articles.clear();
+        logger.info("文章列表已清空");
}
}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java
new file mode 100644
index 0000000..400deee
--- /dev/null
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java
@@ -0,0 +1,125 @@
+package com.example.datacollect.util;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.function.Supplier;
+
+/**
+ * Runs an operation until it yields a non-null result, waiting with exponential
+ * back-off between attempts.
+ *
+ * <p>An operation gets one initial attempt plus up to {@code maxRetries} retries.
+ */
+public class RetryUtils {
+    private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class);
+
+    private static final int DEFAULT_MAX_RETRIES = 3;
+    private static final long DEFAULT_BASE_WAIT_MS = 500;
+
+    private final int maxRetries;
+    private final long baseWaitMs;
+
+    /** Creates a retrier with 3 retries and a 500 ms base wait. */
+    public RetryUtils() {
+        this(DEFAULT_MAX_RETRIES, DEFAULT_BASE_WAIT_MS);
+    }
+
+    /**
+     * Creates a retrier.
+     *
+     * @param maxRetries number of retries after the first attempt; must not be negative
+     * @param baseWaitMs wait before the first retry in milliseconds, doubled for each later one; must not be negative
+     * @throws IllegalArgumentException if either argument is negative
+     */
+    public RetryUtils(int maxRetries, long baseWaitMs) {
+        if (maxRetries < 0) {
+            throw new IllegalArgumentException("maxRetries must not be negative: " + maxRetries);
+        }
+        if (baseWaitMs < 0) {
+            throw new IllegalArgumentException("baseWaitMs must not be negative: " + baseWaitMs);
+        }
+        this.maxRetries = maxRetries;
+        this.baseWaitMs = baseWaitMs;
+    }
+
+    /**
+     * Calls {@code supplier} until it returns a non-null value or the attempts are used up.
+     * An attempt fails when the supplier throws or returns {@code null}; either way the
+     * next attempt is preceded by the back-off wait.
+     *
+     * @param supplier      the operation to run
+     * @param operationName label used in log messages
+     * @param <T>           result type of the operation
+     * @return the first non-null result
+     * @throws RuntimeException if every attempt fails (the cause is the last exception thrown by
+     *                          the supplier, if any) or if the thread is interrupted while waiting
+     */
+    public <T> T executeWithRetry(Supplier<T> supplier, String operationName) {
+        Exception lastException = null;
+
+        for (int attempt = 0; attempt <= maxRetries; attempt++) {
+            try {
+                logger.debug("[{}] 第 {} 次尝试", operationName, attempt + 1);
+                T result = supplier.get();
+                if (result != null) {
+                    logger.info("[{}] 第 {} 次尝试成功", operationName, attempt + 1);
+                    return result;
+                }
+                logger.warn("[{}] 第 {} 次尝试返回空结果", operationName, attempt + 1);
+            } catch (Exception e) {
+                lastException = e;
+                logger.warn("[{}] 第 {} 次尝试失败: {}", operationName, attempt + 1, e.getMessage());
+            }
+
+            // Back off after any failed attempt, including an empty result, unless it was the last one.
+            if (attempt < maxRetries) {
+                sleepBeforeRetry(attempt, operationName);
+            }
+        }
+
+        logger.error("[{}] 已重试 {} 次,全部失败", operationName, maxRetries);
+        throw new RuntimeException("操作失败,已重试 " + maxRetries + " 次", lastException);
+    }
+
+    /** Sleeps for the back-off interval of {@code attempt}; restores the interrupt flag and fails if interrupted. */
+    private void sleepBeforeRetry(int attempt, String operationName) {
+        long waitTime = calculateWaitTime(attempt);
+        logger.warn("[{}] 将在 {} ms 后重试", operationName, waitTime);
+        try {
+            Thread.sleep(waitTime);
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            logger.error("[{}] 重试等待被中断", operationName);
+            throw new RuntimeException("重试等待被中断", ie);
+        }
+    }
+
+    /** Exponential back-off: {@code baseWaitMs * 2^attempt}. */
+    private long calculateWaitTime(int attempt) {
+        return (long) (baseWaitMs * Math.pow(2, attempt));
+    }
+
+    /** Runs {@code supplier} with the default retry count and base wait. */
+    public static <T> T execute(Supplier<T> supplier, String operationName) {
+        return new RetryUtils().executeWithRetry(supplier, operationName);
+    }
+
+    /** Runs {@code supplier} with {@code maxRetries} retries and the default base wait. */
+    public static <T> T execute(Supplier<T> supplier, String operationName, int maxRetries) {
+        return new RetryUtils(maxRetries, DEFAULT_BASE_WAIT_MS).executeWithRetry(supplier, operationName);
+    }
+
+    /** Runs {@code supplier} with {@code maxRetries} retries and a base wait of {@code baseWaitMs} milliseconds. */
+    public static <T> T execute(Supplier<T> supplier, String operationName, int maxRetries, long baseWaitMs) {
+        return new RetryUtils(maxRetries, baseWaitMs).executeWithRetry(supplier, operationName);
+    }
+
+    public int getMaxRetries() {
+        return maxRetries;
+    }
+
+    public long getBaseWaitMs() {
+        return baseWaitMs;
+    }
+}
\ No newline at end of file
diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
index 3c1d47a..c033152 100644
--- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
+++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -1,14 +1,19 @@
package com.example.datacollect.view;
import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import java.util.List;
import java.util.Scanner;
public class ConsoleView {
+ private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
private static final String ANSI_RESET = "\u001B[0m";
private static final String ANSI_GREEN = "\u001B[32m";
private static final String ANSI_RED = "\u001B[31m";
private static final String ANSI_BLUE = "\u001B[34m";
+ private static final String ANSI_YELLOW = "\u001B[33m";
private final Scanner scanner = new Scanner(System.in);
@@ -18,17 +23,31 @@ public class ConsoleView {
}
public void printSuccess(String msg) {
+ // Mirror the message into the log before printing it.
+ logger.info(msg);
System.out.println(ANSI_GREEN + msg + ANSI_RESET);
}
public void printError(String msg) {
+ logger.error(msg);
System.out.println(ANSI_RED + msg + ANSI_RESET);
}
public void printInfo(String msg) {
+ logger.info(msg);
System.out.println(ANSI_BLUE + msg + ANSI_RESET);
}
+ /**
+ * Logs {@code msg} at WARN level and prints it to standard output in yellow.
+ *
+ * @param msg the warning text to show
+ */
+ public void printWarning(String msg) {
+ logger.warn(msg);
+ System.out.println(ANSI_YELLOW + msg + ANSI_RESET);
+ }
+
public void display(List articles) {
if (articles.isEmpty()) {
printInfo("暂无文章,请先执行 crawl。");
@@ -39,4 +58,4 @@ public class ConsoleView {
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
}
}
-}
+}
\ No newline at end of file