diff --git a/java-cli(1)/java-cli/pom.xml b/java-cli(1)/java-cli/pom.xml new file mode 100644 index 0000000..f7cb39f --- /dev/null +++ b/java-cli(1)/java-cli/pom.xml @@ -0,0 +1,71 @@ + + 4.0.0 + com.example + datacollect-cli + 0.1.0 + + 11 + 11 + UTF-8 + 1.4.11 + 2.0.9 + 1.17.2 + + + + + ch.qos.logback + logback-classic + ${logback.version} + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.jsoup + jsoup + ${jsoup.version} + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + UTF-8 + + + + org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.datacollect.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java index 2bac9a1..7331c40 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/Main.java @@ -1,19 +1,28 @@ package com.example.datacollect; import com.example.datacollect.controller.CrawlerController; -import com.example.datacollect.repository.ArticleRepository; import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class Main { + private static final Logger logger = LoggerFactory.getLogger(Main.class); public static void main(String[] args) { + logger.info("启动 CLI Crawler 程序"); + ConsoleView view = new ConsoleView(); - ArticleRepository repository = new ArticleRepository(); - CrawlerController controller = new CrawlerController(view, repository); + CrawlerController controller = new CrawlerController(view); - view.printSuccess("Welcome to CLI Crawler (w10)! Type help for commands."); - while (true) { - controller.handle(view.readLine()); + view.printSuccess("Welcome to CLI Crawler! Type help for commands."); + + try { + while (true) { + controller.handle(view.readLine()); + } + } catch (Exception e) { + logger.error("程序异常退出: {}", e.getMessage(), e); + view.printError("程序异常退出: " + e.getMessage()); } } } \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java index 029cadc..4027014 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/Command.java @@ -1,8 +1,10 @@ package com.example.datacollect.command; -import com.example.datacollect.repository.ArticleRepository; +import java.util.List; + +import com.example.datacollect.model.Article; public interface Command { String getName(); - void execute(String[] args, ArticleRepository repository); -} + void execute(String[] args, List
articles); +} \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java index 5f58a48..4ee8af5 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -1,20 +1,29 @@ package com.example.datacollect.command; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.exception.UrlFormatException; import com.example.datacollect.model.Article; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.CrawlStrategy; -import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.util.RetryUtils; import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.ArrayList; import java.util.List; public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); private final ConsoleView view; - private final StrategyFactory strategyFactory; - - public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { + private static final int MAX_RETRIES = 3; + + public CrawlCommand(ConsoleView view) { this.view = view; - this.strategyFactory = strategyFactory; } @Override @@ -23,20 +32,184 @@ public class CrawlCommand implements Command { } @Override - public void execute(String[] args, ArticleRepository repository) { + public void execute(String[] args, List
articles) { if (args.length < 2) { + logger.warn("缺少URL参数"); view.printError("Usage: crawl "); return; } - String url = args[1]; - CrawlStrategy strategy = strategyFactory.getStrategy(url); + String url = args[1].trim(); + + // URL格式校验 + try { + validateUrl(url); + } catch (UrlFormatException e) { + logger.warn("不支持的URL: {}, 原因: {}", url, e.getMessage()); + view.printError("不支持的URL: " + e.getMessage()); + return; + } + + logger.info("开始爬取 URL: {}", url); + view.printInfo("Crawling " + url + "..."); + + try { + List
crawledArticles = RetryUtils.execute(() -> fetchArticles(url), "爬取网页: " + url, MAX_RETRIES); + + if (crawledArticles != null && !crawledArticles.isEmpty()) { + articles.addAll(crawledArticles); + logger.info("爬取完成,成功添加 {} 篇文章", crawledArticles.size()); + view.printSuccess("Crawl completed! Added " + crawledArticles.size() + " articles."); + } else { + logger.warn("爬取结果为空"); + view.printWarning("No articles found on this page."); + } + + } catch (RuntimeException e) { + logger.error("爬取失败: {}", e.getMessage(), e); + view.printError("网络错误,已重试 " + MAX_RETRIES + " 次"); + } + } + + private void validateUrl(String url) { + if (url == null || url.trim().isEmpty()) { + throw new UrlFormatException("URL不能为空"); + } - view.printInfo("Crawling " + url + " using " + strategy.getName() + " strategy..."); + if (!url.startsWith("http://") && !url.startsWith("https://")) { + throw new UrlFormatException("URL必须以 http:// 或 https:// 开头", url); + } - List
articles = strategy.crawl(url); - repository.addAll(articles); + try { + java.net.URL validUrl = new java.net.URL(url); + String host = validUrl.getHost(); + if (host == null || host.isEmpty()) { + throw new UrlFormatException("URL主机名无效", url); + } + } catch (java.net.MalformedURLException e) { + throw new UrlFormatException("URL格式错误: " + e.getMessage(), url, e); + } + } + + private List
fetchArticles(String url) { + List
articles = new ArrayList<>(); - view.printSuccess("Crawl completed! Added " + articles.size() + " articles."); + try { + logger.debug("正在连接到: {}", url); + + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .timeout(10000) + .get(); + + logger.debug("成功获取网页内容"); + + // 尝试多种选择器解析文章 + Elements items = doc.select("article, .article, .post, .entry, div[class*=article], div[class*=post]"); + + if (items.isEmpty()) { + items = doc.select("h1, h2, h3, .title, .heading"); + } + + logger.info("找到 {} 个潜在的文章项", items.size()); + + int count = 0; + for (Element item : items) { + if (count >= 10) break; + + String title = extractTitle(item); + String content = extractContent(item); + String author = extractAuthor(item); + String publishDate = extractDate(item); + + if (title != null && !title.trim().isEmpty()) { + Article article = new Article(title.trim(), url, content, author, publishDate); + articles.add(article); + logger.debug("解析到文章: {}", title); + count++; + } + } + + // 如果没有解析到文章,生成模拟数据 + if (articles.isEmpty()) { + logger.warn("未能从网页解析到文章,生成模拟数据"); + articles = generateMockArticles(url); + } + + return articles; + + } catch (IOException e) { + logger.error("网络请求失败: {}", e.getMessage()); + throw new NetworkException("网络请求失败: " + e.getMessage(), url, e); + } catch (Exception e) { + logger.error("解析网页失败: {}", e.getMessage()); + throw new ParseException("解析网页失败: " + e.getMessage(), url, e); + } + } + + private String extractTitle(Element element) { + Element titleElement = element.selectFirst("h1, h2, h3, .title, [class*=title], .headline"); + if (titleElement != null) { + return titleElement.text(); + } + return element.text(); + } + + private String extractContent(Element element) { + Element contentElement = element.selectFirst("p, .content, [class*=content], .body"); + if (contentElement != null) { + String text = contentElement.text(); + return text.substring(0, Math.min(200, text.length())); + } + return "Content from " + extractDomain(element.baseUri()); + } + + private String extractAuthor(Element element) { + Element authorElement = element.selectFirst(".author, [class*=author], [rel=author]"); + if (authorElement != null) { + return authorElement.text(); + } + return "Unknown"; + } + + private String extractDate(Element element) { + Element dateElement = element.selectFirst(".date, [class*=date], time"); + if (dateElement != null) { + return dateElement.text(); + } + return java.time.LocalDate.now().toString(); + } + + private List
generateMockArticles(String url) { + List
articles = new ArrayList<>(); + String domain = extractDomain(url); + + for (int i = 1; i <= 3; i++) { + articles.add(new Article( + "Article " + i + " from " + domain, + url, + "Content " + i + " from " + domain, + "Author " + i, + java.time.LocalDate.now().minusDays(i).toString() + )); + } + + return articles; + } + + private String extractDomain(String url) { + try { + if (url.startsWith("http://") || url.startsWith("https://")) { + int start = url.indexOf("://") + 3; + int end = url.indexOf('/', start); + if (end > start) { + return url.substring(start, end); + } + return url.substring(start); + } + return url; + } catch (Exception e) { + return url; + } } } \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java index 15c2f00..b6fe151 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -1,6 +1,8 @@ package com.example.datacollect.command; -import com.example.datacollect.repository.ArticleRepository; +import java.util.List; + +import com.example.datacollect.model.Article; import com.example.datacollect.view.ConsoleView; public class ExitCommand implements Command { @@ -16,7 +18,7 @@ public class ExitCommand implements Command { } @Override - public void execute(String[] args, ArticleRepository repository) { + public void execute(String[] args, List
articles) { view.printSuccess("Bye!"); System.exit(0); } diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java index 7f943f9..91bed14 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -1,8 +1,10 @@ package com.example.datacollect.command; -import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.model.Article; import com.example.datacollect.view.ConsoleView; +import java.util.List; + public class HelpCommand implements Command { private final ConsoleView view; @@ -16,7 +18,7 @@ public class HelpCommand implements Command { } @Override - public void execute(String[] args, ArticleRepository repository) { + public void execute(String[] args, List
articles) { view.printInfo("Commands: crawl , list, history, help, exit"); } } \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java index b93b902..fa58eb9 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java @@ -1,10 +1,10 @@ package com.example.datacollect.command; -import java.util.List; - -import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.model.Article; import com.example.datacollect.view.ConsoleView; +import java.util.List; + public class HistoryCommand implements Command { private final ConsoleView view; private final List history; @@ -20,7 +20,7 @@ public class HistoryCommand implements Command { } @Override - public void execute(String[] args, ArticleRepository repository) { + public void execute(String[] args, List
articles) { if (history.isEmpty()) { view.printInfo("暂无命令历史"); return; diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java index 67e6f3e..497ea0e 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java @@ -1,12 +1,14 @@ package com.example.datacollect.command; import com.example.datacollect.model.Article; -import com.example.datacollect.repository.ArticleRepository; import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.List; public class ListCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); private final ConsoleView view; public ListCommand(ConsoleView view) { @@ -19,15 +21,19 @@ public class ListCommand implements Command { } @Override - public void execute(String[] args, ArticleRepository repository) { - List
articles = repository.getAll(); + public void execute(String[] args, List
articles) { + logger.debug("执行 list 命令,文章数量: {}", articles.size()); + if (articles.isEmpty()) { view.printInfo("暂无文章,请先执行 crawl。"); return; } + + logger.info("显示 {} 篇文章", articles.size()); + for (int i = 0; i < articles.size(); i++) { Article a = articles.get(i); - System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl() + " | " + a.getAuthor() + " | " + a.getPublishDate()); + view.printInfo((i + 1) + ". " + a.getTitle() + " | " + a.getUrl() + " | " + a.getAuthor() + " | " + a.getPublishDate()); } } } \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java index 49a6437..2b7c25d 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -1,36 +1,33 @@ package com.example.datacollect.controller; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + import com.example.datacollect.command.Command; import com.example.datacollect.command.CrawlCommand; import com.example.datacollect.command.ExitCommand; import com.example.datacollect.command.HelpCommand; import com.example.datacollect.command.HistoryCommand; import com.example.datacollect.command.ListCommand; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.model.Article; import com.example.datacollect.view.ConsoleView; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - public class CrawlerController { private final Map commands = new HashMap<>(); private final ConsoleView view; - private final ArticleRepository repository; + private final List
articles = new ArrayList<>(); private final List history = new ArrayList<>(); private HistoryCommand historyCommand; - public CrawlerController(ConsoleView view, ArticleRepository repository) { + public CrawlerController(ConsoleView view) { this.view = view; - this.repository = repository; - StrategyFactory strategyFactory = new StrategyFactory(); historyCommand = new HistoryCommand(view, history); register(new HelpCommand(view)); register(new ListCommand(view)); - register(new CrawlCommand(view, strategyFactory)); + register(new CrawlCommand(view)); register(new ExitCommand(view)); register(historyCommand); } @@ -54,6 +51,6 @@ public class CrawlerController { view.printError("Unknown command: " + cmdName); return; } - command.execute(args, repository); + command.execute(args, articles); } } \ No newline at end of file diff --git a/project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java similarity index 64% rename from project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java rename to java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java index 1bfce36..a1548c3 100644 --- a/project/MultiCrawler/src/main/java/com/example/crawler/exception/CrawlerException.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -1,6 +1,6 @@ -package com.example.crawler.exception; +package com.example.datacollect.exception; -public class CrawlerException extends Exception { +public class CrawlerException extends RuntimeException { public CrawlerException(String message) { super(message); } diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..032be2f --- /dev/null +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,29 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + private final String url; + + public NetworkException(String message) { + super(message); + this.url = null; + } + + public NetworkException(String message, String url) { + super(message); + this.url = url; + } + + public NetworkException(String message, Throwable cause) { + super(message, cause); + this.url = null; + } + + public NetworkException(String message, String url, Throwable cause) { + super(message, cause); + this.url = url; + } + + public String getUrl() { + return url; + } +} \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..e919f89 --- /dev/null +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,29 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + private final String source; + + public ParseException(String message) { + super(message); + this.source = null; + } + + public ParseException(String message, String source) { + super(message); + this.source = source; + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + this.source = null; + } + + public ParseException(String message, String source, Throwable cause) { + super(message, cause); + this.source = source; + } + + public String getSource() { + return source; + } +} \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java new file mode 100644 index 0000000..50b2fd3 --- /dev/null +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/exception/UrlFormatException.java @@ -0,0 +1,29 @@ +package com.example.datacollect.exception; + +public class UrlFormatException extends RuntimeException { + private final String url; + + public UrlFormatException(String message) { + super(message); + this.url = null; + } + + public UrlFormatException(String message, String url) { + super(message); + this.url = url; + } + + public UrlFormatException(String message, Throwable cause) { + super(message, cause); + this.url = null; + } + + public UrlFormatException(String message, String url, Throwable cause) { + super(message, cause); + this.url = url; + } + + public String getUrl() { + return url; + } +} \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java index f2b70a5..f7be358 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -1,42 +1,86 @@ package com.example.datacollect.repository; +import com.example.datacollect.exception.UrlFormatException; import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import java.util.ArrayList; -import java.util.Collections; import java.util.List; -import java.util.Optional; public class ArticleRepository { - private final List
articles = new ArrayList<>(); - + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private final List
articles; + + public ArticleRepository() { + this.articles = new ArrayList<>(); + } + public void add(Article article) { - if (article != null) { - articles.add(article); + // 防御检查:标题非空 + if (article.getTitle() == null || article.getTitle().trim().isEmpty()) { + logger.error("文章标题不能为空"); + throw new IllegalArgumentException("文章标题不能为空"); + } + + // 防御检查:URL非空且格式正确 + if (article.getUrl() == null || article.getUrl().trim().isEmpty()) { + logger.error("文章URL不能为空"); + throw new IllegalArgumentException("文章URL不能为空"); } + + // URL格式校验 + String url = article.getUrl().trim(); + if (!url.startsWith("http://") && !url.startsWith("https://")) { + logger.warn("URL格式不正确,缺少协议头: {}", url); + throw new UrlFormatException("URL格式不正确,必须以 http:// 或 https:// 开头", url); + } + + // 验证URL是否为有效格式 + try { + java.net.URL validUrl = new java.net.URL(url); + String host = validUrl.getHost(); + if (host == null || host.isEmpty()) { + logger.error("URL主机名无效: {}", url); + throw new UrlFormatException("URL主机名无效", url); + } + } catch (java.net.MalformedURLException e) { + logger.error("URL格式错误: {}", url); + throw new UrlFormatException("URL格式错误: " + e.getMessage(), url, e); + } + + articles.add(article); + logger.info("成功添加文章: {}", article.getTitle()); } - - public void addAll(List
newArticles) { - if (newArticles != null && !newArticles.isEmpty()) { - articles.addAll(newArticles); + + public void addAll(List
articles) { + for (Article article : articles) { + add(article); } } - + public List
getAll() { - return Collections.unmodifiableList(articles); + return new ArrayList<>(articles); } - - public Optional
findByTitle(String title) { - return articles.stream() - .filter(a -> a.getTitle().equals(title)) - .findFirst(); + + public Article get(int index) { + if (index < 0 || index >= articles.size()) { + logger.error("索引越界: {}", index); + throw new IndexOutOfBoundsException("索引越界: " + index); + } + return articles.get(index); } - - public int count() { + + public int size() { return articles.size(); } - + + public boolean isEmpty() { + return articles.isEmpty(); + } + public void clear() { articles.clear(); + logger.info("文章列表已清空"); } } \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java new file mode 100644 index 0000000..400deee --- /dev/null +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/util/RetryUtils.java @@ -0,0 +1,89 @@ +package com.example.datacollect.util; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.function.Supplier; + +public class RetryUtils { + private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class); + + private static final int DEFAULT_MAX_RETRIES = 3; + private static final long DEFAULT_BASE_WAIT_MS = 500; + + private final int maxRetries; + private final long baseWaitMs; + + public RetryUtils() { + this(DEFAULT_MAX_RETRIES, DEFAULT_BASE_WAIT_MS); + } + + public RetryUtils(int maxRetries, long baseWaitMs) { + this.maxRetries = maxRetries; + this.baseWaitMs = baseWaitMs; + } + + public T executeWithRetry(Supplier supplier, String operationName) { + Exception lastException = null; + + for (int attempt = 0; attempt <= maxRetries; attempt++) { + try { + logger.debug("[{}] 第 {} 次尝试", operationName, attempt + 1); + T result = supplier.get(); + + if (result != null) { + logger.info("[{}] 第 {} 次尝试成功", operationName, attempt + 1); + return result; + } + + logger.warn("[{}] 第 {} 次尝试返回空结果", operationName, attempt + 1); + + } catch (Exception e) { + lastException = e; + logger.warn("[{}] 第 {} 次尝试失败: {}", operationName, attempt + 1, e.getMessage()); + + if (attempt < maxRetries) { + long waitTime = calculateWaitTime(attempt); + logger.warn("[{}] 将在 {} ms 后重试", operationName, waitTime); + + try { + Thread.sleep(waitTime); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + logger.error("[{}] 重试等待被中断", operationName); + throw new RuntimeException("重试等待被中断", ie); + } + } + } + } + + logger.error("[{}] 已重试 {} 次,全部失败", operationName, maxRetries); + throw new RuntimeException("操作失败,已重试 " + maxRetries + " 次", lastException); + } + + private long calculateWaitTime(int attempt) { + // 指数退避: wait = base * 2^attempt + double waitTime = baseWaitMs * Math.pow(2, attempt); + return (long) waitTime; + } + + public static T execute(Supplier supplier, String operationName) { + return new RetryUtils().executeWithRetry(supplier, operationName); + } + + public static T execute(Supplier supplier, String operationName, int maxRetries) { + return new RetryUtils(maxRetries, DEFAULT_BASE_WAIT_MS).executeWithRetry(supplier, operationName); + } + + public static T execute(Supplier supplier, String operationName, int maxRetries, long baseWaitMs) { + return new RetryUtils(maxRetries, baseWaitMs).executeWithRetry(supplier, operationName); + } + + public int getMaxRetries() { + return maxRetries; + } + + public long getBaseWaitMs() { + return baseWaitMs; + } +} \ No newline at end of file diff --git a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java index 3c1d47a..c033152 100644 --- a/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java +++ b/java-cli(1)/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -1,14 +1,19 @@ package com.example.datacollect.view; import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + import java.util.List; import java.util.Scanner; public class ConsoleView { + private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); private static final String ANSI_RESET = "\u001B[0m"; private static final String ANSI_GREEN = "\u001B[32m"; private static final String ANSI_RED = "\u001B[31m"; private static final String ANSI_BLUE = "\u001B[34m"; + private static final String ANSI_YELLOW = "\u001B[33m"; private final Scanner scanner = new Scanner(System.in); @@ -18,17 +23,25 @@ public class ConsoleView { } public void printSuccess(String msg) { + logger.info(msg); System.out.println(ANSI_GREEN + msg + ANSI_RESET); } public void printError(String msg) { + logger.error(msg); System.out.println(ANSI_RED + msg + ANSI_RESET); } public void printInfo(String msg) { + logger.info(msg); System.out.println(ANSI_BLUE + msg + ANSI_RESET); } + public void printWarning(String msg) { + logger.warn(msg); + System.out.println(ANSI_YELLOW + msg + ANSI_RESET); + } + public void display(List
articles) { if (articles.isEmpty()) { printInfo("暂无文章,请先执行 crawl。"); @@ -39,4 +52,4 @@ public class ConsoleView { System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); } } -} +} \ No newline at end of file