From 0b9d29fd56c1f1650699a2f4f45517570f2060ad Mon Sep 17 00:00:00 2001 From: wangbo <1248863822@qq.com> Date: Wed, 20 May 2026 22:05:34 +0800 Subject: [PATCH] =?UTF-8?q?=E5=88=A0=E9=99=A4=20'w11/CrawlCommand.java.jav?= =?UTF-8?q?a'?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- w11/CrawlCommand.java.java | 98 -------------------------------------- 1 file changed, 98 deletions(-) delete mode 100644 w11/CrawlCommand.java.java diff --git a/w11/CrawlCommand.java.java b/w11/CrawlCommand.java.java deleted file mode 100644 index e094185..0000000 --- a/w11/CrawlCommand.java.java +++ /dev/null @@ -1,98 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.exception.NetworkException; -import com.example.datacollect.exception.ParseException; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.CrawlStrategy; -import com.example.datacollect.strategy.StrategyFactory; -import com.example.datacollect.view.ConsoleView; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.io.IOException; - -public class CrawlCommand implements Command { - private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); - private static final int MAX_RETRIES = 3; - private static final long RETRY_DELAY_MS = 1000; - - private final ConsoleView view; - private final StrategyFactory strategyFactory; - - public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { - this.view = view; - this.strategyFactory = strategyFactory; - } - - @Override - public String getName() { - return "crawl"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - if (args.length < 2) { - view.printError("Usage: crawl "); - return; - } - String url = args[1]; - logger.info("Crawl command received for URL: {}", url); - - CrawlStrategy strategy = strategyFactory.getStrategy(url); - if (strategy == null) { - view.printError("No strategy found for: " + url); - logger.warn("No strategy found for URL: {}", url); - return; - } - - for (int attempt = 1; attempt <= MAX_RETRIES; attempt++) { - try { - view.printInfo("Crawling: " + url + " (attempt " + attempt + "/" + MAX_RETRIES + ")"); - logger.debug("Attempt {} to crawl {}", attempt, url); - - Document doc = Jsoup.connect(url) - .userAgent("Mozilla/5.0") - .timeout(10000) - .get(); - - var articles = strategy.parse(url, doc); - - for (var article : articles) { - repository.add(article); - logger.debug("Added article: {}", article.getTitle()); - } - - view.printSuccess("Crawled " + articles.size() + " articles."); - logger.info("Successfully crawled {} articles from {}", articles.size(), url); - return; // 成功,退出重试循环 - - } catch (ParseException e) { - logger.error("Parse error for URL {}: {}", url, e.getMessage(), e); - view.printError("Failed to parse: " + e.getMessage()); - return; // 解析错误不重试,直接退出 - - } catch (IOException e) { - logger.warn("Network error for URL {} (attempt {}/{}): {}", url, attempt, MAX_RETRIES, e.getMessage()); - - if (attempt == MAX_RETRIES) { - view.printError("Failed to crawl after " + MAX_RETRIES + " attempts: " + e.getMessage()); - } else { - view.printWarning("Network error, retrying in " + RETRY_DELAY_MS + "ms..."); - try { - Thread.sleep(RETRY_DELAY_MS); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - view.printError("Retry interrupted"); - return; - } - } - } catch (Exception e) { - logger.error("Unexpected error crawling {}: {}", url, e.getMessage(), e); - view.printError("Unexpected error: " + e.getMessage()); - return; - } - } - } -} \ No newline at end of file