package com.example.datacollect.command; import com.example.datacollect.exception.CrawlerException; import com.example.datacollect.model.Article; import com.example.datacollect.repository.ArticleRepository; import com.example.datacollect.service.ScraperService; import com.example.datacollect.strategy.ArticleCrawlStrategy; import com.example.datacollect.view.ConsoleView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.util.List; public class CrawlCommand implements Command { private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); private final ConsoleView view; private final ArticleRepository repository; private final ScraperService scraperService; private static final int MAX_RETRIES = 3; private static final long RETRY_DELAY_MS = 1000; public CrawlCommand(ConsoleView view, ArticleRepository repository) { this.view = view; this.repository = repository; this.scraperService = new ScraperService(new ArticleCrawlStrategy(), MAX_RETRIES, RETRY_DELAY_MS); } @Override public String getName() { return "crawl"; } @Override public void execute(String[] args, List
articles) { if (args.length < 2) { view.printError("Usage: crawl "); return; } String url = args[1]; logger.info("Starting crawl for URL: {}", url); view.printInfo("Starting to crawl: " + url); int attempts = 0; boolean success = false; CrawlerException lastException = null; while (attempts < MAX_RETRIES && !success) { attempts++; try { Article article = scraperService.scrape(url); repository.save(article); articles.add(article); logger.info("Successfully crawled article: {}", article.getTitle()); view.printSuccess("Successfully crawled: " + article.getTitle()); success = true; } catch (CrawlerException e) { lastException = e; logger.warn("Attempt {}/{} failed for URL: {}", attempts, MAX_RETRIES, url); view.printError("Attempt " + attempts + " failed: " + e.getMessage()); if (attempts < MAX_RETRIES) { try { Thread.sleep(RETRY_DELAY_MS); } catch (InterruptedException ie) { Thread.currentThread().interrupt(); logger.error("Crawl interrupted for URL: {}", url); view.printError("Crawl interrupted"); return; } } } } if (!success) { logger.error("Failed to crawl URL after {} attempts: {}", MAX_RETRIES, url); view.printError("Failed to crawl after " + MAX_RETRIES + " attempts"); if (lastException != null) { view.printError("Last error: " + lastException.getMessage()); } } } }