package com.example.datacollect.command;

import com.example.datacollect.exception.NetworkException;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Console command {@code crawl <URL>}.
 *
 * <p>Downloads the page at the given URL with jsoup, hands the document to the
 * {@link CrawlStrategy} selected by the {@link StrategyFactory}, and stores only
 * those parsed articles whose URL is not yet known to the repository.
 * Download failures are attempted up to {@link #MAX_RETRY} times in total.
 */
public class CrawlCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);

    /** Maximum number of download attempts before the command gives up. */
    private static final int MAX_RETRY = 3;

    /** Pause between two download attempts, in milliseconds. */
    private static final long RETRY_INTERVAL = 1000;

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * Creates the command.
     *
     * @param view            console view used for all user-facing output
     * @param strategyFactory factory that selects a crawl strategy for a URL
     */
    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    /**
     * Returns the name of this command.
     *
     * @return the literal {@code "crawl"}
     */
    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Crawls the URL given in {@code args[1]} and adds previously unseen articles
     * to the repository.
     *
     * <p>Outcomes are reported through the console view; this method does not
     * propagate exceptions raised while fetching or parsing. Only
     * {@link NetworkException} triggers another attempt; a {@link ParseException}
     * or any other exception ends the command immediately.
     *
     * @param args       command tokens; {@code args[1]} is read as the URL to crawl
     * @param repository repository queried for known URLs and extended with new articles
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args == null || args.length < 2) {
            view.printError("用法: crawl <URL> (示例: crawl https://news.hnu.edu.cn)");
            return;
        }
        String url = args[1];
        logger.info("开始抓取URL: {}", url);

        CrawlStrategy strategy = strategyFactory.getStrategy(url);
        if (strategy == null) {
            view.printError("无适配的抓取策略: " + url);
            return;
        }

        // Attempts are numbered 1..MAX_RETRY for display; the fetch helper keeps
        // receiving the zero-based count of failures so far.
        for (int attempt = 1; attempt <= MAX_RETRY; attempt++) {
            try {
                Document doc = fetchDocumentWithRetry(url, attempt - 1);
                List<Article> allArticles = strategy.parse(url, doc);

                // Incremental crawl: drop articles without a usable URL and those
                // whose URL the repository already contains.
                List<Article> newArticles = allArticles.stream()
                        .filter(article -> article.getUrl() != null && !article.getUrl().isBlank())
                        .filter(article -> !repository.containsUrl(article.getUrl()))
                        .collect(Collectors.toList());

                if (newArticles.isEmpty()) {
                    view.printInfo("ℹ️  无新文章（所有URL已存在）");
                    return;
                }

                // Store all new articles in one call.
                repository.addAll(newArticles);
                view.printSuccess("✅ 抓取成功 | 新增: " + newArticles.size() + " 篇 | 总计解析: " + allArticles.size() + " 篇");
                return;

            } catch (NetworkException e) {
                // Keep the full cause in the log; the console only shows the message.
                logger.warn("网络异常（第 {}/{} 次尝试）: {}", attempt, MAX_RETRY, url, e);
                view.printError("⚠️  网络异常（重试 " + attempt + "/" + MAX_RETRY + "）: " + e.getMessage());
                if (attempt >= MAX_RETRY) {
                    view.printError("❌ 抓取失败（超出最大重试次数）: " + url);
                    // Nothing left to retry, so do not wait before returning.
                    return;
                }
                try {
                    TimeUnit.MILLISECONDS.sleep(RETRY_INTERVAL);
                } catch (InterruptedException ie) {
                    // Restore the interrupt flag for the caller and stop retrying.
                    Thread.currentThread().interrupt();
                    logger.warn("抓取在重试等待期间被中断: {}", url);
                    return;
                }

            } catch (ParseException e) {
                logger.error("解析失败: {}", url, e);
                view.printError("❌ 解析失败: " + e.getMessage());
                return;

            } catch (Exception e) {
                logger.error("抓取时发生未知异常: {}", url, e);
                view.printError("❌ 未知异常: " + e.getMessage());
                return;
            }
        }
    }

    /**
     * Performs a single HTTP GET for {@code url} and returns the parsed document.
     *
     * <p>Only {@link IOException} is translated into {@link NetworkException}.
     * Unchecked exceptions (for example the one jsoup raises for a malformed URL)
     * propagate unchanged, so the caller does not retry errors that another
     * attempt cannot fix.
     *
     * @param url        address of the page to download
     * @param retryCount number of failed attempts so far; used only in the error message
     * @return the document returned by jsoup
     * @throws NetworkException if jsoup reports an I/O failure; the original
     *                          exception is attached as the cause
     */
    private Document fetchDocumentWithRetry(String url, int retryCount) throws NetworkException {
        try {
            return Jsoup.connect(url)
                    .timeout(5000) // jsoup timeout, in milliseconds
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36")
                    .get();
        } catch (IOException e) {
            throw new NetworkException("获取文档失败（重试" + retryCount + "）: " + url, e);
        }
    }
}