package controller; import model.CrawlResult; import model.Statistics; import model.ResultContainer; import repository.Repository; import command.Command; import command.CommandInvoker; import command.CrawlCommand; import command.RetryCommand; import strategy.CrawlStrategy; import strategy.DangDangStrategy; import strategy.WeatherStrategy; import strategy.MovieStrategy; import strategy.Train12306Strategy; import strategy.CsdnBlogStrategy; import exception.CrawlerException; import exception.NetworkException; import view.CrawlerView; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.PrintWriter; import java.util.List; public class CrawlerController { private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); private final CrawlerView view; private final CommandInvoker invoker; private final Repository dataRepository; private final Statistics statistics; public CrawlerController(CrawlerView view) { if (view == null) { throw new IllegalArgumentException("View cannot be null"); } this.view = view; this.invoker = new CommandInvoker(view); this.dataRepository = new Repository<>(CrawlResult.class); this.statistics = new Statistics<>("CrawlerController"); logger.info("CrawlerController 初始化完成"); } public ResultContainer> runDangDangCrawler() { logger.info("开始执行当当网图书爬虫"); statistics.record("dangdang_start", System.currentTimeMillis()); try { CrawlStrategy strategy = new DangDangStrategy(); Command command = new CrawlCommand(strategy, 1, 5, "dangdang_books.txt", view); Command retryCommand = new RetryCommand(command, 3, view); List results = retryCommand.execute(); logger.info("当当网爬虫执行成功,获取 {} 条数据", results.size()); return processResults(results, "dangdang_books.txt", "当当网图书"); } catch (NetworkException e) { logger.error("【断网异常】当当网爬虫网络请求失败: {}", e.getMessage()); view.showError("【断网异常】当当网爬虫网络请求失败: " + e.getMessage()); statistics.increment("dangdang_failures"); return ResultContainer.failure("【断网异常】当当网爬虫失败 - 网络连接异常: " + e.getMessage(), e); } catch (CrawlerException e) { logger.error("当当网爬虫执行失败: {}", e.getMessage()); statistics.increment("dangdang_failures"); return ResultContainer.failure("当当网爬虫失败: " + e.getMessage(), e); } } public ResultContainer> runWeatherCrawler() { logger.info("开始执行中国天气网爬虫"); statistics.record("weather_start", System.currentTimeMillis()); try { CrawlStrategy strategy = new WeatherStrategy(); Command command = new CrawlCommand(strategy, 1, 14, "weather_cities.txt", view); Command retryCommand = new RetryCommand(command, 3, view); List results = retryCommand.execute(); logger.info("中国天气网爬虫执行成功,获取 {} 条数据", results.size()); return processResults(results, "weather_cities.txt", "中国天气网"); } catch (NetworkException e) { logger.error("【断网异常】中国天气网爬虫网络请求失败: {}", e.getMessage()); view.showError("【断网异常】中国天气网爬虫网络请求失败: " + e.getMessage()); statistics.increment("weather_failures"); return ResultContainer.failure("【断网异常】中国天气网爬虫失败 - 网络连接异常: " + e.getMessage(), e); } catch (CrawlerException e) { logger.error("中国天气网爬虫执行失败: {}", e.getMessage()); statistics.increment("weather_failures"); return ResultContainer.failure("天气网爬虫失败: " + e.getMessage(), e); } } public ResultContainer> runMaoyanMovieCrawler() { logger.info("开始执行猫眼电影爬虫"); statistics.record("maoyan_start", System.currentTimeMillis()); try { CrawlStrategy strategy = new MovieStrategy(); Command command = new CrawlCommand(strategy, 1, 10, "maoyan_top100.txt", view); Command retryCommand = new RetryCommand(command, 3, view); List results = retryCommand.execute(); logger.info("猫眼电影爬虫执行成功,获取 {} 条数据", results.size()); return processResults(results, "maoyan_top100.txt", "猫眼电影"); } catch (NetworkException e) { logger.error("【断网异常】猫眼电影爬虫网络请求失败: {}", e.getMessage()); view.showError("【断网异常】猫眼电影爬虫网络请求失败: " + e.getMessage()); statistics.increment("maoyan_failures"); return ResultContainer.failure("【断网异常】猫眼电影爬虫失败 - 网络连接异常: " + e.getMessage(), e); } catch (CrawlerException e) { logger.error("猫眼电影爬虫执行失败: {}", e.getMessage()); statistics.increment("maoyan_failures"); return ResultContainer.failure("猫眼电影爬虫失败: " + e.getMessage(), e); } } public ResultContainer> runTrain12306Crawler() { logger.info("开始执行12306火车票爬虫"); statistics.record("12306_start", System.currentTimeMillis()); try { CrawlStrategy strategy = new Train12306Strategy(); Command command = new CrawlCommand(strategy, 1, 10, "train_12306.txt", view); Command retryCommand = new RetryCommand(command, 3, view); List results = retryCommand.execute(); logger.info("12306爬虫执行成功,获取 {} 条数据", results.size()); return processResults(results, "train_12306.txt", "12306火车票"); } catch (NetworkException e) { logger.error("【断网异常】12306爬虫网络请求失败: {}", e.getMessage()); view.showError("【断网异常】12306爬虫网络请求失败: " + e.getMessage()); statistics.increment("12306_failures"); return ResultContainer.failure("【断网异常】12306爬虫失败 - 网络连接异常: " + e.getMessage(), e); } catch (CrawlerException e) { logger.error("12306爬虫执行失败: {}", e.getMessage()); statistics.increment("12306_failures"); return ResultContainer.failure("12306爬虫失败: " + e.getMessage(), e); } } public ResultContainer> runCsdnBlogCrawler() { logger.info("开始执行CSDN博客爬虫"); statistics.record("csdn_start", System.currentTimeMillis()); try { CrawlStrategy strategy = new CsdnBlogStrategy(); Command command = new CrawlCommand(strategy, 1, 15, "csdn_blogs.txt", view); Command retryCommand = new RetryCommand(command, 3, view); List results = retryCommand.execute(); logger.info("CSDN博客爬虫执行成功,获取 {} 条数据", results.size()); return processResults(results, "csdn_blogs.txt", "CSDN博客"); } catch (NetworkException e) { logger.error("【断网异常】CSDN博客爬虫网络请求失败: {}", e.getMessage()); view.showError("【断网异常】CSDN博客爬虫网络请求失败: " + e.getMessage()); statistics.increment("csdn_failures"); return ResultContainer.failure("【断网异常】CSDN博客爬虫失败 - 网络连接异常: " + e.getMessage(), e); } catch (CrawlerException e) { logger.error("CSDN博客爬虫执行失败: {}", e.getMessage()); statistics.increment("csdn_failures"); return ResultContainer.failure("CSDN博客爬虫失败: " + e.getMessage(), e); } } private ResultContainer> processResults(List results, String filename, String siteName) { if (results == null || results.isEmpty()) { logger.warn("{} 爬取结果为空", siteName); return ResultContainer.failure(siteName + "爬取结果为空"); } for (CrawlResult result : results) { dataRepository.add(result); } saveToFile(results, filename); saveToJson(results, filename.replace(".txt", ".json")); statistics.record(siteName + "_count", results.size()); statistics.record(siteName + "_end", System.currentTimeMillis()); statistics.increment("total_items", results.size()); logger.info("{} 爬取完成,共 {} 条数据,已保存到 {}", siteName, results.size(), filename); return ResultContainer.success(results, siteName + "爬取完成,共 " + results.size() + " 条数据"); } public void runAllCrawlers() { logger.info("开始执行所有爬虫"); int successCount = 0; int failCount = 0; ResultContainer> result; view.showHeader("当当网图书爬虫"); result = runDangDangCrawler(); if (result.isSuccess()) { successCount++; view.showSuccess(result.getMessage()); } else { failCount++; view.showError(result.getMessage()); } view.showHeader("中国天气网爬虫"); result = runWeatherCrawler(); if (result.isSuccess()) { successCount++; view.showSuccess(result.getMessage()); } else { failCount++; view.showError(result.getMessage()); } view.showHeader("猫眼电影爬虫"); result = runMaoyanMovieCrawler(); if (result.isSuccess()) { successCount++; view.showSuccess(result.getMessage()); } else { failCount++; view.showError(result.getMessage()); } view.showHeader("12306火车票爬虫"); result = runTrain12306Crawler(); if (result.isSuccess()) { successCount++; view.showSuccess(result.getMessage()); } else { failCount++; view.showError(result.getMessage()); } view.showHeader("CSDN博客爬虫"); result = runCsdnBlogCrawler(); if (result.isSuccess()) { successCount++; view.showSuccess(result.getMessage()); } else { failCount++; view.showError(result.getMessage()); } view.showLine(); view.showMessage("所有爬虫执行完成"); view.showMessage("成功: " + successCount + " 个"); view.showMessage("失败: " + failCount + " 个"); view.showMessage("总计采集数据: " + statistics.getCount("total_items") + " 条"); statistics.record("success_count", successCount); statistics.record("fail_count", failCount); logger.info("所有爬虫执行完成,成功: {},失败: {},总计数据: {}", successCount, failCount, statistics.getCount("total_items")); } public void saveToFile(List results, String filename) { if (filename == null || filename.trim().isEmpty()) { logger.error("文件名为空,无法保存"); view.showError("文件名不能为空"); return; } try { File file = new File(filename); File parentDir = file.getParentFile(); if (parentDir != null && !parentDir.exists()) { parentDir.mkdirs(); } try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"))) { writer.println("Title,Price,OriginalPrice,Discount,ImageUrl,Author"); for (CrawlResult result : results) { if (result != null) { writer.println(result.toString()); } } } logger.info("文件保存成功: {}", filename); view.showSuccess("文件保存成功: " + filename); } catch (IOException e) { logger.error("保存文件失败: {} - {}", filename, e.getMessage()); view.showError("保存文件失败: " + filename + " (" + e.getMessage() + ")"); } } public void saveToJson(List results, String filename) { if (filename == null || filename.trim().isEmpty()) { logger.error("JSON文件名为空,无法保存"); view.showError("文件名不能为空"); return; } try { File file = new File(filename); File parentDir = file.getParentFile(); if (parentDir != null && !parentDir.exists()) { parentDir.mkdirs(); } StringBuilder json = new StringBuilder(); json.append("[\n"); for (int i = 0; i < results.size(); i++) { CrawlResult result = results.get(i); if (result != null) { json.append(" ").append(result.toJson()); if (i < results.size() - 1) { json.append(","); } json.append("\n"); } } json.append("]"); try (PrintWriter writer = new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), "UTF-8"))) { writer.write(json.toString()); } logger.info("JSON文件保存成功: {}", filename); view.showSuccess("JSON文件保存成功: " + filename); } catch (IOException e) { logger.error("保存JSON文件失败: {} - {}", filename, e.getMessage()); view.showError("保存JSON文件失败: " + filename + " (" + e.getMessage() + ")"); } } public Repository getDataRepository() { return dataRepository; } public Statistics getStatistics() { return statistics; } }