You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 
 

331 lines
14 KiB

package controller;
import model.CrawlResult;
import model.Statistics;
import model.ResultContainer;
import repository.Repository;
import command.Command;
import command.CommandInvoker;
import command.CrawlCommand;
import command.RetryCommand;
import strategy.CrawlStrategy;
import strategy.DangDangStrategy;
import strategy.WeatherStrategy;
import strategy.MovieStrategy;
import strategy.Train12306Strategy;
import strategy.CsdnBlogStrategy;
import exception.CrawlerException;
import exception.NetworkException;
import view.CrawlerView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.util.List;
/**
 * Runs the individual site crawlers, stores their results in an in-memory
 * repository, exports them to a text file and a JSON file, and records
 * counters in a {@link Statistics} instance.
 *
 * <p>Each {@code run*Crawler} method wraps a {@link CrawlCommand} in a
 * {@link RetryCommand}, executes it, and converts the outcome (including
 * {@link NetworkException} and {@link CrawlerException}) into a
 * {@link ResultContainer}; these two exception types are never propagated to
 * the caller by the {@code run*Crawler} methods.
 */
public class CrawlerController {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);

    /**
     * Second constructor argument passed to every {@link RetryCommand}.
     * NOTE(review): presumably the maximum number of attempts — confirm against RetryCommand.
     */
    private static final int RETRY_LIMIT = 3;

    /**
     * First line written to every text export.
     * NOTE(review): the columns look book-specific, yet the same header is used for
     * every site — confirm that this matches what CrawlResult.toString() emits.
     */
    private static final String TEXT_HEADER = "Title,Price,OriginalPrice,Discount,ImageUrl,Author";

    private final CrawlerView view;
    private final CommandInvoker invoker;
    private final Repository<CrawlResult> dataRepository;
    private final Statistics<String> statistics;

    /**
     * Creates a controller bound to the given view.
     *
     * @param view the view used for user-facing messages; must not be {@code null}
     * @throws IllegalArgumentException if {@code view} is {@code null}
     */
    public CrawlerController(CrawlerView view) {
        if (view == null) {
            throw new IllegalArgumentException("View cannot be null");
        }
        this.view = view;
        this.invoker = new CommandInvoker(view);
        this.dataRepository = new Repository<>(CrawlResult.class);
        this.statistics = new Statistics<>("CrawlerController");
        logger.info("CrawlerController 初始化完成");
    }

    /**
     * Runs the DangDang book crawler and exports its results to
     * {@code dangdang_books.txt} / {@code dangdang_books.json}.
     *
     * @return a success container holding the results, or a failure container
     *         describing why the crawl produced nothing
     */
    public ResultContainer<List<CrawlResult>> runDangDangCrawler() {
        markStart("当当网图书", "dangdang_start");
        try {
            return crawlAndProcess(new DangDangStrategy(), 1, 5, "dangdang_books.txt", "当当网", "当当网图书");
        } catch (NetworkException e) {
            return networkFailure("当当网", "dangdang_failures", e);
        } catch (CrawlerException e) {
            return crawlerFailure("当当网", "当当网", "dangdang_failures", e);
        }
    }

    /**
     * Runs the weather crawler and exports its results to
     * {@code weather_cities.txt} / {@code weather_cities.json}.
     *
     * @return a success container holding the results, or a failure container
     *         describing why the crawl produced nothing
     */
    public ResultContainer<List<CrawlResult>> runWeatherCrawler() {
        markStart("中国天气网", "weather_start");
        try {
            return crawlAndProcess(new WeatherStrategy(), 1, 14, "weather_cities.txt", "中国天气网", "中国天气网");
        } catch (NetworkException e) {
            return networkFailure("中国天气网", "weather_failures", e);
        } catch (CrawlerException e) {
            // The returned message historically uses the shorter site label; kept as-is
            // so callers matching on the text are unaffected.
            return crawlerFailure("中国天气网", "天气网", "weather_failures", e);
        }
    }

    /**
     * Runs the Maoyan movie crawler and exports its results to
     * {@code maoyan_top100.txt} / {@code maoyan_top100.json}.
     *
     * @return a success container holding the results, or a failure container
     *         describing why the crawl produced nothing
     */
    public ResultContainer<List<CrawlResult>> runMaoyanMovieCrawler() {
        markStart("猫眼电影", "maoyan_start");
        try {
            return crawlAndProcess(new MovieStrategy(), 1, 10, "maoyan_top100.txt", "猫眼电影", "猫眼电影");
        } catch (NetworkException e) {
            return networkFailure("猫眼电影", "maoyan_failures", e);
        } catch (CrawlerException e) {
            return crawlerFailure("猫眼电影", "猫眼电影", "maoyan_failures", e);
        }
    }

    /**
     * Runs the 12306 train crawler and exports its results to
     * {@code train_12306.txt} / {@code train_12306.json}.
     *
     * @return a success container holding the results, or a failure container
     *         describing why the crawl produced nothing
     */
    public ResultContainer<List<CrawlResult>> runTrain12306Crawler() {
        markStart("12306火车票", "12306_start");
        try {
            return crawlAndProcess(new Train12306Strategy(), 1, 10, "train_12306.txt", "12306", "12306火车票");
        } catch (NetworkException e) {
            return networkFailure("12306", "12306_failures", e);
        } catch (CrawlerException e) {
            return crawlerFailure("12306", "12306", "12306_failures", e);
        }
    }

    /**
     * Runs the CSDN blog crawler and exports its results to
     * {@code csdn_blogs.txt} / {@code csdn_blogs.json}.
     *
     * @return a success container holding the results, or a failure container
     *         describing why the crawl produced nothing
     */
    public ResultContainer<List<CrawlResult>> runCsdnBlogCrawler() {
        markStart("CSDN博客", "csdn_start");
        try {
            return crawlAndProcess(new CsdnBlogStrategy(), 1, 15, "csdn_blogs.txt", "CSDN博客", "CSDN博客");
        } catch (NetworkException e) {
            return networkFailure("CSDN博客", "csdn_failures", e);
        } catch (CrawlerException e) {
            return crawlerFailure("CSDN博客", "CSDN博客", "csdn_failures", e);
        }
    }

    /** Logs the start of a crawl and records the current time under {@code startKey}. */
    private void markStart(String siteName, String startKey) {
        logger.info("开始执行{}爬虫", siteName);
        statistics.record(startKey, System.currentTimeMillis());
    }

    /**
     * Executes one crawl wrapped in a {@link RetryCommand} and hands the results to
     * {@link #processResults}.
     *
     * @param strategy the site-specific strategy
     * @param from     second {@link CrawlCommand} constructor argument
     *                 (presumably the first page — confirm against CrawlCommand)
     * @param to       third {@link CrawlCommand} constructor argument
     *                 (presumably the last page — confirm against CrawlCommand)
     * @param filename text export file; also passed to the command
     * @param label    short site label used in the success log line
     * @param siteName site name used in result messages and statistics keys
     */
    private ResultContainer<List<CrawlResult>> crawlAndProcess(CrawlStrategy strategy, int from, int to,
            String filename, String label, String siteName) throws NetworkException, CrawlerException {
        Command command = new CrawlCommand(strategy, from, to, filename, view);
        Command retryCommand = new RetryCommand(command, RETRY_LIMIT, view);
        List<CrawlResult> results = retryCommand.execute();
        // Guard the size lookup: a null result must reach processResults, which turns
        // it into a regular "empty result" failure instead of a NullPointerException.
        int fetched = (results == null) ? 0 : results.size();
        logger.info("{}爬虫执行成功,获取 {} 条数据", label, fetched);
        return processResults(results, filename, siteName);
    }

    /** Logs and displays a network failure, bumps the failure counter and builds the failure result. */
    private ResultContainer<List<CrawlResult>> networkFailure(String label, String failureKey,
            NetworkException e) {
        // The trailing throwable makes SLF4J log the stack trace as well as the message.
        logger.error("【断网异常】{}爬虫网络请求失败: {}", label, e.getMessage(), e);
        view.showError("【断网异常】" + label + "爬虫网络请求失败: " + e.getMessage());
        statistics.increment(failureKey);
        return ResultContainer.failure("【断网异常】" + label + "爬虫失败 - 网络连接异常: " + e.getMessage(), e);
    }

    /**
     * Logs a non-network crawler failure, bumps the failure counter and builds the failure result.
     *
     * @param logLabel     site label used in the log line
     * @param messageLabel site label used in the returned message
     */
    private ResultContainer<List<CrawlResult>> crawlerFailure(String logLabel, String messageLabel,
            String failureKey, CrawlerException e) {
        logger.error("{}爬虫执行失败: {}", logLabel, e.getMessage(), e);
        statistics.increment(failureKey);
        return ResultContainer.failure(messageLabel + "爬虫失败: " + e.getMessage(), e);
    }

    /**
     * Stores the results in the repository, exports them as text and JSON, and
     * updates the statistics.
     *
     * <p>NOTE(review): the count/end statistics keys are derived from {@code siteName}
     * while the start keys use ASCII prefixes (e.g. {@code dangdang_start}), so the two
     * do not pair up by name. Also, a failed export is only reported through the view
     * and the log; the returned container is still a success.
     *
     * @return a failure container when {@code results} is {@code null} or empty,
     *         otherwise a success container wrapping {@code results}
     */
    private ResultContainer<List<CrawlResult>> processResults(List<CrawlResult> results,
            String filename, String siteName) {
        if (results == null || results.isEmpty()) {
            logger.warn("{} 爬取结果为空", siteName);
            return ResultContainer.failure(siteName + "爬取结果为空");
        }
        for (CrawlResult result : results) {
            dataRepository.add(result);
        }
        saveToFile(results, filename);
        saveToJson(results, filename.replace(".txt", ".json"));
        statistics.record(siteName + "_count", results.size());
        statistics.record(siteName + "_end", System.currentTimeMillis());
        statistics.increment("total_items", results.size());
        logger.info("{} 爬取完成,共 {} 条数据,已保存到 {}", siteName, results.size(), filename);
        return ResultContainer.success(results, siteName + "爬取完成,共 " + results.size() + " 条数据");
    }

    /**
     * Runs every crawler in sequence, shows each outcome in the view, then prints
     * and records a summary (success count, failure count, total collected items).
     */
    public void runAllCrawlers() {
        logger.info("开始执行所有爬虫");
        int successCount = 0;
        int failCount = 0;

        view.showHeader("当当网图书爬虫");
        if (reportOutcome(runDangDangCrawler())) {
            successCount++;
        } else {
            failCount++;
        }

        view.showHeader("中国天气网爬虫");
        if (reportOutcome(runWeatherCrawler())) {
            successCount++;
        } else {
            failCount++;
        }

        view.showHeader("猫眼电影爬虫");
        if (reportOutcome(runMaoyanMovieCrawler())) {
            successCount++;
        } else {
            failCount++;
        }

        view.showHeader("12306火车票爬虫");
        if (reportOutcome(runTrain12306Crawler())) {
            successCount++;
        } else {
            failCount++;
        }

        view.showHeader("CSDN博客爬虫");
        if (reportOutcome(runCsdnBlogCrawler())) {
            successCount++;
        } else {
            failCount++;
        }

        view.showLine();
        view.showMessage("所有爬虫执行完成");
        view.showMessage("成功: " + successCount + " 个");
        view.showMessage("失败: " + failCount + " 个");
        view.showMessage("总计采集数据: " + statistics.getCount("total_items") + " 条");
        statistics.record("success_count", successCount);
        statistics.record("fail_count", failCount);
        logger.info("所有爬虫执行完成,成功: {},失败: {},总计数据: {}",
                successCount, failCount, statistics.getCount("total_items"));
    }

    /**
     * Shows a crawl outcome in the view as a success or an error message.
     *
     * @return {@code true} when the outcome is a success
     */
    private boolean reportOutcome(ResultContainer<List<CrawlResult>> outcome) {
        if (outcome.isSuccess()) {
            view.showSuccess(outcome.getMessage());
            return true;
        }
        view.showError(outcome.getMessage());
        return false;
    }

    /**
     * Writes the results to a UTF-8 text file: one header line followed by one
     * {@code toString()} line per non-null result. Missing parent directories are
     * created. Problems are reported through the logger and the view; nothing is thrown
     * for a blank file name, a {@code null} list or an I/O failure.
     *
     * @param results  results to write; {@code null} entries are skipped
     * @param filename target file path; must not be blank
     */
    public void saveToFile(List<CrawlResult> results, String filename) {
        if (filename == null || filename.trim().isEmpty()) {
            logger.error("文件名为空,无法保存");
            view.showError("文件名不能为空");
            return;
        }
        if (results == null) {
            // Checked before the file is opened so an existing export is not truncated.
            logger.error("结果列表为 null,无法保存: {}", filename);
            view.showError("没有可保存的数据: " + filename);
            return;
        }
        try {
            File file = new File(filename);
            ensureParentDirectory(file);
            try (PrintWriter writer = openUtf8Writer(file)) {
                writer.println(TEXT_HEADER);
                for (CrawlResult result : results) {
                    if (result != null) {
                        writer.println(result.toString());
                    }
                }
                failOnWriteError(writer);
            }
            logger.info("文件保存成功: {}", filename);
            view.showSuccess("文件保存成功: " + filename);
        } catch (IOException e) {
            logger.error("保存文件失败: {} - {}", filename, e.getMessage(), e);
            view.showError("保存文件失败: " + filename + " (" + e.getMessage() + ")");
        }
    }

    /**
     * Writes the results to a UTF-8 file as a JSON array, one {@code toJson()} value
     * per line. Missing parent directories are created. Problems are reported through
     * the logger and the view; nothing is thrown for a blank file name, a {@code null}
     * list or an I/O failure.
     *
     * @param results  results to write; {@code null} entries are skipped
     * @param filename target file path; must not be blank
     */
    public void saveToJson(List<CrawlResult> results, String filename) {
        if (filename == null || filename.trim().isEmpty()) {
            logger.error("JSON文件名为空,无法保存");
            view.showError("文件名不能为空");
            return;
        }
        if (results == null) {
            logger.error("结果列表为 null,无法保存JSON: {}", filename);
            view.showError("没有可保存的数据: " + filename);
            return;
        }
        try {
            File file = new File(filename);
            ensureParentDirectory(file);

            StringBuilder json = new StringBuilder("[\n");
            // Separators are emitted *before* each element after the first, so skipped
            // null entries can never leave a dangling comma before the closing bracket.
            boolean wroteElement = false;
            for (CrawlResult result : results) {
                if (result == null) {
                    continue;
                }
                if (wroteElement) {
                    json.append(",\n");
                }
                json.append(" ").append(result.toJson());
                wroteElement = true;
            }
            if (wroteElement) {
                json.append("\n");
            }
            json.append("]");

            try (PrintWriter writer = openUtf8Writer(file)) {
                writer.write(json.toString());
                failOnWriteError(writer);
            }
            logger.info("JSON文件保存成功: {}", filename);
            view.showSuccess("JSON文件保存成功: " + filename);
        } catch (IOException e) {
            logger.error("保存JSON文件失败: {} - {}", filename, e.getMessage(), e);
            view.showError("保存JSON文件失败: " + filename + " (" + e.getMessage() + ")");
        }
    }

    /**
     * Creates the parent directory of {@code file} when it does not exist yet.
     *
     * @throws IOException if the directory is missing and could not be created
     */
    private static void ensureParentDirectory(File file) throws IOException {
        File parentDir = file.getParentFile();
        if (parentDir == null || parentDir.exists()) {
            return;
        }
        // mkdirs() returns false both on failure and when another thread/process won
        // the race, so re-check for the directory before giving up.
        if (!parentDir.mkdirs() && !parentDir.isDirectory()) {
            throw new IOException("无法创建目录: " + parentDir.getPath());
        }
    }

    /** Opens {@code file} for writing (truncating it) with UTF-8 encoding. */
    private static PrintWriter openUtf8Writer(File file) throws IOException {
        return new PrintWriter(new OutputStreamWriter(new FileOutputStream(file), StandardCharsets.UTF_8));
    }

    /**
     * Flushes the writer and surfaces any write error it recorded; {@link PrintWriter}
     * never throws on write, so this is the only way to notice a failed export.
     *
     * @throws IOException if the writer reports an error
     */
    private static void failOnWriteError(PrintWriter writer) throws IOException {
        if (writer.checkError()) {
            throw new IOException("写入文件时发生错误");
        }
    }

    /** Returns the repository that accumulates every successfully processed result. */
    public Repository<CrawlResult> getDataRepository() {
        return dataRepository;
    }

    /** Returns the statistics instance this controller records into. */
    public Statistics<String> getStatistics() {
        return statistics;
    }
}