package command; import controller.CrawlerContext; import crawler.WeatherCrawler; import exception.CrawlerException; import exception.NetworkException; import exception.ParseException; import model.Weather; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import util.DataUtil; import java.io.IOException; import java.util.List; import java.util.Set; import java.util.stream.Collectors; public class WeatherCrawlCommand implements CrawlCommand { private final CrawlerContext context; private List weatherList; private List oldWeatherList; private static final Logger logger = LoggerFactory.getLogger(WeatherCrawlCommand.class); public WeatherCrawlCommand(CrawlerContext context) { this.context = context; } @Override public void execute() throws ParseException, NetworkException, CrawlerException, IOException { oldWeatherList = DataUtil.importJson("weather.json", Weather.class); logger.info("导入历史天气数据:{}条", oldWeatherList.size()); // 第四点:增量抓取,过滤重复城市 Set existCities = DataUtil.getExistWeatherCities("weather.json"); context.setCrawlerStrategy(new WeatherCrawler()); weatherList = (List) context.executeCrawl(); weatherList = weatherList.stream() .filter(weather -> !existCities.contains(weather.getCity())) .collect(Collectors.toList()); weatherList.addAll(oldWeatherList); DataUtil.addAll("天气数据.txt", weatherList); DataUtil.exportJson("weather.json", weatherList); logger.info("天气爬取完成,本次新增:{}条", weatherList.size() - oldWeatherList.size()); } public List getResult() { return weatherList; } }