You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

54 lines
1.8 KiB

package command;
import controller.CrawlerContext;
import crawler.WeatherCrawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Weather;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Crawl command that fetches weather records through a {@link WeatherCrawler},
 * drops records whose city is already stored, merges the remainder with the
 * previously stored records and hands the merged list to {@link DataUtil}
 * for persistence.
 */
public class WeatherCrawlCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(WeatherCrawlCommand.class);

    /** JSON file the history is imported from and the merged result is exported to. */
    private static final String WEATHER_JSON = "weather.json";
    /** Text file the merged result is passed to {@code DataUtil.addAll} for. */
    private static final String WEATHER_TEXT = "天气数据.txt";

    private final CrawlerContext context;

    /** Merged result of the most recent {@link #execute()}; {@code null} until then. */
    private List<Weather> weatherList;

    /**
     * @param context crawler context used to select and run the crawl strategy
     */
    public WeatherCrawlCommand(CrawlerContext context) {
        this.context = context;
    }

    /**
     * Runs an incremental weather crawl.
     *
     * <p>Steps: import the stored records from {@code weather.json}; look up the
     * set of already-stored cities; run the {@link WeatherCrawler} strategy via the
     * context; keep only crawled records whose city is not in that set; append the
     * stored records after the new ones; pass the merged list to
     * {@code DataUtil.addAll} and {@code DataUtil.exportJson}.
     *
     * @throws ParseException   declared by the original signature; propagated from collaborators
     * @throws NetworkException declared by the original signature; propagated from collaborators
     * @throws CrawlerException declared by the original signature; propagated from collaborators
     * @throws IOException      declared by the original signature; propagated from collaborators
     */
    @Override
    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
        List<Weather> history = DataUtil.importJson(WEATHER_JSON, Weather.class);
        logger.info("导入历史天气数据:{}条", history.size());

        // Point 4: incremental crawl - filter out cities that are already stored.
        Set<String> existCities = DataUtil.getExistWeatherCities(WEATHER_JSON);

        context.setCrawlerStrategy(new WeatherCrawler());
        // The context API is not typed per strategy; WeatherCrawler is the strategy
        // installed just above, so the result is treated as a list of Weather.
        @SuppressWarnings("unchecked")
        List<Weather> crawled = (List<Weather>) context.executeCrawl();

        // Collect into an explicit ArrayList: Collectors.toList() gives no guarantee
        // that the returned list is mutable, and we append to it below.
        List<Weather> merged = crawled.stream()
                .filter(weather -> !existCities.contains(weather.getCity()))
                .collect(Collectors.toCollection(ArrayList::new));
        int addedCount = merged.size();

        // New records first, stored records after them (same order as before).
        merged.addAll(history);
        weatherList = merged;

        DataUtil.addAll(WEATHER_TEXT, merged);
        DataUtil.exportJson(WEATHER_JSON, merged);
        logger.info("天气爬取完成,本次新增:{}条", addedCount);
    }

    /**
     * Returns the merged list built by the most recent {@link #execute()} call.
     *
     * @return the internal list instance (not a copy), or {@code null} if
     *         {@link #execute()} has not yet produced a result
     */
    public List<Weather> getResult() {
        return weatherList;
    }
}