You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
54 lines
1.8 KiB
54 lines
1.8 KiB
package command;
|
|
|
|
import controller.CrawlerContext;
|
|
import crawler.WeatherCrawler;
|
|
import exception.CrawlerException;
|
|
import exception.NetworkException;
|
|
import exception.ParseException;
|
|
import model.Weather;
|
|
import org.slf4j.Logger;
|
|
import org.slf4j.LoggerFactory;
|
|
import util.DataUtil;
|
|
|
|
import java.io.IOException;
|
|
import java.util.List;
|
|
import java.util.Set;
|
|
import java.util.stream.Collectors;
|
|
|
|
public class WeatherCrawlCommand implements CrawlCommand {
|
|
private final CrawlerContext context;
|
|
private List<Weather> weatherList;
|
|
private List<Weather> oldWeatherList;
|
|
|
|
private static final Logger logger = LoggerFactory.getLogger(WeatherCrawlCommand.class);
|
|
|
|
public WeatherCrawlCommand(CrawlerContext context) {
|
|
this.context = context;
|
|
}
|
|
|
|
@Override
|
|
public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
|
|
oldWeatherList = DataUtil.importJson("weather.json", Weather.class);
|
|
logger.info("导入历史天气数据:{}条", oldWeatherList.size());
|
|
|
|
// 第四点:增量抓取,过滤重复城市
|
|
Set<String> existCities = DataUtil.getExistWeatherCities("weather.json");
|
|
|
|
context.setCrawlerStrategy(new WeatherCrawler());
|
|
weatherList = (List<Weather>) context.executeCrawl();
|
|
|
|
weatherList = weatherList.stream()
|
|
.filter(weather -> !existCities.contains(weather.getCity()))
|
|
.collect(Collectors.toList());
|
|
|
|
weatherList.addAll(oldWeatherList);
|
|
|
|
DataUtil.addAll("天气数据.txt", weatherList);
|
|
DataUtil.exportJson("weather.json", weatherList);
|
|
logger.info("天气爬取完成,本次新增:{}条", weatherList.size() - oldWeatherList.size());
|
|
}
|
|
|
|
public List<Weather> getResult() {
|
|
return weatherList;
|
|
}
|
|
}
|