14 changed files with 599 additions and 0 deletions
@ -0,0 +1,29 @@ |
|||||
|
### IntelliJ IDEA ### |
||||
|
out/ |
||||
|
!**/src/main/**/out/ |
||||
|
!**/src/test/**/out/ |
||||
|
|
||||
|
### Eclipse ### |
||||
|
.apt_generated |
||||
|
.classpath |
||||
|
.factorypath |
||||
|
.project |
||||
|
.settings |
||||
|
.springBeans |
||||
|
.sts4-cache |
||||
|
bin/ |
||||
|
!**/src/main/**/bin/ |
||||
|
!**/src/test/**/bin/ |
||||
|
|
||||
|
### NetBeans ### |
||||
|
/nbproject/private/ |
||||
|
/nbbuild/ |
||||
|
/dist/ |
||||
|
/nbdist/ |
||||
|
/.nb-gradle/ |
||||
|
|
||||
|
### VS Code ### |
||||
|
.vscode/ |
||||
|
|
||||
|
### Mac OS ### |
||||
|
.DS_Store |
||||
@ -0,0 +1,10 @@ |
|||||
|
# 默认忽略的文件 |
||||
|
/shelf/ |
||||
|
/workspace.xml |
||||
|
# 基于编辑器的 HTTP 客户端请求 |
||||
|
/httpRequests/ |
||||
|
# 依赖于环境的 Maven 主目录路径 |
||||
|
/mavenHomeManager.xml |
||||
|
# Datasource local storage ignored files |
||||
|
/dataSources/ |
||||
|
/dataSources.local.xml |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="jcommon-1.0.24"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/jcommon-1.0.24.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="jfreechart-1.5.3"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/jfreechart-1.5.3.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="jsoup-1.17.2"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/jsoup-1.17.2.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="kumo-core-1.12"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/kumo-core-1.12.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="logback-classic-1.4.11"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/logback-classic-1.4.11.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="logback-core-1.4.11"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/logback-core-1.4.11.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,9 @@ |
|||||
|
<component name="libraryTable"> |
||||
|
<library name="slf4j-api-2.0.9"> |
||||
|
<CLASSES> |
||||
|
<root url="jar://$USER_HOME$/Downloads/slf4j-api-2.0.9.jar!/" /> |
||||
|
</CLASSES> |
||||
|
<JAVADOC /> |
||||
|
<SOURCES /> |
||||
|
</library> |
||||
|
</component> |
||||
@ -0,0 +1,6 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="openjdk-26" project-jdk-type="JavaSDK"> |
||||
|
<output url="file://$PROJECT_DIR$/out" /> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,8 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="ProjectModuleManager"> |
||||
|
<modules> |
||||
|
<module fileurl="file://$PROJECT_DIR$/CrawlerMain2.iml" filepath="$PROJECT_DIR$/CrawlerMain2.iml" /> |
||||
|
</modules> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,18 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<module type="JAVA_MODULE" version="4"> |
||||
|
<component name="NewModuleRootManager" inherit-compiler-output="true"> |
||||
|
<exclude-output /> |
||||
|
<content url="file://$MODULE_DIR$"> |
||||
|
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" /> |
||||
|
</content> |
||||
|
<orderEntry type="inheritedJdk" /> |
||||
|
<orderEntry type="sourceFolder" forTests="false" /> |
||||
|
<orderEntry type="library" name="jsoup-1.17.2" level="project" /> |
||||
|
<orderEntry type="library" name="jfreechart-1.5.3" level="project" /> |
||||
|
<orderEntry type="library" name="jcommon-1.0.24" level="project" /> |
||||
|
<orderEntry type="library" name="kumo-core-1.12" level="project" /> |
||||
|
<orderEntry type="library" name="logback-classic-1.4.11" level="project" /> |
||||
|
<orderEntry type="library" name="logback-core-1.4.11" level="project" /> |
||||
|
<orderEntry type="library" name="slf4j-api-2.0.9" level="project" /> |
||||
|
</component> |
||||
|
</module> |
||||
@ -0,0 +1,440 @@ |
|||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
// ===================== 1. 新增自定义异常包(对应作业要求第一条) =====================
|
||||
|
/**
 * Root checked exception of the crawler. The more specific
 * crawler exceptions in this file extend it.
 */
class CrawlerException extends Exception {

    /**
     * Creates an exception that records the underlying failure.
     *
     * @param message description of what went wrong
     * @param cause   the exception that triggered this one
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that carries only a description.
     *
     * @param message description of what went wrong
     */
    public CrawlerException(String message) {
        super(message);
    }
}
||||
|
|
||||
|
// 网络异常
|
||||
|
class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
public NetworkException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 解析异常
|
||||
|
class ParseException extends CrawlerException { |
||||
|
public ParseException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
public ParseException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ===================== 2. 抽象策略接口(声明抛出ParseException) =====================
|
||||
|
interface Crawler { |
||||
|
List<?> startCrawl() throws ParseException, NetworkException, CrawlerException; |
||||
|
} |
||||
|
|
||||
|
// ===================== 3. 抽象模板父类 + 重试逻辑(对应作业要求第三条) =====================
|
||||
|
abstract class BaseCrawler implements Crawler { |
||||
|
protected final String baseUrl; |
||||
|
protected static final Logger logger = LoggerFactory.getLogger(BaseCrawler.class); |
||||
|
// 重试次数配置
|
||||
|
private static final int MAX_RETRY = 3; |
||||
|
private static final long RETRY_INTERVAL = 2000; // 2秒重试间隔
|
||||
|
|
||||
|
protected BaseCrawler(String baseUrl) { |
||||
|
this.baseUrl = baseUrl; |
||||
|
} |
||||
|
|
||||
|
// 带重试逻辑的通用页面请求方法
|
||||
|
protected Document getPage(String url) throws NetworkException { |
||||
|
int retryCount = 0; |
||||
|
while (retryCount < MAX_RETRY) { |
||||
|
try { |
||||
|
logger.info("第{}次请求页面:{}", retryCount + 1, url); |
||||
|
return Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64)") |
||||
|
.timeout(15000) |
||||
|
.get(); |
||||
|
} catch (Exception e) { |
||||
|
retryCount++; |
||||
|
logger.error("请求页面失败,剩余重试次数:{}", MAX_RETRY - retryCount, e); |
||||
|
if (retryCount >= MAX_RETRY) { |
||||
|
throw new NetworkException("页面请求重试" + MAX_RETRY + "次后仍失败:" + url, e); |
||||
|
} |
||||
|
try { |
||||
|
Thread.sleep(RETRY_INTERVAL); |
||||
|
} catch (InterruptedException ie) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
throw new NetworkException("未知错误导致页面请求失败:" + url); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public abstract List<?> startCrawl() throws ParseException, NetworkException, CrawlerException; |
||||
|
} |
||||
|
|
||||
|
// ===================== 4. 实体类(完善封装) =====================
|
||||
|
/**
 * Immutable record of one crawled movie: its title and its rating as text.
 */
class Movie {

    private final String title;
    private final String rating;

    /**
     * @param title  movie title
     * @param rating rating exactly as scraped from the page
     */
    public Movie(String title, String rating) {
        this.title = title;
        this.rating = rating;
    }

    public String getTitle() {
        return title;
    }

    /**
     * Converts the textual rating to a number.
     *
     * @return the rating as a {@code double}
     * @throws IllegalArgumentException if the rating is {@code null} or is not
     *                                  a parseable number
     */
    public double getRatingDouble() {
        // Double.parseDouble(null) throws NullPointerException, not NumberFormatException,
        // so a missing rating must be rejected explicitly to honour the documented contract.
        if (rating == null) {
            throw new IllegalArgumentException("评分格式错误:" + rating);
        }
        try {
            return Double.parseDouble(rating);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("评分格式错误:" + rating, e);
        }
    }

    public String getRating() {
        return rating;
    }

    @Override
    public String toString() {
        return "电影:《" + title + "》 | 评分:" + rating;
    }
}
||||
|
|
||||
|
/**
 * Immutable record of one crawled hero, identified by name only.
 */
class Hero {

    private final String name;

    /**
     * @param name hero name as scraped from the page
     */
    public Hero(String name) {
        this.name = name;
    }

    public String getName() {
        return name;
    }

    @Override
    public String toString() {
        return "英雄:".concat(String.valueOf(name));
    }
}
||||
|
|
||||
|
/**
 * Immutable record of one crawled weather entry: province, city,
 * temperature text and weather condition text.
 */
class Weather {

    private final String province;
    private final String city;
    private final String temperature;
    private final String condition;

    /**
     * @param province    province the city belongs to
     * @param city        city name
     * @param temperature temperature text as scraped
     * @param condition   weather condition text as scraped
     */
    public Weather(String province, String city, String temperature, String condition) {
        this.province = province;
        this.city = city;
        this.temperature = temperature;
        this.condition = condition;
    }

    public String getProvince() {
        return province;
    }

    public String getCity() {
        return city;
    }

    public String getTemperature() {
        return temperature;
    }

    public String getCondition() {
        return condition;
    }

    @Override
    public String toString() {
        // Fields are listed as province, city, condition, temperature.
        return String.join(" | ",
                "省份:" + province,
                "城市:" + city,
                "天气:" + condition,
                "温度:" + temperature);
    }
}
||||
|
|
||||
|
// ===================== 5. 具体策略类(抛出自定义异常) =====================
|
||||
|
class MovieCrawler extends BaseCrawler { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(MovieCrawler.class); |
||||
|
|
||||
|
public MovieCrawler() { |
||||
|
super("https://movie.douban.com/top250"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Movie> startCrawl() throws ParseException, NetworkException, CrawlerException { |
||||
|
List<Movie> list = new ArrayList<>(); |
||||
|
logger.info("开始爬取豆瓣电影Top250"); |
||||
|
try { |
||||
|
for (int i = 0; i < 250; i += 25) { |
||||
|
Document doc = getPage(baseUrl + "?start=" + i); |
||||
|
Elements items = doc.select(".item"); |
||||
|
if (items.isEmpty()) { |
||||
|
throw new ParseException("页面解析失败:未找到电影列表项"); |
||||
|
} |
||||
|
for (Element e : items) { |
||||
|
Element titleEle = e.select(".title").first(); |
||||
|
Element ratingEle = e.select(".rating_num").first(); |
||||
|
if (titleEle == null || ratingEle == null) { |
||||
|
logger.warn("单条电影数据解析失败,跳过"); |
||||
|
continue; |
||||
|
} |
||||
|
String title = titleEle.text().split("/")[0].trim(); |
||||
|
String rating = ratingEle.text(); |
||||
|
list.add(new Movie(title, rating)); |
||||
|
} |
||||
|
Thread.sleep(1000); |
||||
|
} |
||||
|
logger.info("豆瓣电影爬取完成,共{}条数据", list.size()); |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; // 抛出网络异常
|
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
throw new CrawlerException("爬取被中断", e); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("电影数据解析异常", e); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class HeroCrawler extends BaseCrawler { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HeroCrawler.class); |
||||
|
|
||||
|
public HeroCrawler() { |
||||
|
super("https://pvp.qq.com/web201605/herolist.shtml"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Hero> startCrawl() throws ParseException, NetworkException, CrawlerException { |
||||
|
List<Hero> list = new ArrayList<>(); |
||||
|
logger.info("开始爬取王者荣耀英雄数据"); |
||||
|
try { |
||||
|
Document doc = getPage(baseUrl); |
||||
|
Elements heros = doc.select("ul.herolist li a"); |
||||
|
if (heros.isEmpty()) { |
||||
|
throw new ParseException("页面解析失败:未找到英雄列表项"); |
||||
|
} |
||||
|
for (Element h : heros) { |
||||
|
String name = h.text().trim(); |
||||
|
if (!name.isEmpty()) { |
||||
|
list.add(new Hero(name)); |
||||
|
} |
||||
|
} |
||||
|
logger.info("英雄爬取完成,共{}条数据", list.size()); |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("英雄数据解析异常", e); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
class WeatherCrawler extends BaseCrawler { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(WeatherCrawler.class); |
||||
|
|
||||
|
private static final String[][] cities = { |
||||
|
{"北京","北京","101010100"},{"上海","上海","101020100"},{"天津","天津","101030100"},{"重庆","重庆","101040100"}, |
||||
|
{"河北","石家庄","101090101"},{"山西","太原","101100101"},{"辽宁","沈阳","101070101"},{"吉林","长春","101060101"}, |
||||
|
{"黑龙江","哈尔滨","101050101"},{"江苏","南京","101190101"},{"浙江","杭州","101210101"},{"安徽","合肥","101220101"}, |
||||
|
{"福建","福州","101230101"},{"江西","南昌","101240101"},{"山东","济南","101120101"},{"河南","郑州","101180101"}, |
||||
|
{"湖北","武汉","101200101"},{"湖南","长沙","101250101"},{"广东","广州","101280101"},{"海南","海口","101310101"}, |
||||
|
{"四川","成都","101270101"},{"贵州","贵阳","101260101"},{"云南","昆明","101290101"},{"陕西","西安","101110101"}, |
||||
|
{"甘肃","兰州","101160101"},{"青海","西宁","101150101"},{"内蒙古","呼和浩特","101080101"},{"广西","南宁","101300101"}, |
||||
|
{"西藏","拉萨","101140101"},{"宁夏","银川","101170101"},{"新疆","乌鲁木齐","101130101"}, |
||||
|
{"香港","香港","101320101"},{"澳门","澳门","101330101"},{"台湾","台北","101340101"} |
||||
|
}; |
||||
|
|
||||
|
public WeatherCrawler() { |
||||
|
super("https://www.weather.com.cn/weather/"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Weather> startCrawl() throws ParseException, NetworkException, CrawlerException { |
||||
|
List<Weather> list = new ArrayList<>(); |
||||
|
logger.info("开始爬取全国天气数据"); |
||||
|
try { |
||||
|
for (String[] city : cities) { |
||||
|
String province = city[0]; |
||||
|
String cityName = city[1]; |
||||
|
String code = city[2]; |
||||
|
Document doc = getPage(baseUrl + code + ".shtml"); |
||||
|
Element today = doc.select("ul.t li").first(); |
||||
|
if (today == null) { |
||||
|
throw new ParseException("天气页面解析失败:未找到今日天气数据"); |
||||
|
} |
||||
|
String temp = today.select(".tem").text(); |
||||
|
String wea = today.select(".wea").text(); |
||||
|
list.add(new Weather(province, cityName, temp, wea)); |
||||
|
Thread.sleep(500); |
||||
|
} |
||||
|
logger.info("天气数据爬取完成,共{}条数据", list.size()); |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
throw new CrawlerException("爬取被中断", e); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("天气数据解析异常", e); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ===================== 6. 策略上下文Context =====================
|
||||
|
class CrawlerContext { |
||||
|
private Crawler crawlerStrategy; |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlerContext.class); |
||||
|
|
||||
|
public void setCrawlerStrategy(Crawler crawlerStrategy) { |
||||
|
this.crawlerStrategy = crawlerStrategy; |
||||
|
} |
||||
|
|
||||
|
public List<?> executeCrawl() throws ParseException, NetworkException, CrawlerException { |
||||
|
if (crawlerStrategy == null) { |
||||
|
logger.error("未设置爬取策略"); |
||||
|
throw new CrawlerException("爬取策略未配置"); |
||||
|
} |
||||
|
return crawlerStrategy.startCrawl(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ===================== 7. 工具类(增强防御检查,对应作业要求第六条) =====================
|
||||
|
final class DataUtil { |
||||
|
private static final String PATH = "D:\\Java爬虫\\"; |
||||
|
private static final Logger logger = LoggerFactory.getLogger(DataUtil.class); |
||||
|
|
||||
|
private DataUtil() {} |
||||
|
|
||||
|
public static void initFolder() { |
||||
|
File dir = new File(PATH); |
||||
|
if (!dir.exists()) { |
||||
|
boolean created = dir.mkdirs(); |
||||
|
if (created) { |
||||
|
logger.info("创建目录:{}", PATH); |
||||
|
} else { |
||||
|
logger.error("目录创建失败:{}", PATH); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void saveText(String fileName, String content) throws IOException { |
||||
|
// 增强防御检查
|
||||
|
if (fileName == null || fileName.isBlank()) { |
||||
|
throw new IllegalArgumentException("文件名不能为空"); |
||||
|
} |
||||
|
if (content == null || content.isBlank()) { |
||||
|
logger.warn("保存文件内容为空,跳过:{}", fileName); |
||||
|
return; |
||||
|
} |
||||
|
try (FileWriter fw = new FileWriter(PATH + fileName)) { |
||||
|
fw.write(content); |
||||
|
} |
||||
|
logger.info("文件保存成功:{}", fileName); |
||||
|
} |
||||
|
|
||||
|
public static <T> void addAll(String fileName, List<T> dataList) throws IOException { |
||||
|
// 增强防御检查
|
||||
|
if (dataList == null) { |
||||
|
throw new NullPointerException("待保存数据列表不能为null"); |
||||
|
} |
||||
|
if (dataList.isEmpty()) { |
||||
|
logger.warn("批量数据为空,跳过保存:{}", fileName); |
||||
|
return; |
||||
|
} |
||||
|
StringBuilder sb = new StringBuilder(); |
||||
|
dataList.forEach(item -> { |
||||
|
if (item != null) { // 空元素防御
|
||||
|
sb.append(item).append("\r\n"); |
||||
|
} |
||||
|
}); |
||||
|
saveText(fileName, sb.toString()); |
||||
|
} |
||||
|
|
||||
|
public static void analyzeData(List<Movie> movieList, List<Hero> heroList) { |
||||
|
// 增强防御检查
|
||||
|
if (movieList == null) { |
||||
|
logger.error("电影数据列表为null"); |
||||
|
return; |
||||
|
} |
||||
|
if (heroList == null) { |
||||
|
logger.error("英雄数据列表为null"); |
||||
|
return; |
||||
|
} |
||||
|
if (movieList.isEmpty()) { |
||||
|
logger.warn("电影数据列表为空,跳过分析"); |
||||
|
return; |
||||
|
} |
||||
|
logger.info("===== 执行数据分析 ====="); |
||||
|
double sum = 0; |
||||
|
int validCount = 0; |
||||
|
for (Movie movie : movieList) { |
||||
|
try { |
||||
|
sum += movie.getRatingDouble(); |
||||
|
validCount++; |
||||
|
} catch (IllegalArgumentException e) { |
||||
|
logger.warn("电影评分解析失败,跳过:{}", movie.getTitle(), e); |
||||
|
} |
||||
|
} |
||||
|
if (validCount == 0) { |
||||
|
logger.error("无有效电影评分数据"); |
||||
|
return; |
||||
|
} |
||||
|
double avg = sum / validCount; |
||||
|
System.out.println("电影平均评分:" + String.format("%.2f", avg)); |
||||
|
long highScoreCount = movieList.stream() |
||||
|
.filter(m -> { |
||||
|
try { |
||||
|
return m.getRatingDouble() >= 8.5; |
||||
|
} catch (IllegalArgumentException e) { |
||||
|
return false; |
||||
|
} |
||||
|
}) |
||||
|
.count(); |
||||
|
System.out.println("8.5分以上电影数量:" + highScoreCount); |
||||
|
System.out.println("英雄总数量:" + heroList.size()); |
||||
|
logger.info("数据分析结束"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ===================== 8. 主程序(统一异常处理 + 完整日志) =====================
|
||||
|
public class CrawlerMain { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlerMain.class); |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
logger.info("===== 爬虫程序启动 ====="); |
||||
|
CrawlerContext context = new CrawlerContext(); |
||||
|
List<Movie> movieList = new ArrayList<>(); |
||||
|
List<Hero> heroList = new ArrayList<>(); |
||||
|
List<Weather> weatherList = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
// 爬取电影
|
||||
|
context.setCrawlerStrategy(new MovieCrawler()); |
||||
|
movieList = (List<Movie>) context.executeCrawl(); |
||||
|
|
||||
|
// 爬取英雄
|
||||
|
context.setCrawlerStrategy(new HeroCrawler()); |
||||
|
heroList = (List<Hero>) context.executeCrawl(); |
||||
|
|
||||
|
// 爬取天气
|
||||
|
context.setCrawlerStrategy(new WeatherCrawler()); |
||||
|
weatherList = (List<Weather>) context.executeCrawl(); |
||||
|
|
||||
|
// 保存数据
|
||||
|
DataUtil.initFolder(); |
||||
|
DataUtil.addAll("电影数据.txt", movieList); |
||||
|
DataUtil.addAll("英雄数据.txt", heroList); |
||||
|
DataUtil.addAll("天气数据.txt", weatherList); |
||||
|
|
||||
|
// 数据分析
|
||||
|
DataUtil.analyzeData(movieList, heroList); |
||||
|
|
||||
|
logger.info("===== 全部任务执行完成 ====="); |
||||
|
System.out.println("✅ 数据已全部保存至 D:\\Java爬虫"); |
||||
|
} catch (NetworkException e) { |
||||
|
logger.error("网络异常:爬取失败", e); |
||||
|
} catch (ParseException e) { |
||||
|
logger.error("解析异常:数据解析失败", e); |
||||
|
} catch (CrawlerException e) { |
||||
|
logger.error("爬虫核心异常", e); |
||||
|
} catch (Exception e) { |
||||
|
logger.error("程序运行异常", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<configuration> |
||||
|
<!-- 控制台输出 --> |
||||
|
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<!-- 文件输出(可选) --> |
||||
|
<appender name="FILE" class="ch.qos.logback.core.FileAppender"> |
||||
|
<file>D:\Java爬虫\crawler.log</file> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{50} - %msg%n</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<!-- 全局日志级别 --> |
||||
|
<root level="INFO"> |
||||
|
<appender-ref ref="CONSOLE" /> |
||||
|
<appender-ref ref="FILE" /> <!-- 如需日志文件则保留,否则删除 --> |
||||
|
</root> |
||||
|
</configuration> |
||||
Loading…
Reference in new issue