@ -1,10 +0,0 @@ |
|||||
# 默认忽略的文件 |
|
||||
/shelf/ |
|
||||
/workspace.xml |
|
||||
# 已忽略包含查询文件的默认文件夹 |
|
||||
/queries/ |
|
||||
# Datasource local storage ignored files |
|
||||
/dataSources/ |
|
||||
/dataSources.local.xml |
|
||||
# 基于编辑器的 HTTP 客户端请求 |
|
||||
/httpRequests/ |
|
||||
@ -1,13 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project version="4"> |
|
||||
<component name="CompilerConfiguration"> |
|
||||
<annotationProcessing> |
|
||||
<profile name="Maven default annotation processors profile" enabled="true"> |
|
||||
<sourceOutputDir name="target/generated-sources/annotations" /> |
|
||||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
|
||||
<outputRelativeToContentRoot value="true" /> |
|
||||
<module name="crawler-project" /> |
|
||||
</profile> |
|
||||
</annotationProcessing> |
|
||||
</component> |
|
||||
</project> |
|
||||
@ -1,7 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project version="4"> |
|
||||
<component name="Encoding"> |
|
||||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
|
||||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
|
||||
</component> |
|
||||
</project> |
|
||||
@ -1,20 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project version="4"> |
|
||||
<component name="RemoteRepositoriesConfiguration"> |
|
||||
<remote-repository> |
|
||||
<option name="id" value="central" /> |
|
||||
<option name="name" value="Central Repository" /> |
|
||||
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
|
||||
</remote-repository> |
|
||||
<remote-repository> |
|
||||
<option name="id" value="central" /> |
|
||||
<option name="name" value="Maven Central repository" /> |
|
||||
<option name="url" value="https://repo1.maven.org/maven2" /> |
|
||||
</remote-repository> |
|
||||
<remote-repository> |
|
||||
<option name="id" value="jboss.community" /> |
|
||||
<option name="name" value="JBoss Community repository" /> |
|
||||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
|
||||
</remote-repository> |
|
||||
</component> |
|
||||
</project> |
|
||||
@ -1,12 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project version="4"> |
|
||||
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
|
||||
<component name="MavenProjectsManager"> |
|
||||
<option name="originalFiles"> |
|
||||
<list> |
|
||||
<option value="$PROJECT_DIR$/pom.xml" /> |
|
||||
</list> |
|
||||
</option> |
|
||||
</component> |
|
||||
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK" /> |
|
||||
</project> |
|
||||
@ -1,6 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project version="4"> |
|
||||
<component name="VcsDirectoryMappings"> |
|
||||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> |
|
||||
</component> |
|
||||
</project> |
|
||||
|
Before Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 35 KiB |
|
Before Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 44 KiB |
|
Before Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 30 KiB |
@ -1,64 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> |
|
||||
<modelVersion>4.0.0</modelVersion> |
|
||||
<groupId>com.example</groupId> |
|
||||
<artifactId>crawler-project</artifactId> |
|
||||
<name>crawler-project</name> |
|
||||
<version>1.0.0</version> |
|
||||
<description>Java爬虫项目 - MVC + Command + Strategy模式</description> |
|
||||
<build> |
|
||||
<plugins> |
|
||||
<plugin> |
|
||||
<artifactId>maven-compiler-plugin</artifactId> |
|
||||
<version>3.11.0</version> |
|
||||
<configuration> |
|
||||
<source>${java.version}</source> |
|
||||
<target>${java.version}</target> |
|
||||
<encoding>${project.build.sourceEncoding}</encoding> |
|
||||
</configuration> |
|
||||
</plugin> |
|
||||
<plugin> |
|
||||
<artifactId>maven-shade-plugin</artifactId> |
|
||||
<version>3.5.0</version> |
|
||||
<executions> |
|
||||
<execution> |
|
||||
<phase>package</phase> |
|
||||
<goals> |
|
||||
<goal>shade</goal> |
|
||||
</goals> |
|
||||
<configuration> |
|
||||
<transformers> |
|
||||
<transformer> |
|
||||
<mainClass>com.example.crawler.Main</mainClass> |
|
||||
</transformer> |
|
||||
</transformers> |
|
||||
</configuration> |
|
||||
</execution> |
|
||||
</executions> |
|
||||
</plugin> |
|
||||
</plugins> |
|
||||
</build> |
|
||||
<dependencies> |
|
||||
<dependency> |
|
||||
<groupId>junit</groupId> |
|
||||
<artifactId>junit</artifactId> |
|
||||
<version>4.13.2</version> |
|
||||
<scope>test</scope> |
|
||||
<exclusions> |
|
||||
<exclusion> |
|
||||
<artifactId>hamcrest-core</artifactId> |
|
||||
<groupId>org.hamcrest</groupId> |
|
||||
</exclusion> |
|
||||
</exclusions> |
|
||||
</dependency> |
|
||||
</dependencies> |
|
||||
<properties> |
|
||||
<java.version>11</java.version> |
|
||||
<maven.compiler.source>11</maven.compiler.source> |
|
||||
<jsoup.version>1.17.2</jsoup.version> |
|
||||
<jfreechart.version>1.5.3</jfreechart.version> |
|
||||
<maven.compiler.target>11</maven.compiler.target> |
|
||||
<gson.version>2.10.1</gson.version> |
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|
||||
</properties> |
|
||||
</project> |
|
||||
@ -1,82 +0,0 @@ |
|||||
[ |
|
||||
{ |
|
||||
"title": "专栏", |
|
||||
"publishTime": "", |
|
||||
"url": "http://zhuanlan.sina.com.cn/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "导航", |
|
||||
"publishTime": "", |
|
||||
"url": "http://news.sina.com.cn/guide/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "新浪财经", |
|
||||
"publishTime": "", |
|
||||
"url": "https://finance.sina.com.cn/mobile/comfinanceweb.shtml" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "新浪博客", |
|
||||
"publishTime": "", |
|
||||
"url": "https://blog.sina.com.cn/lm/z/app/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "我的收藏", |
|
||||
"publishTime": "", |
|
||||
"url": "http://my.sina.com.cn/#location=fav" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "注册", |
|
||||
"publishTime": "", |
|
||||
"url": "https://login.sina.com.cn/signup/signup?entry=news" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "新闻中心", |
|
||||
"publishTime": "", |
|
||||
"url": "http://news.sina.com.cn/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "新闻排行", |
|
||||
"publishTime": "", |
|
||||
"url": "http://news.sina.com.cn/hotnews/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "联系我们", |
|
||||
"publishTime": "", |
|
||||
"url": "http://www.sina.com.cn/contactus.html" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "广告服务", |
|
||||
"publishTime": "", |
|
||||
"url": "http://emarketing.sina.com.cn/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "通行证注册", |
|
||||
"publishTime": "", |
|
||||
"url": "http://login.sina.com.cn/signup/signup" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "产品答疑", |
|
||||
"publishTime": "", |
|
||||
"url": "http://help.sina.com.cn/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "招聘信息", |
|
||||
"publishTime": "", |
|
||||
"url": "http://career.sina.com.cn/" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "网站律师", |
|
||||
"publishTime": "", |
|
||||
"url": "http://corp.sina.com.cn/lawfirm/sina.htm" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "版权所有", |
|
||||
"publishTime": "", |
|
||||
"url": "https://corp.sina.com.cn/chn/copyright.html" |
|
||||
}, |
|
||||
{ |
|
||||
"title": "意见反馈", |
|
||||
"publishTime": "", |
|
||||
"url": "http://news.sina.com.cn/feedback/post.html" |
|
||||
} |
|
||||
] |
|
||||
@ -1,212 +0,0 @@ |
|||||
[ |
|
||||
{ |
|
||||
"rank": 1, |
|
||||
"universityName": "清华大学 Tsinghua University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 2, |
|
||||
"universityName": "北京大学 Peking University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 3, |
|
||||
"universityName": "浙江大学 Zhejiang University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "浙江", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 4, |
|
||||
"universityName": "上海交通大学 Shanghai Jiao Tong University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "上海", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 5, |
|
||||
"universityName": "复旦大学 Fudan University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "上海", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 6, |
|
||||
"universityName": "南京大学 Nanjing University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "江苏", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 7, |
|
||||
"universityName": "中国科学技术大学 University of Science and Technology of China 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "安徽", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 8, |
|
||||
"universityName": "武汉大学 Wuhan University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "湖北", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 9, |
|
||||
"universityName": "华中科技大学 Huazhong University of Science and Technology 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "湖北", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 10, |
|
||||
"universityName": "西安交通大学 Xi'an Jiaotong University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "陕西", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 11, |
|
||||
"universityName": "北京航空航天大学 Beihang University 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 12, |
|
||||
"universityName": "中山大学 Sun Yat-sen University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "广东", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 13, |
|
||||
"universityName": "北京理工大学 Beijing Institute of Technology 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 14, |
|
||||
"universityName": "哈尔滨工业大学 Harbin Institute of Technology 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "黑龙江", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 15, |
|
||||
"universityName": "四川大学 Sichuan University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "四川", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 16, |
|
||||
"universityName": "东南大学 Southeast University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "江苏", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 17, |
|
||||
"universityName": "中国人民大学 Renmin University of China 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 18, |
|
||||
"universityName": "同济大学 Tongji University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "上海", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 19, |
|
||||
"universityName": "北京师范大学 Beijing Normal University 双一流/985/211", |
|
||||
"totalScore": "师范", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 20, |
|
||||
"universityName": "天津大学 Tianjin University 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "天津", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 21, |
|
||||
"universityName": "西北工业大学 Northwestern Polytechnical University 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "陕西", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 22, |
|
||||
"universityName": "山东大学 Shandong University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "山东", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 23, |
|
||||
"universityName": "南开大学 Nankai University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "天津", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 24, |
|
||||
"universityName": "厦门大学 Xiamen University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "福建", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 25, |
|
||||
"universityName": "中国农业大学 China Agricultural University 双一流/985/211", |
|
||||
"totalScore": "农业", |
|
||||
"province": "北京", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 26, |
|
||||
"universityName": "吉林大学 Jilin University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "吉林", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 27, |
|
||||
"universityName": "中南大学 Central South University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "湖南", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 28, |
|
||||
"universityName": "大连理工大学 Dalian University of Technology 双一流/985/211", |
|
||||
"totalScore": "理工", |
|
||||
"province": "辽宁", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 29, |
|
||||
"universityName": "湖南大学 Hunan University 双一流/985/211", |
|
||||
"totalScore": "综合", |
|
||||
"province": "湖南", |
|
||||
"category": "" |
|
||||
}, |
|
||||
{ |
|
||||
"rank": 30, |
|
||||
"universityName": "华东师范大学 East China Normal University 双一流/985/211", |
|
||||
"totalScore": "师范", |
|
||||
"province": "上海", |
|
||||
"category": "" |
|
||||
} |
|
||||
] |
|
||||
@ -1,335 +0,0 @@ |
|||||
[ |
|
||||
{ |
|
||||
"cityName": "上海", |
|
||||
"temperature": 22.7, |
|
||||
"humidity": 83.0, |
|
||||
"windSpeed": 7.8, |
|
||||
"weatherCode": "3", |
|
||||
"hourlyTimes": [ |
|
||||
"00:00", |
|
||||
"01:00", |
|
||||
"02:00", |
|
||||
"03:00", |
|
||||
"04:00", |
|
||||
"05:00", |
|
||||
"06:00", |
|
||||
"07:00", |
|
||||
"08:00", |
|
||||
"09:00", |
|
||||
"10:00", |
|
||||
"11:00", |
|
||||
"12:00", |
|
||||
"13:00", |
|
||||
"14:00", |
|
||||
"15:00", |
|
||||
"16:00", |
|
||||
"17:00", |
|
||||
"18:00", |
|
||||
"19:00", |
|
||||
"20:00", |
|
||||
"21:00", |
|
||||
"22:00", |
|
||||
"23:00" |
|
||||
], |
|
||||
"hourlyTemperatures": [ |
|
||||
19.2, |
|
||||
19.0, |
|
||||
18.9, |
|
||||
18.3, |
|
||||
18.1, |
|
||||
17.8, |
|
||||
18.7, |
|
||||
20.9, |
|
||||
23.5, |
|
||||
24.9, |
|
||||
26.2, |
|
||||
27.0, |
|
||||
27.5, |
|
||||
28.1, |
|
||||
28.2, |
|
||||
27.4, |
|
||||
26.7, |
|
||||
25.0, |
|
||||
23.8, |
|
||||
22.7, |
|
||||
22.0, |
|
||||
20.6, |
|
||||
19.9, |
|
||||
19.4 |
|
||||
], |
|
||||
"hourlyHumidities": [ |
|
||||
83, |
|
||||
84, |
|
||||
85, |
|
||||
87, |
|
||||
89, |
|
||||
92, |
|
||||
90, |
|
||||
79, |
|
||||
55, |
|
||||
43, |
|
||||
38, |
|
||||
34, |
|
||||
33, |
|
||||
31, |
|
||||
30, |
|
||||
32, |
|
||||
35, |
|
||||
45, |
|
||||
54, |
|
||||
63, |
|
||||
67, |
|
||||
73, |
|
||||
76, |
|
||||
78 |
|
||||
], |
|
||||
"hourlyWindSpeeds": [ |
|
||||
3.8, |
|
||||
3.3, |
|
||||
2.6, |
|
||||
1.9, |
|
||||
1.0, |
|
||||
0.6, |
|
||||
2.3, |
|
||||
0.6, |
|
||||
1.8, |
|
||||
2.7, |
|
||||
3.0, |
|
||||
3.5, |
|
||||
5.4, |
|
||||
5.4, |
|
||||
6.0, |
|
||||
7.8, |
|
||||
9.2, |
|
||||
9.0, |
|
||||
8.1, |
|
||||
7.8, |
|
||||
7.2, |
|
||||
7.1, |
|
||||
7.1, |
|
||||
7.1 |
|
||||
] |
|
||||
}, |
|
||||
{ |
|
||||
"cityName": "广州", |
|
||||
"temperature": 25.9, |
|
||||
"humidity": 85.0, |
|
||||
"windSpeed": 5.3, |
|
||||
"weatherCode": "81", |
|
||||
"hourlyTimes": [ |
|
||||
"00:00", |
|
||||
"01:00", |
|
||||
"02:00", |
|
||||
"03:00", |
|
||||
"04:00", |
|
||||
"05:00", |
|
||||
"06:00", |
|
||||
"07:00", |
|
||||
"08:00", |
|
||||
"09:00", |
|
||||
"10:00", |
|
||||
"11:00", |
|
||||
"12:00", |
|
||||
"13:00", |
|
||||
"14:00", |
|
||||
"15:00", |
|
||||
"16:00", |
|
||||
"17:00", |
|
||||
"18:00", |
|
||||
"19:00", |
|
||||
"20:00", |
|
||||
"21:00", |
|
||||
"22:00", |
|
||||
"23:00" |
|
||||
], |
|
||||
"hourlyTemperatures": [ |
|
||||
27.7, |
|
||||
27.2, |
|
||||
26.0, |
|
||||
25.5, |
|
||||
25.4, |
|
||||
25.0, |
|
||||
25.0, |
|
||||
26.0, |
|
||||
28.1, |
|
||||
29.3, |
|
||||
30.6, |
|
||||
31.9, |
|
||||
33.0, |
|
||||
33.8, |
|
||||
33.9, |
|
||||
33.6, |
|
||||
34.2, |
|
||||
30.5, |
|
||||
29.4, |
|
||||
25.9, |
|
||||
26.4, |
|
||||
26.5, |
|
||||
26.3, |
|
||||
26.2 |
|
||||
], |
|
||||
"hourlyHumidities": [ |
|
||||
85, |
|
||||
87, |
|
||||
82, |
|
||||
84, |
|
||||
85, |
|
||||
90, |
|
||||
92, |
|
||||
87, |
|
||||
76, |
|
||||
70, |
|
||||
63, |
|
||||
57, |
|
||||
54, |
|
||||
53, |
|
||||
53, |
|
||||
54, |
|
||||
51, |
|
||||
69, |
|
||||
72, |
|
||||
95, |
|
||||
97, |
|
||||
96, |
|
||||
98, |
|
||||
98 |
|
||||
], |
|
||||
"hourlyWindSpeeds": [ |
|
||||
5.8, |
|
||||
4.9, |
|
||||
4.4, |
|
||||
3.3, |
|
||||
3.4, |
|
||||
3.8, |
|
||||
4.1, |
|
||||
5.6, |
|
||||
4.0, |
|
||||
3.8, |
|
||||
4.0, |
|
||||
2.8, |
|
||||
1.3, |
|
||||
3.3, |
|
||||
5.1, |
|
||||
5.2, |
|
||||
5.1, |
|
||||
12.3, |
|
||||
3.1, |
|
||||
5.3, |
|
||||
3.6, |
|
||||
1.7, |
|
||||
2.0, |
|
||||
1.4 |
|
||||
] |
|
||||
}, |
|
||||
{ |
|
||||
"cityName": "北京", |
|
||||
"temperature": 32.3, |
|
||||
"humidity": 56.0, |
|
||||
"windSpeed": 17.1, |
|
||||
"weatherCode": "0", |
|
||||
"hourlyTimes": [ |
|
||||
"00:00", |
|
||||
"01:00", |
|
||||
"02:00", |
|
||||
"03:00", |
|
||||
"04:00", |
|
||||
"05:00", |
|
||||
"06:00", |
|
||||
"07:00", |
|
||||
"08:00", |
|
||||
"09:00", |
|
||||
"10:00", |
|
||||
"11:00", |
|
||||
"12:00", |
|
||||
"13:00", |
|
||||
"14:00", |
|
||||
"15:00", |
|
||||
"16:00", |
|
||||
"17:00", |
|
||||
"18:00", |
|
||||
"19:00", |
|
||||
"20:00", |
|
||||
"21:00", |
|
||||
"22:00", |
|
||||
"23:00" |
|
||||
], |
|
||||
"hourlyTemperatures": [ |
|
||||
22.8, |
|
||||
21.9, |
|
||||
21.2, |
|
||||
20.1, |
|
||||
19.6, |
|
||||
18.8, |
|
||||
19.2, |
|
||||
20.7, |
|
||||
23.7, |
|
||||
27.0, |
|
||||
29.9, |
|
||||
32.5, |
|
||||
34.5, |
|
||||
35.8, |
|
||||
36.3, |
|
||||
36.6, |
|
||||
36.2, |
|
||||
35.7, |
|
||||
34.2, |
|
||||
32.3, |
|
||||
30.9, |
|
||||
29.9, |
|
||||
29.1, |
|
||||
28.6 |
|
||||
], |
|
||||
"hourlyHumidities": [ |
|
||||
56, |
|
||||
60, |
|
||||
63, |
|
||||
69, |
|
||||
71, |
|
||||
75, |
|
||||
74, |
|
||||
67, |
|
||||
57, |
|
||||
45, |
|
||||
37, |
|
||||
28, |
|
||||
21, |
|
||||
18, |
|
||||
20, |
|
||||
21, |
|
||||
26, |
|
||||
26, |
|
||||
30, |
|
||||
33, |
|
||||
35, |
|
||||
36, |
|
||||
35, |
|
||||
34 |
|
||||
], |
|
||||
"hourlyWindSpeeds": [ |
|
||||
11.6, |
|
||||
10.6, |
|
||||
7.6, |
|
||||
4.5, |
|
||||
3.9, |
|
||||
2.3, |
|
||||
2.3, |
|
||||
0.6, |
|
||||
0.8, |
|
||||
2.2, |
|
||||
2.4, |
|
||||
4.9, |
|
||||
7.6, |
|
||||
10.4, |
|
||||
12.2, |
|
||||
13.4, |
|
||||
14.7, |
|
||||
15.1, |
|
||||
14.5, |
|
||||
17.1, |
|
||||
16.9, |
|
||||
18.1, |
|
||||
19.7, |
|
||||
20.1 |
|
||||
] |
|
||||
} |
|
||||
] |
|
||||
@ -1,96 +0,0 @@ |
|||||
<?xml version="1.0" encoding="UTF-8"?> |
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|
||||
<modelVersion>4.0.0</modelVersion> |
|
||||
|
|
||||
<groupId>com.example</groupId> |
|
||||
<artifactId>crawler-project</artifactId> |
|
||||
<version>1.0.0</version> |
|
||||
<name>crawler-project</name> |
|
||||
<description>Java爬虫项目 - MVC + Command + Strategy模式</description> |
|
||||
|
|
||||
<properties> |
|
||||
<java.version>11</java.version> |
|
||||
<jsoup.version>1.17.2</jsoup.version> |
|
||||
<gson.version>2.10.1</gson.version> |
|
||||
<jfreechart.version>1.5.3</jfreechart.version> |
|
||||
<logback.version>1.4.14</logback.version> |
|
||||
<maven.compiler.source>11</maven.compiler.source> |
|
||||
<maven.compiler.target>11</maven.compiler.target> |
|
||||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|
||||
</properties> |
|
||||
|
|
||||
<dependencies> |
|
||||
<!-- Jsoup HTML解析 --> |
|
||||
<dependency> |
|
||||
<groupId>org.jsoup</groupId> |
|
||||
<artifactId>jsoup</artifactId> |
|
||||
<version>${jsoup.version}</version> |
|
||||
</dependency> |
|
||||
|
|
||||
<!-- Gson JSON处理 --> |
|
||||
<dependency> |
|
||||
<groupId>com.google.code.gson</groupId> |
|
||||
<artifactId>gson</artifactId> |
|
||||
<version>${gson.version}</version> |
|
||||
</dependency> |
|
||||
|
|
||||
<!-- JFreeChart 图表生成 --> |
|
||||
<dependency> |
|
||||
<groupId>org.jfree</groupId> |
|
||||
<artifactId>jfreechart</artifactId> |
|
||||
<version>${jfreechart.version}</version> |
|
||||
</dependency> |
|
||||
|
|
||||
<!-- Logback 日志框架 --> |
|
||||
<dependency> |
|
||||
<groupId>ch.qos.logback</groupId> |
|
||||
<artifactId>logback-classic</artifactId> |
|
||||
<version>${logback.version}</version> |
|
||||
</dependency> |
|
||||
|
|
||||
<!-- 测试依赖 --> |
|
||||
<dependency> |
|
||||
<groupId>junit</groupId> |
|
||||
<artifactId>junit</artifactId> |
|
||||
<version>4.13.2</version> |
|
||||
<scope>test</scope> |
|
||||
</dependency> |
|
||||
</dependencies> |
|
||||
|
|
||||
<build> |
|
||||
<plugins> |
|
||||
<plugin> |
|
||||
<groupId>org.apache.maven.plugins</groupId> |
|
||||
<artifactId>maven-compiler-plugin</artifactId> |
|
||||
<version>3.11.0</version> |
|
||||
<configuration> |
|
||||
<source>${java.version}</source> |
|
||||
<target>${java.version}</target> |
|
||||
<encoding>${project.build.sourceEncoding}</encoding> |
|
||||
</configuration> |
|
||||
</plugin> |
|
||||
<plugin> |
|
||||
<groupId>org.apache.maven.plugins</groupId> |
|
||||
<artifactId>maven-shade-plugin</artifactId> |
|
||||
<version>3.5.0</version> |
|
||||
<executions> |
|
||||
<execution> |
|
||||
<phase>package</phase> |
|
||||
<goals> |
|
||||
<goal>shade</goal> |
|
||||
</goals> |
|
||||
<configuration> |
|
||||
<transformers> |
|
||||
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> |
|
||||
<mainClass>com.example.crawler.Main</mainClass> |
|
||||
</transformer> |
|
||||
</transformers> |
|
||||
</configuration> |
|
||||
</execution> |
|
||||
</executions> |
|
||||
</plugin> |
|
||||
</plugins> |
|
||||
</build> |
|
||||
</project> |
|
||||
@ -1,14 +0,0 @@ |
|||||
========== 书籍数据分析报告 ========== |
|
||||
生成时间: 2026-05-30T17:47:42.026682900 |
|
||||
分析书籍总数: 600 |
|
||||
|
|
||||
【价格统计】 |
|
||||
最高价: £59.92 |
|
||||
最低价: £10.01 |
|
||||
平均价: £35.29 |
|
||||
|
|
||||
【库存统计】 |
|
||||
有库存: 600 本 |
|
||||
缺货: 0 本 |
|
||||
|
|
||||
报告生成完成 |
|
||||
@ -1,31 +0,0 @@ |
|||||
========== 新闻数据分析报告 ========== |
|
||||
生成时间: 2026-05-30T17:47:42.145591 |
|
||||
分析新闻总数: 16 |
|
||||
|
|
||||
【发布时间分布】 |
|
||||
00:00 - 01:00: 0 条 |
|
||||
01:00 - 02:00: 0 条 |
|
||||
02:00 - 03:00: 0 条 |
|
||||
03:00 - 04:00: 0 条 |
|
||||
04:00 - 05:00: 0 条 |
|
||||
05:00 - 06:00: 0 条 |
|
||||
06:00 - 07:00: 0 条 |
|
||||
07:00 - 08:00: 0 条 |
|
||||
08:00 - 09:00: 0 条 |
|
||||
09:00 - 10:00: 0 条 |
|
||||
10:00 - 11:00: 0 条 |
|
||||
11:00 - 12:00: 0 条 |
|
||||
12:00 - 13:00: 0 条 |
|
||||
13:00 - 14:00: 0 条 |
|
||||
14:00 - 15:00: 0 条 |
|
||||
15:00 - 16:00: 0 条 |
|
||||
16:00 - 17:00: 0 条 |
|
||||
17:00 - 18:00: 16 条 |
|
||||
18:00 - 19:00: 0 条 |
|
||||
19:00 - 20:00: 0 条 |
|
||||
20:00 - 21:00: 0 条 |
|
||||
21:00 - 22:00: 0 条 |
|
||||
22:00 - 23:00: 0 条 |
|
||||
23:00 - 00:00: 0 条 |
|
||||
|
|
||||
报告生成完成 |
|
||||
@ -1,17 +0,0 @@ |
|||||
========== 大学排名数据分析报告 ========== |
|
||||
生成时间: 2026-05-30T17:47:42.272388 |
|
||||
分析大学总数: 30 |
|
||||
|
|
||||
【省份排行榜 TOP 10】 |
|
||||
北京: 7 所大学 |
|
||||
上海: 4 所大学 |
|
||||
湖北: 2 所大学 |
|
||||
湖南: 2 所大学 |
|
||||
天津: 2 所大学 |
|
||||
陕西: 2 所大学 |
|
||||
江苏: 2 所大学 |
|
||||
山东: 1 所大学 |
|
||||
福建: 1 所大学 |
|
||||
吉林: 1 所大学 |
|
||||
|
|
||||
报告生成完成 |
|
||||
@ -1,29 +0,0 @@ |
|||||
========== 天气数据分析报告 ========== |
|
||||
生成时间: 2026-05-30T17:47:42.585539200 |
|
||||
分析城市数量: 3 |
|
||||
数据来源: Open-Meteo API (CC BY 4.0) |
|
||||
|
|
||||
【多城市天气对比】 |
|
||||
|
|
||||
城市: 上海 |
|
||||
当前温度: 24.0°C |
|
||||
当前湿度: 83% |
|
||||
风速: 8.3 km/h |
|
||||
天气: 多云 |
|
||||
24小时平均温度: 22.7°C |
|
||||
|
|
||||
城市: 广州 |
|
||||
当前温度: 29.8°C |
|
||||
当前湿度: 85% |
|
||||
风速: 2.4 km/h |
|
||||
天气: 小毛毛雨 |
|
||||
24小时平均温度: 28.6°C |
|
||||
|
|
||||
城市: 北京 |
|
||||
当前温度: 34.6°C |
|
||||
当前湿度: 56% |
|
||||
风速: 14.4 km/h |
|
||||
天气: 晴 |
|
||||
24小时平均温度: 28.2°C |
|
||||
|
|
||||
报告生成完成 |
|
||||
@ -1,15 +0,0 @@ |
|||||
package com.example.crawler; |
|
||||
|
|
||||
import com.example.crawler.controller.CrawlerController; |
|
||||
|
|
||||
/** |
|
||||
* 爬虫项目主入口类 |
|
||||
*/ |
|
||||
public class Main { |
|
||||
|
|
||||
public static void main(String[] args) { |
|
||||
// 创建控制器并启动CLI界面
|
|
||||
CrawlerController controller = new CrawlerController(); |
|
||||
controller.start(); |
|
||||
} |
|
||||
} |
|
||||
@ -1,229 +0,0 @@ |
|||||
package com.example.crawler.chart; |
|
||||
|
|
||||
import java.awt.Color; |
|
||||
import java.awt.Font; |
|
||||
import java.io.File; |
|
||||
import java.io.IOException; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import org.jfree.chart.ChartFactory; |
|
||||
import org.jfree.chart.ChartUtils; |
|
||||
import org.jfree.chart.JFreeChart; |
|
||||
import org.jfree.chart.axis.CategoryAxis; |
|
||||
import org.jfree.chart.axis.NumberAxis; |
|
||||
import org.jfree.chart.plot.CategoryPlot; |
|
||||
import org.jfree.chart.plot.PiePlot; |
|
||||
import org.jfree.chart.plot.XYPlot; |
|
||||
import org.jfree.chart.renderer.category.BarRenderer; |
|
||||
import org.jfree.chart.renderer.category.LineAndShapeRenderer; |
|
||||
import org.jfree.data.category.DefaultCategoryDataset; |
|
||||
import org.jfree.data.general.DefaultPieDataset; |
|
||||
import org.jfree.data.xy.XYDataset; |
|
||||
import org.jfree.data.xy.XYSeries; |
|
||||
import org.jfree.data.xy.XYSeriesCollection; |
|
||||
|
|
||||
public class ChartGenerator { |
|
||||
|
|
||||
static { |
|
||||
File dir = new File(CrawlerConstants.CHARTS_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public static void generatePriceHistogram(Map<String, Integer> priceDistribution, String fileName) { |
|
||||
DefaultCategoryDataset dataset = createCategoryDataset(priceDistribution); |
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
"书籍价格分布", |
|
||||
"价格区间(£)", |
|
||||
"书籍数量", |
|
||||
dataset |
|
||||
); |
|
||||
customizeBarChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateRatingPieChart(Map<String, Integer> ratingDistribution, String fileName) { |
|
||||
DefaultPieDataset<String> dataset = new DefaultPieDataset<>(); |
|
||||
for (Map.Entry<String, Integer> entry : ratingDistribution.entrySet()) { |
|
||||
dataset.setValue(entry.getKey(), entry.getValue()); |
|
||||
} |
|
||||
JFreeChart chart = ChartFactory.createPieChart( |
|
||||
"书籍评分分布", |
|
||||
dataset, |
|
||||
true, |
|
||||
true, |
|
||||
false |
|
||||
); |
|
||||
customizePieChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateNewsTimeTrend(Map<Integer, Integer> hourDistribution, String fileName) { |
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
for (int i = 0; i < 24; i++) { |
|
||||
int count = hourDistribution.getOrDefault(i, 0); |
|
||||
dataset.addValue(count, "新闻数量", String.format("%02d:00", i)); |
|
||||
} |
|
||||
JFreeChart chart = ChartFactory.createLineChart( |
|
||||
"新闻发布时间分布", |
|
||||
"小时", |
|
||||
"新闻数量", |
|
||||
dataset |
|
||||
); |
|
||||
customizeLineChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateWordFrequencyBarChart(Map<String, Integer> wordFrequency, String fileName) { |
|
||||
Map<String, Integer> top10 = wordFrequency.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.limit(10) |
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|
||||
|
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
for (Map.Entry<String, Integer> entry : top10.entrySet()) { |
|
||||
dataset.addValue(entry.getValue(), "词频", entry.getKey()); |
|
||||
} |
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
"新闻高频词 TOP 10", |
|
||||
"关键词", |
|
||||
"出现次数", |
|
||||
dataset |
|
||||
); |
|
||||
customizeBarChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateProvinceBarChart(Map<String, Integer> provinceDistribution, String fileName) { |
|
||||
Map<String, Integer> top10 = provinceDistribution.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.limit(10) |
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|
||||
|
|
||||
DefaultCategoryDataset dataset = createCategoryDataset(top10); |
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
"各省上榜大学数量 TOP 10", |
|
||||
"省份", |
|
||||
"大学数量", |
|
||||
dataset |
|
||||
); |
|
||||
customizeBarChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateScoreHistogram(Map<String, Integer> scoreDistribution, String fileName) { |
|
||||
DefaultCategoryDataset dataset = createCategoryDataset(scoreDistribution); |
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
"大学总分分布", |
|
||||
"分数区间", |
|
||||
"大学数量", |
|
||||
dataset |
|
||||
); |
|
||||
customizeBarChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateTemperatureTrend(List<String> times, List<Double> temperatures, String cityName, String fileName) { |
|
||||
XYSeries series = new XYSeries(cityName); |
|
||||
for (int i = 0; i < Math.min(times.size(), temperatures.size()); i++) { |
|
||||
series.add(i, temperatures.get(i)); |
|
||||
} |
|
||||
XYDataset dataset = new XYSeriesCollection(series); |
|
||||
JFreeChart chart = ChartFactory.createXYLineChart( |
|
||||
cityName + " 未来24小时温度变化", |
|
||||
"小时", |
|
||||
"温度(°C)", |
|
||||
dataset |
|
||||
); |
|
||||
customizeXYLineChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
public static void generateMultiCityTemperatureComparison(Map<String, List<Double>> cityTemperatures, String fileName) { |
|
||||
XYSeriesCollection dataset = new XYSeriesCollection(); |
|
||||
for (Map.Entry<String, List<Double>> entry : cityTemperatures.entrySet()) { |
|
||||
XYSeries series = new XYSeries(entry.getKey()); |
|
||||
List<Double> temps = entry.getValue(); |
|
||||
for (int i = 0; i < Math.min(temps.size(), 24); i++) { |
|
||||
series.add(i, temps.get(i)); |
|
||||
} |
|
||||
dataset.addSeries(series); |
|
||||
} |
|
||||
JFreeChart chart = ChartFactory.createXYLineChart( |
|
||||
"多城市未来24小时温度对比", |
|
||||
"小时", |
|
||||
"温度(°C)", |
|
||||
dataset |
|
||||
); |
|
||||
customizeXYLineChart(chart); |
|
||||
saveChart(chart, fileName); |
|
||||
} |
|
||||
|
|
||||
private static DefaultCategoryDataset createCategoryDataset(Map<String, Integer> data) { |
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
for (Map.Entry<String, Integer> entry : data.entrySet()) { |
|
||||
dataset.addValue(entry.getValue(), "数值", entry.getKey()); |
|
||||
} |
|
||||
return dataset; |
|
||||
} |
|
||||
|
|
||||
private static void customizeBarChart(JFreeChart chart) { |
|
||||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|
||||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
CategoryPlot plot = chart.getCategoryPlot(); |
|
||||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|
||||
domainAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
domainAxis.setTickLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 10)); |
|
||||
|
|
||||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|
||||
rangeAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
BarRenderer renderer = (BarRenderer) plot.getRenderer(); |
|
||||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|
||||
} |
|
||||
|
|
||||
private static void customizePieChart(JFreeChart chart) { |
|
||||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|
||||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
PiePlot plot = (PiePlot) chart.getPlot(); |
|
||||
plot.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
} |
|
||||
|
|
||||
private static void customizeLineChart(JFreeChart chart) { |
|
||||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|
||||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
CategoryPlot plot = chart.getCategoryPlot(); |
|
||||
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); |
|
||||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|
||||
} |
|
||||
|
|
||||
private static void customizeXYLineChart(JFreeChart chart) { |
|
||||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|
||||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
XYPlot plot = chart.getXYPlot(); |
|
||||
|
|
||||
NumberAxis xAxis = (NumberAxis) plot.getDomainAxis(); |
|
||||
xAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
|
|
||||
NumberAxis yAxis = (NumberAxis) plot.getRangeAxis(); |
|
||||
yAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|
||||
} |
|
||||
|
|
||||
private static void saveChart(JFreeChart chart, String fileName) { |
|
||||
try { |
|
||||
File file = new File(CrawlerConstants.CHARTS_DIR, fileName); |
|
||||
ChartUtils.saveChartAsPNG(file, chart, 800, 500); |
|
||||
System.out.println("图表已保存: " + file.getAbsolutePath()); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("保存图表失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,60 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.strategy.CrawlStrategy; |
|
||||
import org.slf4j.Logger; |
|
||||
import org.slf4j.LoggerFactory; |
|
||||
|
|
||||
public abstract class BaseCrawlCommand implements Command { |
|
||||
|
|
||||
protected static final Logger logger = LoggerFactory.getLogger(BaseCrawlCommand.class); |
|
||||
|
|
||||
protected DataRepository repository; |
|
||||
protected int maxRetries; |
|
||||
protected long retryDelayMs; |
|
||||
|
|
||||
public BaseCrawlCommand(DataRepository repository) { |
|
||||
this.repository = repository; |
|
||||
this.maxRetries = CrawlerConstants.MAX_RETRIES; |
|
||||
this.retryDelayMs = 2000; |
|
||||
} |
|
||||
|
|
||||
protected abstract CrawlStrategy<?> getStrategy(); |
|
||||
|
|
||||
protected abstract void saveToRepository(Object data); |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
try { |
|
||||
Object data = crawlWithRetry(); |
|
||||
saveToRepository(data); |
|
||||
logger.info("Crawling completed and saved to repository"); |
|
||||
} catch (Exception e) { |
|
||||
logger.error("Crawling failed", e); |
|
||||
System.err.println("爬取失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
protected Object crawlWithRetry() throws Exception { |
|
||||
int attempts = 0; |
|
||||
while (attempts < maxRetries) { |
|
||||
try { |
|
||||
CrawlStrategy<?> strategy = getStrategy(); |
|
||||
return strategy.crawl(); |
|
||||
} catch (NetworkException e) { |
|
||||
attempts++; |
|
||||
if (attempts < maxRetries) { |
|
||||
logger.warn("Network error, retrying in {}ms (attempt {}/{})", retryDelayMs, attempts, maxRetries); |
|
||||
Thread.sleep(retryDelayMs); |
|
||||
} else { |
|
||||
logger.error("Max retries reached, giving up"); |
|
||||
throw e; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
throw new CrawlException("Max retries exceeded"); |
|
||||
} |
|
||||
} |
|
||||
@ -1,32 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.strategy.BookCrawlStrategy; |
|
||||
import com.example.crawler.strategy.CrawlStrategy; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class BookCommand extends BaseCrawlCommand { |
|
||||
|
|
||||
public BookCommand(DataRepository repository) { |
|
||||
super(repository); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
protected CrawlStrategy<?> getStrategy() { |
|
||||
return new BookCrawlStrategy(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
@SuppressWarnings("unchecked") |
|
||||
protected void saveToRepository(Object data) { |
|
||||
repository.saveBooks((List<Book>) data); |
|
||||
System.out.println("成功爬取 " + ((List<Book>) data).size() + " 本书籍信息"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取书籍信息"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,20 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
/**
 * Command interface.
 *
 * <p>Declares the standard operations of an executable command, following the
 * Command pattern.
 */
public interface Command {

    /**
     * Runs the command.
     */
    void execute();

    /**
     * Returns the name of the command.
     *
     * @return the command name
     */
    String getName();
}
|
||||
@ -1,45 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.controller.CrawlerController; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
|
|
||||
public class CrawlAllCommand implements Command { |
|
||||
|
|
||||
private final DataRepository repository; |
|
||||
private final CrawlerController controller; |
|
||||
|
|
||||
public CrawlAllCommand(CrawlerController controller) { |
|
||||
this.controller = controller; |
|
||||
this.repository = controller.getRepository(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
System.out.println("\n=== 开始爬取全部数据源 ==="); |
|
||||
|
|
||||
Command[] commands = { |
|
||||
new BookCommand(repository), |
|
||||
new NewsCommand(repository), |
|
||||
new CrawlRankingCommand(repository), |
|
||||
new WeatherCommand(repository) |
|
||||
}; |
|
||||
|
|
||||
for (Command command : commands) { |
|
||||
command.execute(); |
|
||||
try { |
|
||||
Thread.sleep(2000); |
|
||||
} catch (InterruptedException e) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
new SaveCommand(controller).execute(); |
|
||||
|
|
||||
System.out.println("\n=== 全部数据爬取完成 ==="); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取全部数据并保存"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,104 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.controller.CrawlerController; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.service.BookAnalysisService; |
|
||||
import com.example.crawler.service.NewsAnalysisService; |
|
||||
import com.example.crawler.service.RankingAnalysisService; |
|
||||
import com.example.crawler.service.WeatherAnalysisService; |
|
||||
|
|
||||
public class CrawlAndAnalyzeAllCommand implements Command { |
|
||||
|
|
||||
private final DataRepository repository; |
|
||||
private final CrawlerController controller; |
|
||||
|
|
||||
public CrawlAndAnalyzeAllCommand(CrawlerController controller) { |
|
||||
this.controller = controller; |
|
||||
this.repository = controller.getRepository(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
System.out.println("\n========== 爬取全部数据并生成分析 ==========\n"); |
|
||||
|
|
||||
System.out.println("第1步:爬取书籍信息..."); |
|
||||
try { |
|
||||
BookCommand bookCommand = new BookCommand(repository); |
|
||||
bookCommand.execute(); |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("书籍爬取失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n第2步:爬取新闻信息..."); |
|
||||
try { |
|
||||
NewsCommand newsCommand = new NewsCommand(repository); |
|
||||
newsCommand.execute(); |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("新闻爬取失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n第3步:爬取大学排名..."); |
|
||||
try { |
|
||||
CrawlRankingCommand rankingCommand = new CrawlRankingCommand(repository); |
|
||||
rankingCommand.execute(); |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("大学排名爬取失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n第4步:爬取天气数据..."); |
|
||||
try { |
|
||||
WeatherCommand weatherCommand = new WeatherCommand(repository); |
|
||||
weatherCommand.execute(); |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("天气数据爬取失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 数据爬取完成,开始分析 ==========\n"); |
|
||||
|
|
||||
try { |
|
||||
BookAnalysisService bookService = new BookAnalysisService(); |
|
||||
if (!repository.getBooks().isEmpty()) { |
|
||||
bookService.analyze(repository.getBooks()); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("书籍分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
NewsAnalysisService newsService = new NewsAnalysisService(); |
|
||||
if (!repository.getNewsList().isEmpty()) { |
|
||||
newsService.analyze(repository.getNewsList()); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("新闻分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
RankingAnalysisService rankingService = new RankingAnalysisService(); |
|
||||
if (!repository.getRankings().isEmpty()) { |
|
||||
rankingService.analyze(repository.getRankings()); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("大学排名分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
|
||||
if (!repository.getWeatherList().isEmpty()) { |
|
||||
weatherService.analyze(repository.getWeatherList()); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("天气分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 全部完成 =========="); |
|
||||
System.out.println("原始数据已保存到 output/ 目录"); |
|
||||
System.out.println("分析报告已保存到 reports/ 目录"); |
|
||||
System.out.println("图表已保存到 charts/ 目录"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取并分析全部数据"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,32 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.strategy.CrawlStrategy; |
|
||||
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class CrawlRankingCommand extends BaseCrawlCommand { |
|
||||
|
|
||||
public CrawlRankingCommand(DataRepository repository) { |
|
||||
super(repository); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
protected CrawlStrategy<?> getStrategy() { |
|
||||
return new UniversityRankCrawlStrategy(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
@SuppressWarnings("unchecked") |
|
||||
protected void saveToRepository(Object data) { |
|
||||
repository.saveRankings((List<UniversityRank>) data); |
|
||||
System.out.println("成功爬取 " + ((List<UniversityRank>) data).size() + " 条大学排名数据"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取软科中国大学排名"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,19 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
/** |
|
||||
* 退出命令 |
|
||||
* // Command模式:退出命令
|
|
||||
*/ |
|
||||
public class ExitCommand implements Command { |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
System.out.println("\n=== 感谢使用数据爬取系统 ==="); |
|
||||
System.exit(0); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "退出"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,77 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.controller.CrawlerController; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.service.BookAnalysisService; |
|
||||
import com.example.crawler.service.NewsAnalysisService; |
|
||||
import com.example.crawler.service.RankingAnalysisService; |
|
||||
import com.example.crawler.service.WeatherAnalysisService; |
|
||||
|
|
||||
public class GenerateAllAnalysisCommand implements Command { |
|
||||
|
|
||||
private final DataRepository repository; |
|
||||
private final CrawlerController controller; |
|
||||
|
|
||||
public GenerateAllAnalysisCommand(CrawlerController controller) { |
|
||||
this.controller = controller; |
|
||||
this.repository = controller.getRepository(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
System.out.println("\n========== 生成所有数据源分析报告 ==========\n"); |
|
||||
|
|
||||
try { |
|
||||
BookAnalysisService bookService = new BookAnalysisService(); |
|
||||
if (!repository.getBooks().isEmpty()) { |
|
||||
bookService.analyze(repository.getBooks()); |
|
||||
} else { |
|
||||
System.out.println("没有书籍数据,跳过书籍分析"); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("书籍分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
NewsAnalysisService newsService = new NewsAnalysisService(); |
|
||||
if (!repository.getNewsList().isEmpty()) { |
|
||||
newsService.analyze(repository.getNewsList()); |
|
||||
} else { |
|
||||
System.out.println("没有新闻数据,跳过新闻分析"); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("新闻分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
RankingAnalysisService rankingService = new RankingAnalysisService(); |
|
||||
if (!repository.getRankings().isEmpty()) { |
|
||||
rankingService.analyze(repository.getRankings()); |
|
||||
} else { |
|
||||
System.out.println("没有大学排名数据,跳过排名分析"); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("大学排名分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
|
||||
if (!repository.getWeatherList().isEmpty()) { |
|
||||
weatherService.analyze(repository.getWeatherList()); |
|
||||
} else { |
|
||||
System.out.println("没有天气数据,跳过天气分析"); |
|
||||
} |
|
||||
} catch (Exception e) { |
|
||||
System.err.println("天气分析失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 分析完成 =========="); |
|
||||
System.out.println("报告已保存到 reports/ 目录"); |
|
||||
System.out.println("图表已保存到 charts/ 目录"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "生成所有分析报告"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,32 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.strategy.CrawlStrategy; |
|
||||
import com.example.crawler.strategy.NewsCrawlStrategy; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class NewsCommand extends BaseCrawlCommand { |
|
||||
|
|
||||
public NewsCommand(DataRepository repository) { |
|
||||
super(repository); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
protected CrawlStrategy<?> getStrategy() { |
|
||||
return new NewsCrawlStrategy(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
@SuppressWarnings("unchecked") |
|
||||
protected void saveToRepository(Object data) { |
|
||||
repository.saveNewsList((List<News>) data); |
|
||||
System.out.println("成功爬取 " + ((List<News>) data).size() + " 条新闻"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取新浪国内新闻"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,74 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import java.time.LocalDateTime; |
|
||||
import java.time.format.DateTimeFormatter; |
|
||||
import java.util.List; |
|
||||
|
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.controller.CrawlerController; |
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.model.Weather; |
|
||||
import com.example.crawler.util.JsonUtil; |
|
||||
|
|
||||
public class SaveCommand implements Command { |
|
||||
|
|
||||
private final CrawlerController controller; |
|
||||
|
|
||||
public SaveCommand(CrawlerController controller) { |
|
||||
this.controller = controller; |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public void execute() { |
|
||||
System.out.println("\n=== 开始保存数据 ==="); |
|
||||
|
|
||||
try { |
|
||||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); |
|
||||
|
|
||||
// 保存书籍数据
|
|
||||
List<Book> books = controller.getBooks(); |
|
||||
if (books != null && !books.isEmpty()) { |
|
||||
String bookFileName = CrawlerConstants.OUTPUT_DIR + "/books_" + timestamp + ".json"; |
|
||||
JsonUtil.saveListToJsonFile(books, bookFileName); |
|
||||
System.out.println("书籍数据已保存到: " + bookFileName); |
|
||||
} |
|
||||
|
|
||||
// 保存新闻数据
|
|
||||
List<News> newsList = controller.getNewsList(); |
|
||||
if (newsList != null && !newsList.isEmpty()) { |
|
||||
String newsFileName = CrawlerConstants.OUTPUT_DIR + "/news_" + timestamp + ".json"; |
|
||||
JsonUtil.saveListToJsonFile(newsList, newsFileName); |
|
||||
System.out.println("新闻数据已保存到: " + newsFileName); |
|
||||
} |
|
||||
|
|
||||
// 保存大学排名数据
|
|
||||
List<UniversityRank> universityRankList = controller.getUniversityRankList(); |
|
||||
if (universityRankList != null && !universityRankList.isEmpty()) { |
|
||||
String rankingFileName = CrawlerConstants.OUTPUT_DIR + "/university_ranking_" + timestamp + ".json"; |
|
||||
JsonUtil.saveListToJsonFile(universityRankList, rankingFileName); |
|
||||
System.out.println("大学排名数据已保存到: " + rankingFileName); |
|
||||
} |
|
||||
|
|
||||
// 保存天气数据
|
|
||||
List<Weather> weatherList = controller.getWeatherList(); |
|
||||
if (weatherList != null && !weatherList.isEmpty()) { |
|
||||
String weatherFileName = CrawlerConstants.OUTPUT_DIR + "/weather_" + timestamp + ".json"; |
|
||||
JsonUtil.saveListToJsonFile(weatherList, weatherFileName); |
|
||||
System.out.println("天气数据已保存到: " + weatherFileName); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n=== 数据保存完成 ==="); |
|
||||
|
|
||||
} catch (Exception e) { |
|
||||
System.err.println("保存数据失败: " + e.getMessage()); |
|
||||
e.printStackTrace(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "保存当前数据到文件"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,32 +0,0 @@ |
|||||
package com.example.crawler.command; |
|
||||
|
|
||||
import com.example.crawler.model.Weather; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.strategy.CrawlStrategy; |
|
||||
import com.example.crawler.strategy.WeatherCrawlStrategy; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class WeatherCommand extends BaseCrawlCommand { |
|
||||
|
|
||||
public WeatherCommand(DataRepository repository) { |
|
||||
super(repository); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
protected CrawlStrategy<?> getStrategy() { |
|
||||
return new WeatherCrawlStrategy(); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
@SuppressWarnings("unchecked") |
|
||||
protected void saveToRepository(Object data) { |
|
||||
repository.saveWeatherList((List<Weather>) data); |
|
||||
System.out.println("成功爬取 " + ((List<Weather>) data).size() + " 个城市的天气信息"); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getName() { |
|
||||
return "爬取天气数据"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,31 +0,0 @@ |
|||||
package com.example.crawler.constant; |
|
||||
|
|
||||
import java.util.HashMap; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
/**
 * Shared configuration constants for the crawler: HTTP headers, timing values,
 * source URLs, output directories and the cities to query for weather.
 */
public class CrawlerConstants {

    public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
    public static final String REFERER = "https://www.baidu.com";

    public static final int TIMEOUT_MS = 10000;
    public static final int MAX_RETRIES = 3;
    public static final long DELAY_MS = 3000;

    public static final String URL_BOOKS = "https://books.toscrape.com/";
    public static final String URL_NEWS = "https://news.sina.com.cn/china/";
    public static final String URL_RANKING = "https://www.shanghairanking.cn/rankings/bcur/202310";
    public static final String URL_WEATHER_API = "https://api.open-meteo.com/v1/forecast";

    public static final String OUTPUT_DIR = "output";
    public static final String REPORTS_DIR = "reports";
    public static final String CHARTS_DIR = "charts";

    /**
     * City name mapped to a two-element coordinate array.
     *
     * <p>The map is an unmodifiable view, so entries cannot be added, removed
     * or replaced by callers. The arrays themselves are still mutable.
     *
     * <p>NOTE(review): the values look like {latitude, longitude} — confirm
     * against the code that builds the weather request.
     */
    public static final Map<String, double[]> CITY_COORDINATES;

    static {
        Map<String, double[]> coordinates = new HashMap<>();
        coordinates.put("北京", new double[]{39.9042, 116.4074});
        coordinates.put("上海", new double[]{31.2304, 121.4737});
        coordinates.put("广州", new double[]{23.1291, 113.2644});
        // Fully qualified so the file's import block needs no change.
        CITY_COORDINATES = java.util.Collections.unmodifiableMap(coordinates);
    }
}
|
||||
@ -1,90 +0,0 @@ |
|||||
package com.example.crawler.controller; |
|
||||
|
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.Scanner; |
|
||||
|
|
||||
import com.example.crawler.command.BookCommand; |
|
||||
import com.example.crawler.command.Command; |
|
||||
import com.example.crawler.command.CrawlAllCommand; |
|
||||
import com.example.crawler.command.CrawlAndAnalyzeAllCommand; |
|
||||
import com.example.crawler.command.CrawlRankingCommand; |
|
||||
import com.example.crawler.command.ExitCommand; |
|
||||
import com.example.crawler.command.GenerateAllAnalysisCommand; |
|
||||
import com.example.crawler.command.NewsCommand; |
|
||||
import com.example.crawler.command.SaveCommand; |
|
||||
import com.example.crawler.command.WeatherCommand; |
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.model.Weather; |
|
||||
import com.example.crawler.repository.DataRepository; |
|
||||
import com.example.crawler.view.CrawlerView; |
|
||||
|
|
||||
public class CrawlerController { |
|
||||
|
|
||||
private final CrawlerView view; |
|
||||
private final Map<Integer, Command> commandMap; |
|
||||
private final DataRepository repository; |
|
||||
|
|
||||
public CrawlerController() { |
|
||||
this.view = new CrawlerView(); |
|
||||
this.repository = DataRepository.getInstance(); |
|
||||
this.commandMap = new HashMap<>(); |
|
||||
initCommands(); |
|
||||
} |
|
||||
|
|
||||
private void initCommands() { |
|
||||
commandMap.put(1, new BookCommand(repository)); |
|
||||
commandMap.put(2, new NewsCommand(repository)); |
|
||||
commandMap.put(3, new CrawlRankingCommand(repository)); |
|
||||
commandMap.put(4, new WeatherCommand(repository)); |
|
||||
commandMap.put(5, new CrawlAllCommand(this)); |
|
||||
commandMap.put(6, new SaveCommand(this)); |
|
||||
commandMap.put(7, new GenerateAllAnalysisCommand(this)); |
|
||||
commandMap.put(8, new CrawlAndAnalyzeAllCommand(this)); |
|
||||
commandMap.put(9, new ExitCommand()); |
|
||||
} |
|
||||
|
|
||||
public void start() { |
|
||||
Scanner scanner = new Scanner(System.in); |
|
||||
|
|
||||
while (true) { |
|
||||
view.showMenu(); |
|
||||
|
|
||||
int choice = view.getInput(scanner); |
|
||||
|
|
||||
Command command = commandMap.get(choice); |
|
||||
if (command != null) { |
|
||||
command.execute(); |
|
||||
} else { |
|
||||
view.showError("无效的选择,请输入1-9之间的数字"); |
|
||||
} |
|
||||
|
|
||||
if (choice != 9) { |
|
||||
view.pause(scanner); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public List<Book> getBooks() { |
|
||||
return repository.getBooks(); |
|
||||
} |
|
||||
|
|
||||
public List<News> getNewsList() { |
|
||||
return repository.getNewsList(); |
|
||||
} |
|
||||
|
|
||||
public List<UniversityRank> getUniversityRankList() { |
|
||||
return repository.getRankings(); |
|
||||
} |
|
||||
|
|
||||
public List<Weather> getWeatherList() { |
|
||||
return repository.getWeatherList(); |
|
||||
} |
|
||||
|
|
||||
public DataRepository getRepository() { |
|
||||
return repository; |
|
||||
} |
|
||||
} |
|
||||
@ -1,16 +0,0 @@ |
|||||
package com.example.crawler.exception; |
|
||||
|
|
||||
/**
 * Base class for crawler exceptions.
 *
 * <p>All crawler-specific exceptions extend this checked exception.
 */
public class CrawlException extends Exception {

    /**
     * Creates an exception with a message and an underlying cause.
     *
     * @param message description of the failure
     * @param cause   the exception that triggered this one
     */
    public CrawlException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a message only.
     *
     * @param message description of the failure
     */
    public CrawlException(String message) {
        super(message);
    }
}
|
||||
@ -1,16 +0,0 @@ |
|||||
package com.example.crawler.exception; |
|
||||
|
|
||||
/** |
|
||||
* 数据保存异常 |
|
||||
* 用于处理文件写入失败、JSON序列化失败等数据保存相关错误 |
|
||||
*/ |
|
||||
public class DataSaveException extends CrawlException { |
|
||||
|
|
||||
public DataSaveException(String message) { |
|
||||
super(message); |
|
||||
} |
|
||||
|
|
||||
public DataSaveException(String message, Throwable cause) { |
|
||||
super(message, cause); |
|
||||
} |
|
||||
} |
|
||||
@ -1,16 +0,0 @@ |
|||||
package com.example.crawler.exception; |
|
||||
|
|
||||
/** |
|
||||
* 网络异常 |
|
||||
* 用于处理HTTP请求失败、连接超时等网络相关错误 |
|
||||
*/ |
|
||||
public class NetworkException extends CrawlException { |
|
||||
|
|
||||
public NetworkException(String message) { |
|
||||
super(message); |
|
||||
} |
|
||||
|
|
||||
public NetworkException(String message, Throwable cause) { |
|
||||
super(message, cause); |
|
||||
} |
|
||||
} |
|
||||
@ -1,16 +0,0 @@ |
|||||
package com.example.crawler.exception; |
|
||||
|
|
||||
/** |
|
||||
* 解析异常 |
|
||||
* 用于处理HTML解析失败、JSON解析失败等数据解析相关错误 |
|
||||
*/ |
|
||||
public class ParseException extends CrawlException { |
|
||||
|
|
||||
public ParseException(String message) { |
|
||||
super(message); |
|
||||
} |
|
||||
|
|
||||
public ParseException(String message, Throwable cause) { |
|
||||
super(message, cause); |
|
||||
} |
|
||||
} |
|
||||
@ -1,65 +0,0 @@ |
|||||
package com.example.crawler.model; |
|
||||
|
|
||||
/**
 * Book data model.
 *
 * <p>Holds the information of one book from the toscrape.com site. All fields
 * are plain strings and may be {@code null}.
 */
public class Book {

    private String title;
    private String price;
    private String availability;
    private String rating;

    /** Creates a book with every field unset. */
    public Book() {
    }

    /**
     * Creates a fully populated book.
     *
     * @param title        book title
     * @param price        price text
     * @param availability availability text
     * @param rating       rating text
     */
    public Book(String title, String price, String availability, String rating) {
        this.title = title;
        this.price = price;
        this.availability = availability;
        this.rating = rating;
    }

    /** @return the book title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the book title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the price text */
    public String getPrice() {
        return this.price;
    }

    /** @param price the price text */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @return the availability text */
    public String getAvailability() {
        return this.availability;
    }

    /** @param availability the availability text */
    public void setAvailability(String availability) {
        this.availability = availability;
    }

    /** @return the rating text */
    public String getRating() {
        return this.rating;
    }

    /** @param rating the rating text */
    public void setRating(String rating) {
        this.rating = rating;
    }

    /** Returns all four fields in {@code Book{name='value', ...}} form. */
    @Override
    public String toString() {
        return String.format(
                "Book{title='%s', price='%s', availability='%s', rating='%s'}",
                title, price, availability, rating);
    }
}
|
||||
@ -1,54 +0,0 @@ |
|||||
package com.example.crawler.model; |
|
||||
|
|
||||
/**
 * News data model.
 *
 * <p>Holds one domestic news item from Sina News. All fields are plain strings
 * and may be {@code null}.
 */
public class News {

    private String title;
    private String publishTime;
    private String url;

    /** Creates a news item with every field unset. */
    public News() {
    }

    /**
     * Creates a fully populated news item.
     *
     * @param title       headline
     * @param publishTime publication time text
     * @param url         link to the article
     */
    public News(String title, String publishTime, String url) {
        this.title = title;
        this.publishTime = publishTime;
        this.url = url;
    }

    /** @return the headline */
    public String getTitle() {
        return this.title;
    }

    /** @param title the headline */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the publication time text */
    public String getPublishTime() {
        return this.publishTime;
    }

    /** @param publishTime the publication time text */
    public void setPublishTime(String publishTime) {
        this.publishTime = publishTime;
    }

    /** @return the article link */
    public String getUrl() {
        return this.url;
    }

    /** @param url the article link */
    public void setUrl(String url) {
        this.url = url;
    }

    /** Returns all three fields in {@code News{name='value', ...}} form. */
    @Override
    public String toString() {
        return String.format(
                "News{title='%s', publishTime='%s', url='%s'}",
                title, publishTime, url);
    }
}
|
||||
@ -1,76 +0,0 @@ |
|||||
package com.example.crawler.model; |
|
||||
|
|
||||
/**
 * University ranking data model.
 *
 * <p>Holds one entry of the ShanghaiRanking (软科) Chinese university ranking.
 * Every field may be {@code null}.
 */
public class UniversityRank {

    private Integer rank;
    private String universityName;
    private String totalScore;
    private String province;
    private String category;

    /** Creates an entry with every field unset. */
    public UniversityRank() {
    }

    /**
     * Creates a fully populated ranking entry.
     *
     * @param rank           position in the ranking
     * @param universityName name of the university
     * @param totalScore     total score text
     * @param province       province text
     * @param category       category text
     */
    public UniversityRank(Integer rank, String universityName, String totalScore, String province, String category) {
        this.rank = rank;
        this.universityName = universityName;
        this.totalScore = totalScore;
        this.province = province;
        this.category = category;
    }

    /** @return the position in the ranking */
    public Integer getRank() {
        return this.rank;
    }

    /** @param rank the position in the ranking */
    public void setRank(Integer rank) {
        this.rank = rank;
    }

    /** @return the university name */
    public String getUniversityName() {
        return this.universityName;
    }

    /** @param universityName the university name */
    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    /** @return the total score text */
    public String getTotalScore() {
        return this.totalScore;
    }

    /** @param totalScore the total score text */
    public void setTotalScore(String totalScore) {
        this.totalScore = totalScore;
    }

    /** @return the province text */
    public String getProvince() {
        return this.province;
    }

    /** @param province the province text */
    public void setProvince(String province) {
        this.province = province;
    }

    /** @return the category text */
    public String getCategory() {
        return this.category;
    }

    /** @param category the category text */
    public void setCategory(String category) {
        this.category = category;
    }

    /** Returns all five fields in {@code UniversityRank{name=value, ...}} form. */
    @Override
    public String toString() {
        return String.format(
                "UniversityRank{rank=%s, universityName='%s', totalScore='%s', province='%s', category='%s'}",
                rank, universityName, totalScore, province, category);
    }
}
|
||||
@ -1,140 +0,0 @@ |
|||||
package com.example.crawler.model; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
/**
 * Weather data model.
 *
 * <p>Stores the weather information of one city obtained from the Open-Meteo
 * API: the current readings plus hourly series. Data source: Open-Meteo
 * (CC BY 4.0).
 *
 * <p>The hourly lists are never {@code null}: constructors start them empty and
 * the setters replace a {@code null} argument with a fresh empty list, so
 * callers may invoke {@code isEmpty()} / {@code size()} on the getters without
 * a null check.
 */
public class Weather {

    private String cityName;
    private double temperature;
    private double humidity;
    private double windSpeed;
    private String weatherCode;
    private List<String> hourlyTimes;
    private List<Double> hourlyTemperatures;
    private List<Integer> hourlyHumidities;
    private List<Double> hourlyWindSpeeds;

    /** Creates an instance with empty hourly series and default scalar values. */
    public Weather() {
        this.hourlyTimes = new ArrayList<>();
        this.hourlyTemperatures = new ArrayList<>();
        this.hourlyHumidities = new ArrayList<>();
        this.hourlyWindSpeeds = new ArrayList<>();
    }

    /**
     * Creates an instance with the given current readings and empty hourly series.
     *
     * @param cityName    name of the city
     * @param temperature current temperature
     * @param humidity    current humidity
     * @param windSpeed   current wind speed
     * @param weatherCode weather code as text (see {@link #getWeatherDescription()})
     */
    public Weather(String cityName, double temperature, double humidity, double windSpeed, String weatherCode) {
        this();
        this.cityName = cityName;
        this.temperature = temperature;
        this.humidity = humidity;
        this.windSpeed = windSpeed;
        this.weatherCode = weatherCode;
    }

    /** @return the city name, possibly {@code null} */
    public String getCityName() {
        return cityName;
    }

    /** @param cityName the city name to store */
    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    /** @return the current temperature */
    public double getTemperature() {
        return temperature;
    }

    /** @param temperature the current temperature to store */
    public void setTemperature(double temperature) {
        this.temperature = temperature;
    }

    /** @return the current humidity */
    public double getHumidity() {
        return humidity;
    }

    /** @param humidity the current humidity to store */
    public void setHumidity(double humidity) {
        this.humidity = humidity;
    }

    /** @return the current wind speed */
    public double getWindSpeed() {
        return windSpeed;
    }

    /** @param windSpeed the current wind speed to store */
    public void setWindSpeed(double windSpeed) {
        this.windSpeed = windSpeed;
    }

    /** @return the weather code as text, possibly {@code null} */
    public String getWeatherCode() {
        return weatherCode;
    }

    /** @param weatherCode the weather code text to store */
    public void setWeatherCode(String weatherCode) {
        this.weatherCode = weatherCode;
    }

    /** @return the live (not copied) list of hourly time labels; never {@code null} */
    public List<String> getHourlyTimes() {
        return hourlyTimes;
    }

    /** @param hourlyTimes the list to store by reference; {@code null} becomes an empty list */
    public void setHourlyTimes(List<String> hourlyTimes) {
        this.hourlyTimes = hourlyTimes != null ? hourlyTimes : new ArrayList<>();
    }

    /** @return the live (not copied) list of hourly temperatures; never {@code null} */
    public List<Double> getHourlyTemperatures() {
        return hourlyTemperatures;
    }

    /** @param hourlyTemperatures the list to store by reference; {@code null} becomes an empty list */
    public void setHourlyTemperatures(List<Double> hourlyTemperatures) {
        this.hourlyTemperatures = hourlyTemperatures != null ? hourlyTemperatures : new ArrayList<>();
    }

    /** @return the live (not copied) list of hourly humidities; never {@code null} */
    public List<Integer> getHourlyHumidities() {
        return hourlyHumidities;
    }

    /** @param hourlyHumidities the list to store by reference; {@code null} becomes an empty list */
    public void setHourlyHumidities(List<Integer> hourlyHumidities) {
        this.hourlyHumidities = hourlyHumidities != null ? hourlyHumidities : new ArrayList<>();
    }

    /** @return the live (not copied) list of hourly wind speeds; never {@code null} */
    public List<Double> getHourlyWindSpeeds() {
        return hourlyWindSpeeds;
    }

    /** @param hourlyWindSpeeds the list to store by reference; {@code null} becomes an empty list */
    public void setHourlyWindSpeeds(List<Double> hourlyWindSpeeds) {
        this.hourlyWindSpeeds = hourlyWindSpeeds != null ? hourlyWindSpeeds : new ArrayList<>();
    }

    /**
     * Translates {@link #getWeatherCode()} into a short Chinese label.
     *
     * <p>The codes are the WMO weather interpretation codes that Open-Meteo
     * reports. Labels for the codes that were already mapped are unchanged; the
     * freezing-drizzle, freezing-rain, snow, snow-grain and snow-shower codes,
     * which used to fall through to "未知", now have their own labels.
     *
     * @return the label for the code, or "未知" when the code is {@code null} or
     *         not recognised
     */
    public String getWeatherDescription() {
        if (weatherCode == null) {
            return "未知";
        }
        switch (weatherCode) {
            case "0":
                return "晴";
            case "1":
            case "2":
            case "3":
                return "多云";
            case "45":
            case "48":
                return "雾";
            case "51":
            case "53":
            case "55":
                return "小毛毛雨";
            case "56":
            case "57":
                return "冻毛毛雨";
            case "61":
            case "63":
            case "65":
                return "小雨";
            case "66":
            case "67":
                return "冻雨";
            case "71":
            case "73":
            case "75":
                return "雪";
            case "77":
                return "米雪";
            case "80":
            case "81":
            case "82":
                return "阵雨";
            case "85":
            case "86":
                return "阵雪";
            case "95":
                return "雷暴";
            case "96":
            case "99":
                return "雷暴加冰雹";
            default:
                return "未知";
        }
    }

    /**
     * Renders the current readings (not the hourly series) on one line,
     * including the label from {@link #getWeatherDescription()}.
     */
    @Override
    public String toString() {
        return "Weather{" +
                "cityName='" + cityName + '\'' +
                ", temperature=" + temperature +
                ", humidity=" + humidity +
                ", windSpeed=" + windSpeed +
                ", weatherCode='" + weatherCode + '\'' +
                ", weather='" + getWeatherDescription() + '\'' +
                '}';
    }
}
|
||||
@ -1,75 +0,0 @@ |
|||||
package com.example.crawler.repository; |
|
||||
|
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.model.Weather; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
public class DataRepository { |
|
||||
|
|
||||
private static DataRepository instance; |
|
||||
|
|
||||
private List<Book> books; |
|
||||
private List<News> newsList; |
|
||||
private List<UniversityRank> rankings; |
|
||||
private List<Weather> weatherList; |
|
||||
|
|
||||
private DataRepository() { |
|
||||
this.books = new ArrayList<>(); |
|
||||
this.newsList = new ArrayList<>(); |
|
||||
this.rankings = new ArrayList<>(); |
|
||||
this.weatherList = new ArrayList<>(); |
|
||||
} |
|
||||
|
|
||||
public static synchronized DataRepository getInstance() { |
|
||||
if (instance == null) { |
|
||||
instance = new DataRepository(); |
|
||||
} |
|
||||
return instance; |
|
||||
} |
|
||||
|
|
||||
public List<Book> getBooks() { |
|
||||
return new ArrayList<>(books); |
|
||||
} |
|
||||
|
|
||||
public void saveBooks(List<Book> books) { |
|
||||
this.books.clear(); |
|
||||
this.books.addAll(books); |
|
||||
} |
|
||||
|
|
||||
public List<News> getNewsList() { |
|
||||
return new ArrayList<>(newsList); |
|
||||
} |
|
||||
|
|
||||
public void saveNewsList(List<News> newsList) { |
|
||||
this.newsList.clear(); |
|
||||
this.newsList.addAll(newsList); |
|
||||
} |
|
||||
|
|
||||
public List<UniversityRank> getRankings() { |
|
||||
return new ArrayList<>(rankings); |
|
||||
} |
|
||||
|
|
||||
public void saveRankings(List<UniversityRank> rankings) { |
|
||||
this.rankings.clear(); |
|
||||
this.rankings.addAll(rankings); |
|
||||
} |
|
||||
|
|
||||
public List<Weather> getWeatherList() { |
|
||||
return new ArrayList<>(weatherList); |
|
||||
} |
|
||||
|
|
||||
public void saveWeatherList(List<Weather> weatherList) { |
|
||||
this.weatherList.clear(); |
|
||||
this.weatherList.addAll(weatherList); |
|
||||
} |
|
||||
|
|
||||
public void clearAll() { |
|
||||
books.clear(); |
|
||||
newsList.clear(); |
|
||||
rankings.clear(); |
|
||||
weatherList.clear(); |
|
||||
} |
|
||||
} |
|
||||
@ -1,171 +0,0 @@ |
|||||
package com.example.crawler.service; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.io.PrintWriter; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
import com.example.crawler.chart.ChartGenerator; |
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.util.DataCleaner; |
|
||||
|
|
||||
public class BookAnalysisService { |
|
||||
|
|
||||
static { |
|
||||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public void analyze(List<Book> books) { |
|
||||
if (books == null || books.isEmpty()) { |
|
||||
System.out.println("没有书籍数据可分析"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 书籍数据分析 =========="); |
|
||||
System.out.println("共分析 " + books.size() + " 本书\n"); |
|
||||
|
|
||||
analyzePriceDistribution(books); |
|
||||
analyzeRatingDistribution(books); |
|
||||
analyzeStockStatus(books); |
|
||||
|
|
||||
generateReport(books); |
|
||||
} |
|
||||
|
|
||||
private void analyzePriceDistribution(List<Book> books) { |
|
||||
System.out.println("【价格分析】"); |
|
||||
List<Double> prices = new ArrayList<>(); |
|
||||
for (Book book : books) { |
|
||||
double price = DataCleaner.cleanPrice(book.getPrice()); |
|
||||
if (price > 0) { |
|
||||
prices.add(price); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (prices.isEmpty()) { |
|
||||
System.out.println("无法获取有效价格数据"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
double maxPrice = prices.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|
||||
double minPrice = prices.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|
||||
double avgPrice = prices.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|
||||
|
|
||||
System.out.println("最高价: £" + String.format("%.2f", maxPrice)); |
|
||||
System.out.println("最低价: £" + String.format("%.2f", minPrice)); |
|
||||
System.out.println("平均价: £" + String.format("%.2f", avgPrice)); |
|
||||
|
|
||||
Map<String, Integer> priceRanges = new HashMap<>(); |
|
||||
String[] ranges = {"0-10", "10-20", "20-30", "30-40", "40-50", "50+"}; |
|
||||
for (String range : ranges) { |
|
||||
priceRanges.put(range, 0); |
|
||||
} |
|
||||
|
|
||||
for (Double price : prices) { |
|
||||
if (price < 10) priceRanges.put("0-10", priceRanges.get("0-10") + 1); |
|
||||
else if (price < 20) priceRanges.put("10-20", priceRanges.get("10-20") + 1); |
|
||||
else if (price < 30) priceRanges.put("20-30", priceRanges.get("20-30") + 1); |
|
||||
else if (price < 40) priceRanges.put("30-40", priceRanges.get("30-40") + 1); |
|
||||
else if (price < 50) priceRanges.put("40-50", priceRanges.get("40-50") + 1); |
|
||||
else priceRanges.put("50+", priceRanges.get("50+") + 1); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n价格区间分布:"); |
|
||||
for (Map.Entry<String, Integer> entry : priceRanges.entrySet()) { |
|
||||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本"); |
|
||||
} |
|
||||
|
|
||||
ChartGenerator.generatePriceHistogram(priceRanges, "price_histogram.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeRatingDistribution(List<Book> books) { |
|
||||
System.out.println("\n【评分分析】"); |
|
||||
Map<String, Integer> ratingCounts = new HashMap<>(); |
|
||||
ratingCounts.put("5星", 0); |
|
||||
ratingCounts.put("4星", 0); |
|
||||
ratingCounts.put("3星", 0); |
|
||||
ratingCounts.put("2星", 0); |
|
||||
ratingCounts.put("1星", 0); |
|
||||
ratingCounts.put("未知", 0); |
|
||||
|
|
||||
for (Book book : books) { |
|
||||
int rating = DataCleaner.cleanRating(book.getRating()); |
|
||||
switch (rating) { |
|
||||
case 5: ratingCounts.put("5星", ratingCounts.get("5星") + 1); break; |
|
||||
case 4: ratingCounts.put("4星", ratingCounts.get("4星") + 1); break; |
|
||||
case 3: ratingCounts.put("3星", ratingCounts.get("3星") + 1); break; |
|
||||
case 2: ratingCounts.put("2星", ratingCounts.get("2星") + 1); break; |
|
||||
case 1: ratingCounts.put("1星", ratingCounts.get("1星") + 1); break; |
|
||||
default: ratingCounts.put("未知", ratingCounts.get("未知") + 1); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
int total = books.size(); |
|
||||
System.out.println("评分分布:"); |
|
||||
for (Map.Entry<String, Integer> entry : ratingCounts.entrySet()) { |
|
||||
double percentage = (entry.getValue() * 100.0) / total; |
|
||||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本 (" + String.format("%.1f", percentage) + "%)"); |
|
||||
} |
|
||||
|
|
||||
ChartGenerator.generateRatingPieChart(ratingCounts, "rating_pie.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeStockStatus(List<Book> books) { |
|
||||
System.out.println("\n【库存分析】"); |
|
||||
int inStock = 0; |
|
||||
int outOfStock = 0; |
|
||||
|
|
||||
for (Book book : books) { |
|
||||
String availability = book.getAvailability(); |
|
||||
if (availability != null && availability.toLowerCase().contains("in stock")) { |
|
||||
inStock++; |
|
||||
} else { |
|
||||
outOfStock++; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("有库存: " + inStock + " 本"); |
|
||||
System.out.println("缺货: " + outOfStock + " 本"); |
|
||||
} |
|
||||
|
|
||||
private void generateReport(List<Book> books) { |
|
||||
String fileName = CrawlerConstants.REPORTS_DIR + "/book_analysis_report.txt"; |
|
||||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|
||||
writer.println("========== 书籍数据分析报告 =========="); |
|
||||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|
||||
writer.println("分析书籍总数: " + books.size()); |
|
||||
writer.println(); |
|
||||
|
|
||||
List<Double> prices = books.stream() |
|
||||
.map(b -> DataCleaner.cleanPrice(b.getPrice())) |
|
||||
.filter(p -> p > 0) |
|
||||
.collect(Collectors.toList()); |
|
||||
|
|
||||
if (!prices.isEmpty()) { |
|
||||
writer.println("【价格统计】"); |
|
||||
writer.println("最高价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
|
||||
writer.println("最低价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
|
||||
writer.println("平均价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|
||||
writer.println(); |
|
||||
} |
|
||||
|
|
||||
writer.println("【库存统计】"); |
|
||||
long inStock = books.stream().filter(b -> b.getAvailability() != null && b.getAvailability().toLowerCase().contains("in stock")).count(); |
|
||||
writer.println("有库存: " + inStock + " 本"); |
|
||||
writer.println("缺货: " + (books.size() - inStock) + " 本"); |
|
||||
|
|
||||
writer.println("\n报告生成完成"); |
|
||||
System.out.println("\n报告已保存: " + fileName); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("生成报告失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,138 +0,0 @@ |
|||||
package com.example.crawler.service; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.io.PrintWriter; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
import com.example.crawler.chart.ChartGenerator; |
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.util.DataCleaner; |
|
||||
|
|
||||
public class NewsAnalysisService { |
|
||||
|
|
||||
static { |
|
||||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public void analyze(List<News> newsList) { |
|
||||
if (newsList == null || newsList.isEmpty()) { |
|
||||
System.out.println("没有新闻数据可分析"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 新闻数据分析 =========="); |
|
||||
System.out.println("共分析 " + newsList.size() + " 条新闻\n"); |
|
||||
|
|
||||
analyzeTimeDistribution(newsList); |
|
||||
analyzeKeywords(newsList); |
|
||||
|
|
||||
generateReport(newsList); |
|
||||
} |
|
||||
|
|
||||
private void analyzeTimeDistribution(List<News> newsList) { |
|
||||
System.out.println("【发布时间分布】"); |
|
||||
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
|
||||
for (int i = 0; i < 24; i++) { |
|
||||
hourDistribution.put(i, 0); |
|
||||
} |
|
||||
|
|
||||
for (News news : newsList) { |
|
||||
try { |
|
||||
java.time.LocalDateTime dateTime = DataCleaner.cleanNewsTime(news.getPublishTime()); |
|
||||
int hour = DataCleaner.extractHour(dateTime); |
|
||||
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
|
||||
} catch (Exception e) { |
|
||||
// 忽略解析失败的数据
|
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n按小时统计:"); |
|
||||
for (int i = 0; i < 24; i++) { |
|
||||
int count = hourDistribution.get(i); |
|
||||
String bar = "*".repeat(Math.max(1, count)); |
|
||||
System.out.printf(" %02d:00 - %02d:00: %3d %s%n", i, (i + 1) % 24, count, bar); |
|
||||
} |
|
||||
|
|
||||
int peakHour = 0; |
|
||||
int peakCount = 0; |
|
||||
for (Map.Entry<Integer, Integer> entry : hourDistribution.entrySet()) { |
|
||||
if (entry.getValue() > peakCount) { |
|
||||
peakCount = entry.getValue(); |
|
||||
peakHour = entry.getKey(); |
|
||||
} |
|
||||
} |
|
||||
System.out.println("\n高峰时段: " + String.format("%02d:00", peakHour) + " (发布 " + peakCount + " 条新闻)"); |
|
||||
|
|
||||
ChartGenerator.generateNewsTimeTrend(hourDistribution, "news_time_trend.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeKeywords(List<News> newsList) { |
|
||||
System.out.println("\n【关键词分析】"); |
|
||||
Map<String, Integer> allWords = new HashMap<>(); |
|
||||
|
|
||||
for (News news : newsList) { |
|
||||
String title = DataCleaner.cleanTitle(news.getTitle()); |
|
||||
String[] words = DataCleaner.extractWords(title); |
|
||||
Map<String, Integer> wordFreq = DataCleaner.countWordFrequency(words); |
|
||||
for (Map.Entry<String, Integer> entry : wordFreq.entrySet()) { |
|
||||
allWords.put(entry.getKey(), allWords.getOrDefault(entry.getKey(), 0) + entry.getValue()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
List<Map.Entry<String, Integer>> sortedWords = allWords.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.limit(20) |
|
||||
.collect(Collectors.toList()); |
|
||||
|
|
||||
System.out.println("\n高频词 TOP 10:"); |
|
||||
for (int i = 0; i < Math.min(10, sortedWords.size()); i++) { |
|
||||
Map.Entry<String, Integer> entry = sortedWords.get(i); |
|
||||
System.out.printf(" %2d. %s: %d%n", i + 1, entry.getKey(), entry.getValue()); |
|
||||
} |
|
||||
|
|
||||
Map<String, Integer> top10 = sortedWords.stream() |
|
||||
.limit(10) |
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|
||||
|
|
||||
ChartGenerator.generateWordFrequencyBarChart(top10, "news_top_words.png"); |
|
||||
} |
|
||||
|
|
||||
private void generateReport(List<News> newsList) { |
|
||||
String fileName = CrawlerConstants.REPORTS_DIR + "/news_analysis_report.txt"; |
|
||||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|
||||
writer.println("========== 新闻数据分析报告 =========="); |
|
||||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|
||||
writer.println("分析新闻总数: " + newsList.size()); |
|
||||
writer.println(); |
|
||||
|
|
||||
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
|
||||
for (int i = 0; i < 24; i++) hourDistribution.put(i, 0); |
|
||||
for (News news : newsList) { |
|
||||
try { |
|
||||
int hour = DataCleaner.extractHour(DataCleaner.cleanNewsTime(news.getPublishTime())); |
|
||||
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
|
||||
} catch (Exception e) {} |
|
||||
} |
|
||||
|
|
||||
writer.println("【发布时间分布】"); |
|
||||
for (int i = 0; i < 24; i++) { |
|
||||
writer.println(String.format(" %02d:00 - %02d:00: %d 条", i, (i + 1) % 24, hourDistribution.get(i))); |
|
||||
} |
|
||||
|
|
||||
writer.println("\n报告生成完成"); |
|
||||
System.out.println("\n报告已保存: " + fileName); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("生成报告失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,189 +0,0 @@ |
|||||
package com.example.crawler.service; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.io.PrintWriter; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
import com.example.crawler.chart.ChartGenerator; |
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.util.DataCleaner; |
|
||||
|
|
||||
public class RankingAnalysisService { |
|
||||
|
|
||||
static { |
|
||||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public void analyze(List<UniversityRank> ranks) { |
|
||||
if (ranks == null || ranks.isEmpty()) { |
|
||||
System.out.println("没有大学排名数据可分析"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 大学排名数据分析 =========="); |
|
||||
System.out.println("共分析 " + ranks.size() + " 所大学\n"); |
|
||||
|
|
||||
analyzeProvinceDistribution(ranks); |
|
||||
analyzeScoreDistribution(ranks); |
|
||||
analyzeCategoryDistribution(ranks); |
|
||||
|
|
||||
generateReport(ranks); |
|
||||
} |
|
||||
|
|
||||
private void analyzeProvinceDistribution(List<UniversityRank> ranks) { |
|
||||
System.out.println("【各省份上榜大学数量】"); |
|
||||
Map<String, Integer> provinceCounts = new HashMap<>(); |
|
||||
|
|
||||
for (UniversityRank rank : ranks) { |
|
||||
String province = rank.getProvince(); |
|
||||
if (province != null && !province.isEmpty()) { |
|
||||
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
List<Map.Entry<String, Integer>> sorted = provinceCounts.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.collect(Collectors.toList()); |
|
||||
|
|
||||
System.out.println("\n省份排行榜 TOP 10:"); |
|
||||
int rankNum = 1; |
|
||||
for (Map.Entry<String, Integer> entry : sorted) { |
|
||||
if (rankNum > 10) break; |
|
||||
System.out.printf(" %2d. %s: %d 所大学%n", rankNum++, entry.getKey(), entry.getValue()); |
|
||||
} |
|
||||
|
|
||||
Map<String, Integer> top10 = sorted.stream() |
|
||||
.limit(10) |
|
||||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|
||||
|
|
||||
ChartGenerator.generateProvinceBarChart(top10, "province_bar.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeScoreDistribution(List<UniversityRank> ranks) { |
|
||||
System.out.println("\n【总分分析】"); |
|
||||
List<Double> scores = new ArrayList<>(); |
|
||||
|
|
||||
for (UniversityRank rank : ranks) { |
|
||||
double score = DataCleaner.cleanScore(rank.getTotalScore()); |
|
||||
if (score > 0) { |
|
||||
scores.add(score); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (scores.isEmpty()) { |
|
||||
System.out.println("无法获取有效分数数据"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
double maxScore = scores.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|
||||
double minScore = scores.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|
||||
double avgScore = scores.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|
||||
|
|
||||
List<Double> sortedScores = scores.stream().sorted().collect(Collectors.toList()); |
|
||||
double median = sortedScores.get(sortedScores.size() / 2); |
|
||||
|
|
||||
System.out.println("最高分: " + String.format("%.2f", maxScore)); |
|
||||
System.out.println("最低分: " + String.format("%.2f", minScore)); |
|
||||
System.out.println("平均分: " + String.format("%.2f", avgScore)); |
|
||||
System.out.println("中位数: " + String.format("%.2f", median)); |
|
||||
|
|
||||
Map<String, Integer> scoreRanges = new HashMap<>(); |
|
||||
String[] ranges = {"0-20", "20-40", "40-60", "60-80", "80-100"}; |
|
||||
for (String range : ranges) { |
|
||||
scoreRanges.put(range, 0); |
|
||||
} |
|
||||
|
|
||||
for (Double score : scores) { |
|
||||
if (score < 20) scoreRanges.put("0-20", scoreRanges.get("0-20") + 1); |
|
||||
else if (score < 40) scoreRanges.put("20-40", scoreRanges.get("20-40") + 1); |
|
||||
else if (score < 60) scoreRanges.put("40-60", scoreRanges.get("40-60") + 1); |
|
||||
else if (score < 80) scoreRanges.put("60-80", scoreRanges.get("60-80") + 1); |
|
||||
else scoreRanges.put("80-100", scoreRanges.get("80-100") + 1); |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n分数区间分布:"); |
|
||||
for (Map.Entry<String, Integer> entry : scoreRanges.entrySet()) { |
|
||||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 所"); |
|
||||
} |
|
||||
|
|
||||
ChartGenerator.generateScoreHistogram(scoreRanges, "score_boxplot.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeCategoryDistribution(List<UniversityRank> ranks) { |
|
||||
System.out.println("\n【办学层次统计】"); |
|
||||
Map<String, Integer> categoryCounts = new HashMap<>(); |
|
||||
|
|
||||
for (UniversityRank rank : ranks) { |
|
||||
String category = rank.getCategory(); |
|
||||
if (category != null && !category.isEmpty()) { |
|
||||
categoryCounts.put(category, categoryCounts.getOrDefault(category, 0) + 1); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (categoryCounts.isEmpty()) { |
|
||||
System.out.println("没有办学层次数据"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
List<Map.Entry<String, Integer>> sorted = categoryCounts.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.collect(Collectors.toList()); |
|
||||
|
|
||||
System.out.println("\n办学层次分布:"); |
|
||||
for (Map.Entry<String, Integer> entry : sorted) { |
|
||||
System.out.printf(" %s: %d 所%n", entry.getKey(), entry.getValue()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private void generateReport(List<UniversityRank> ranks) { |
|
||||
String fileName = CrawlerConstants.REPORTS_DIR + "/ranking_analysis_report.txt"; |
|
||||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|
||||
writer.println("========== 大学排名数据分析报告 =========="); |
|
||||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|
||||
writer.println("分析大学总数: " + ranks.size()); |
|
||||
writer.println(); |
|
||||
|
|
||||
Map<String, Integer> provinceCounts = new HashMap<>(); |
|
||||
for (UniversityRank rank : ranks) { |
|
||||
String province = rank.getProvince(); |
|
||||
if (province != null && !province.isEmpty()) { |
|
||||
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
writer.println("【省份排行榜 TOP 10】"); |
|
||||
provinceCounts.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|
||||
.limit(10) |
|
||||
.forEach(e -> writer.println(" " + e.getKey() + ": " + e.getValue() + " 所大学")); |
|
||||
|
|
||||
List<Double> scores = ranks.stream() |
|
||||
.map(r -> DataCleaner.cleanScore(r.getTotalScore())) |
|
||||
.filter(s -> s > 0) |
|
||||
.collect(Collectors.toList()); |
|
||||
|
|
||||
if (!scores.isEmpty()) { |
|
||||
writer.println(); |
|
||||
writer.println("【分数统计】"); |
|
||||
writer.println("最高分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
|
||||
writer.println("最低分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
|
||||
writer.println("平均分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|
||||
} |
|
||||
|
|
||||
writer.println("\n报告生成完成"); |
|
||||
System.out.println("\n报告已保存: " + fileName); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("生成报告失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,163 +0,0 @@ |
|||||
package com.example.crawler.service; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.io.PrintWriter; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
import com.example.crawler.chart.ChartGenerator; |
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.model.Weather; |
|
||||
|
|
||||
public class WeatherAnalysisService { |
|
||||
|
|
||||
static { |
|
||||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
public void analyze(List<Weather> weatherList) { |
|
||||
if (weatherList == null || weatherList.isEmpty()) { |
|
||||
System.out.println("没有天气数据可分析"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
System.out.println("\n========== 天气数据分析 =========="); |
|
||||
System.out.println("共分析 " + weatherList.size() + " 个城市\n"); |
|
||||
|
|
||||
analyzeCurrentWeather(weatherList); |
|
||||
analyzeTemperatureTrend(weatherList); |
|
||||
analyzeHumidityTrend(weatherList); |
|
||||
analyzeComfortIndex(weatherList); |
|
||||
|
|
||||
generateReport(weatherList); |
|
||||
} |
|
||||
|
|
||||
private void analyzeCurrentWeather(List<Weather> weatherList) { |
|
||||
System.out.println("【当前天气对比】"); |
|
||||
System.out.println("┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐"); |
|
||||
System.out.println("│ 城市名称 │ 温度(°C)│ 湿度(%) │ 风速(km/h)│ 天气状况 │ 舒适度 │"); |
|
||||
System.out.println("├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤"); |
|
||||
|
|
||||
for (Weather weather : weatherList) { |
|
||||
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
|
||||
String comfortDesc = getComfortDescription(comfort); |
|
||||
System.out.printf("│ %-8s │ %8.1f │ %8.0f │ %8.1f │ %-8s │ %-8s │%n", |
|
||||
weather.getCityName(), |
|
||||
weather.getTemperature(), |
|
||||
weather.getHumidity(), |
|
||||
weather.getWindSpeed(), |
|
||||
weather.getWeatherDescription(), |
|
||||
comfortDesc); |
|
||||
} |
|
||||
System.out.println("└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeTemperatureTrend(List<Weather> weatherList) { |
|
||||
System.out.println("\n【未来24小时温度分析】"); |
|
||||
|
|
||||
Map<String, List<Double>> cityTemperatures = new HashMap<>(); |
|
||||
for (Weather weather : weatherList) { |
|
||||
cityTemperatures.put(weather.getCityName(), weather.getHourlyTemperatures()); |
|
||||
|
|
||||
List<Double> temps = weather.getHourlyTemperatures(); |
|
||||
if (!temps.isEmpty()) { |
|
||||
double maxTemp = temps.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|
||||
double minTemp = temps.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|
||||
double avgTemp = temps.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|
||||
|
|
||||
int maxIndex = temps.indexOf(maxTemp); |
|
||||
int minIndex = temps.indexOf(minTemp); |
|
||||
|
|
||||
String maxTime = maxIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(maxIndex) : ""; |
|
||||
String minTime = minIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(minIndex) : ""; |
|
||||
|
|
||||
System.out.printf(" %s: 最高 %.1f°C(%s) 最低 %.1f°C(%s) 平均 %.1f°C%n", |
|
||||
weather.getCityName(), maxTemp, maxTime, minTemp, minTime, avgTemp); |
|
||||
} |
|
||||
|
|
||||
ChartGenerator.generateTemperatureTrend( |
|
||||
weather.getHourlyTimes(), |
|
||||
weather.getHourlyTemperatures(), |
|
||||
weather.getCityName(), |
|
||||
"temperature_" + weather.getCityName() + ".png" |
|
||||
); |
|
||||
} |
|
||||
|
|
||||
ChartGenerator.generateMultiCityTemperatureComparison(cityTemperatures, "temperature_comparison.png"); |
|
||||
} |
|
||||
|
|
||||
private void analyzeHumidityTrend(List<Weather> weatherList) { |
|
||||
System.out.println("\n【未来24小时湿度分析】"); |
|
||||
for (Weather weather : weatherList) { |
|
||||
List<Integer> humidities = weather.getHourlyHumidities(); |
|
||||
if (!humidities.isEmpty()) { |
|
||||
double avgHumidity = humidities.stream().mapToInt(Integer::intValue).average().orElse(0); |
|
||||
System.out.printf(" %s: 平均湿度 %.0f%%%n", weather.getCityName(), avgHumidity); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private void analyzeComfortIndex(List<Weather> weatherList) { |
|
||||
System.out.println("\n【舒适度指数分析】"); |
|
||||
System.out.println("(基于温度和湿度的体感舒适度计算,0-100分制)"); |
|
||||
|
|
||||
for (Weather weather : weatherList) { |
|
||||
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
|
||||
String description = getComfortDescription(comfort); |
|
||||
System.out.printf(" %s: %.1f分 (%s)%n", weather.getCityName(), comfort, description); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private double calculateComfortIndex(double temperature, double humidity) { |
|
||||
double tempDiff = Math.abs(temperature - 22); |
|
||||
double humDiff = Math.abs(humidity - 50); |
|
||||
|
|
||||
double comfort = 100 - (tempDiff * 3 + humDiff * 0.5); |
|
||||
return Math.max(0, Math.min(100, comfort)); |
|
||||
} |
|
||||
|
|
||||
private String getComfortDescription(double comfort) { |
|
||||
if (comfort >= 80) return "非常舒适"; |
|
||||
if (comfort >= 60) return "舒适"; |
|
||||
if (comfort >= 40) return "一般"; |
|
||||
if (comfort >= 20) return "不舒适"; |
|
||||
return "极不舒适"; |
|
||||
} |
|
||||
|
|
||||
private void generateReport(List<Weather> weatherList) { |
|
||||
String fileName = CrawlerConstants.REPORTS_DIR + "/weather_analysis_report.txt"; |
|
||||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|
||||
writer.println("========== 天气数据分析报告 =========="); |
|
||||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|
||||
writer.println("分析城市数量: " + weatherList.size()); |
|
||||
writer.println("数据来源: Open-Meteo API (CC BY 4.0)"); |
|
||||
writer.println(); |
|
||||
|
|
||||
writer.println("【多城市天气对比】"); |
|
||||
for (Weather weather : weatherList) { |
|
||||
writer.println("\n城市: " + weather.getCityName()); |
|
||||
writer.println(" 当前温度: " + String.format("%.1f°C", weather.getTemperature())); |
|
||||
writer.println(" 当前湿度: " + String.format("%.0f%%", weather.getHumidity())); |
|
||||
writer.println(" 风速: " + String.format("%.1f km/h", weather.getWindSpeed())); |
|
||||
writer.println(" 天气: " + weather.getWeatherDescription()); |
|
||||
|
|
||||
List<Double> temps = weather.getHourlyTemperatures(); |
|
||||
if (!temps.isEmpty()) { |
|
||||
writer.println(" 24小时平均温度: " + String.format("%.1f°C", temps.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
writer.println("\n报告生成完成"); |
|
||||
System.out.println("\n报告已保存: " + fileName); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("生成报告失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,127 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
import org.jsoup.Jsoup; |
|
||||
import org.jsoup.nodes.Document; |
|
||||
import org.jsoup.nodes.Element; |
|
||||
import org.jsoup.select.Elements; |
|
||||
|
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
import com.example.crawler.exception.ParseException; |
|
||||
import com.example.crawler.model.Book; |
|
||||
import com.example.crawler.util.HttpUtil; |
|
||||
|
|
||||
/** |
|
||||
* 书籍爬取策略 |
|
||||
* // 策略模式:书籍信息爬取策略
|
|
||||
*/ |
|
||||
public class BookCrawlStrategy implements CrawlStrategy<Book> { |
|
||||
|
|
||||
private static final String BASE_URL = "https://books.toscrape.com/"; |
|
||||
private static final String PAGE_URL_FORMAT = "https://books.toscrape.com/catalogue/page-%d.html"; |
|
||||
private static final int MAX_PAGES = 30; // 最大爬取页数
|
|
||||
|
|
||||
@Override |
|
||||
public List<Book> crawl() throws CrawlException { |
|
||||
List<Book> books = new ArrayList<>(); |
|
||||
int pageNum = 1; |
|
||||
|
|
||||
try { |
|
||||
while (true) { |
|
||||
// 达到最大页数限制时停止
|
|
||||
if (pageNum > MAX_PAGES) { |
|
||||
System.out.println("已达到最大爬取页数限制(" + MAX_PAGES + "页),停止爬取"); |
|
||||
break; |
|
||||
} |
|
||||
|
|
||||
String url = pageNum == 1 ? BASE_URL : String.format(PAGE_URL_FORMAT, pageNum); |
|
||||
|
|
||||
// 设置请求头
|
|
||||
Map<String, String> headers = Map.of( |
|
||||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|
||||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
|
||||
); |
|
||||
|
|
||||
String html = HttpUtil.get(url, headers); |
|
||||
Document doc = Jsoup.parse(html); |
|
||||
|
|
||||
Elements bookElements = doc.select(".product_pod"); |
|
||||
|
|
||||
// 如果没有书籍元素,说明已到达最后一页
|
|
||||
if (bookElements.isEmpty()) { |
|
||||
System.out.println("第 " + pageNum + " 页没有书籍数据,停止爬取"); |
|
||||
break; |
|
||||
} |
|
||||
|
|
||||
for (Element bookElement : bookElements) { |
|
||||
Book book = parseBook(bookElement); |
|
||||
books.add(book); |
|
||||
} |
|
||||
|
|
||||
System.out.println("已爬取第 " + pageNum + " 页,共 " + books.size() + " 本书"); |
|
||||
|
|
||||
// 设置请求间隔
|
|
||||
HttpUtil.sleep(1); |
|
||||
|
|
||||
pageNum++; |
|
||||
} |
|
||||
|
|
||||
return books; |
|
||||
} catch (NetworkException e) { |
|
||||
// 如果是404错误且已经爬取了一些数据,返回已获取的数据
|
|
||||
if (e.getMessage().contains("404") && !books.isEmpty()) { |
|
||||
System.out.println("第 " + pageNum + " 页不存在(404),返回已爬取的 " + books.size() + " 本书"); |
|
||||
return books; |
|
||||
} |
|
||||
throw new NetworkException("爬取书籍信息时网络异常: " + e.getMessage(), e); |
|
||||
} catch (ParseException e) { |
|
||||
throw new ParseException("解析书籍信息时异常: " + e.getMessage(), e); |
|
||||
} catch (Exception e) { |
|
||||
throw new CrawlException("爬取书籍信息时发生未知异常: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 解析书籍元素 |
|
||||
*/ |
|
||||
private Book parseBook(Element bookElement) throws ParseException { |
|
||||
try { |
|
||||
// 获取书名
|
|
||||
Element titleElement = bookElement.selectFirst("h3 a"); |
|
||||
String title = titleElement != null ? titleElement.attr("title") : "未知书名"; |
|
||||
|
|
||||
// 获取价格
|
|
||||
Element priceElement = bookElement.selectFirst(".price_color"); |
|
||||
String price = priceElement != null ? priceElement.text() : "未知价格"; |
|
||||
|
|
||||
// 获取库存状态
|
|
||||
Element availabilityElement = bookElement.selectFirst(".instock.availability"); |
|
||||
String availability = availabilityElement != null ? availabilityElement.text().trim() : "未知库存"; |
|
||||
|
|
||||
// 获取星级评分
|
|
||||
Element ratingElement = bookElement.selectFirst(".star-rating"); |
|
||||
String rating = "未知"; |
|
||||
if (ratingElement != null) { |
|
||||
String classAttr = ratingElement.attr("class"); |
|
||||
if (classAttr.contains("One")) rating = "1星"; |
|
||||
else if (classAttr.contains("Two")) rating = "2星"; |
|
||||
else if (classAttr.contains("Three")) rating = "3星"; |
|
||||
else if (classAttr.contains("Four")) rating = "4星"; |
|
||||
else if (classAttr.contains("Five")) rating = "5星"; |
|
||||
} |
|
||||
|
|
||||
return new Book(title, price, availability, rating); |
|
||||
} catch (Exception e) { |
|
||||
throw new ParseException("解析书籍信息失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getDataSourceName() { |
|
||||
return "toscrape.com书籍信息"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,27 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
/**
 * Crawl strategy interface.
 * Defines the standard operations of a crawl so that concrete data sources
 * can be swapped in via the Strategy pattern.
 *
 * @param <T> type of the items produced by the crawl
 */
public interface CrawlStrategy<T> {

    /**
     * Runs the crawl.
     *
     * @return the list of items that were crawled
     * @throws CrawlException if the crawl fails
     */
    List<T> crawl() throws CrawlException;

    /**
     * Returns the name of the data source.
     *
     * @return data source name
     */
    String getDataSourceName();
}
|
||||
@ -1,151 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
import org.jsoup.Jsoup; |
|
||||
import org.jsoup.nodes.Document; |
|
||||
import org.jsoup.nodes.Element; |
|
||||
import org.jsoup.select.Elements; |
|
||||
|
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
import com.example.crawler.exception.ParseException; |
|
||||
import com.example.crawler.model.News; |
|
||||
import com.example.crawler.util.HttpUtil; |
|
||||
|
|
||||
/** |
|
||||
* 新浪新闻爬取策略 |
|
||||
* // 策略模式:新浪新闻爬取策略
|
|
||||
*/ |
|
||||
public class NewsCrawlStrategy implements CrawlStrategy<News> { |
|
||||
|
|
||||
private static final String NEWS_URL = "https://news.sina.com.cn/china/"; |
|
||||
private static final int MAX_NEWS_COUNT = 20; |
|
||||
|
|
||||
@Override |
|
||||
public List<News> crawl() throws CrawlException { |
|
||||
List<News> newsList = new ArrayList<>(); |
|
||||
|
|
||||
try { |
|
||||
// 设置请求头
|
|
||||
Map<String, String> headers = Map.of( |
|
||||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|
||||
"Referer", "https://news.sina.com.cn/", |
|
||||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
|
||||
); |
|
||||
|
|
||||
String html = HttpUtil.get(NEWS_URL, headers); |
|
||||
Document doc = Jsoup.parse(html); |
|
||||
|
|
||||
// 新浪新闻页面结构可能变化,使用多种选择器尝试
|
|
||||
Elements newsElements = doc.select(".news-item, .news-list li, .list-item, .feed-card-item"); |
|
||||
|
|
||||
// 如果上述选择器都没找到,尝试更通用的选择器
|
|
||||
if (newsElements.isEmpty()) { |
|
||||
newsElements = doc.select("a[href*=sina.com.cn]"); |
|
||||
} |
|
||||
|
|
||||
int count = 0; |
|
||||
for (Element element : newsElements) { |
|
||||
if (count >= MAX_NEWS_COUNT) { |
|
||||
break; |
|
||||
} |
|
||||
|
|
||||
try { |
|
||||
News news = parseNews(element); |
|
||||
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
|
||||
newsList.add(news); |
|
||||
count++; |
|
||||
} |
|
||||
} catch (ParseException e) { |
|
||||
// 跳过解析失败的新闻,继续处理下一个
|
|
||||
continue; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// 如果使用通用选择器获取的结果不够,尝试另一种方式
|
|
||||
if (newsList.size() < MAX_NEWS_COUNT) { |
|
||||
Elements titleElements = doc.select("h2 a, h3 a, .title a, .news-title a"); |
|
||||
for (Element element : titleElements) { |
|
||||
if (count >= MAX_NEWS_COUNT) { |
|
||||
break; |
|
||||
} |
|
||||
try { |
|
||||
News news = parseNewsFromTitleElement(element); |
|
||||
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
|
||||
newsList.add(news); |
|
||||
count++; |
|
||||
} |
|
||||
} catch (ParseException e) { |
|
||||
continue; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("已爬取 " + newsList.size() + " 条新浪新闻"); |
|
||||
return newsList; |
|
||||
|
|
||||
} catch (NetworkException e) { |
|
||||
throw new NetworkException("爬取新浪新闻时网络异常: " + e.getMessage(), e); |
|
||||
} catch (Exception e) { |
|
||||
throw new CrawlException("爬取新浪新闻时发生未知异常: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 解析新闻元素 |
|
||||
*/ |
|
||||
private News parseNews(Element element) throws ParseException { |
|
||||
try { |
|
||||
String title = ""; |
|
||||
String url = ""; |
|
||||
String publishTime = ""; |
|
||||
|
|
||||
// 尝试获取标题和链接
|
|
||||
Element linkElement = element.selectFirst("a"); |
|
||||
if (linkElement != null) { |
|
||||
title = linkElement.text().trim(); |
|
||||
url = linkElement.attr("abs:href"); |
|
||||
} |
|
||||
|
|
||||
// 尝试获取发布时间
|
|
||||
Element timeElement = element.selectFirst(".time, .pubtime, span[class*=time]"); |
|
||||
if (timeElement != null) { |
|
||||
publishTime = timeElement.text().trim(); |
|
||||
} |
|
||||
|
|
||||
if (title.isEmpty() || url.isEmpty()) { |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
return new News(title, publishTime, url); |
|
||||
} catch (Exception e) { |
|
||||
throw new ParseException("解析新闻信息失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 从标题元素解析新闻 |
|
||||
*/ |
|
||||
private News parseNewsFromTitleElement(Element element) throws ParseException { |
|
||||
try { |
|
||||
String title = element.text().trim(); |
|
||||
String url = element.attr("abs:href"); |
|
||||
|
|
||||
if (title.isEmpty() || url.isEmpty()) { |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
return new News(title, "", url); |
|
||||
} catch (Exception e) { |
|
||||
throw new ParseException("解析新闻标题失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getDataSourceName() { |
|
||||
return "新浪国内新闻"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,24 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import com.example.crawler.strategy.BookCrawlStrategy; |
|
||||
import com.example.crawler.strategy.NewsCrawlStrategy; |
|
||||
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
|
||||
import com.example.crawler.strategy.WeatherCrawlStrategy; |
|
||||
|
|
||||
public class StrategyFactory { |
|
||||
|
|
||||
public static CrawlStrategy<?> getStrategy(int choice) { |
|
||||
switch (choice) { |
|
||||
case 1: |
|
||||
return new BookCrawlStrategy(); |
|
||||
case 2: |
|
||||
return new NewsCrawlStrategy(); |
|
||||
case 3: |
|
||||
return new UniversityRankCrawlStrategy(); |
|
||||
case 4: |
|
||||
return new WeatherCrawlStrategy(); |
|
||||
default: |
|
||||
throw new IllegalArgumentException("Invalid choice: " + choice); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,148 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
import org.jsoup.Jsoup; |
|
||||
import org.jsoup.nodes.Document; |
|
||||
import org.jsoup.nodes.Element; |
|
||||
import org.jsoup.select.Elements; |
|
||||
|
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
import com.example.crawler.exception.ParseException; |
|
||||
import com.example.crawler.model.UniversityRank; |
|
||||
import com.example.crawler.util.HttpUtil; |
|
||||
|
|
||||
/** |
|
||||
* 软科中国大学排名爬取策略 |
|
||||
* // 策略模式:软科中国大学排名爬取策略
|
|
||||
*/ |
|
||||
public class UniversityRankCrawlStrategy implements CrawlStrategy<UniversityRank> { |
|
||||
|
|
||||
private static final String RANKING_URL = "https://www.shanghairanking.cn/rankings/bcur/2025"; |
|
||||
|
|
||||
@Override |
|
||||
public List<UniversityRank> crawl() throws CrawlException { |
|
||||
List<UniversityRank> rankings = new ArrayList<>(); |
|
||||
|
|
||||
try { |
|
||||
// 设置请求头
|
|
||||
Map<String, String> headers = Map.of( |
|
||||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|
||||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", |
|
||||
"Referer", "https://www.shanghairanking.cn/" |
|
||||
); |
|
||||
|
|
||||
// 设置请求延迟
|
|
||||
HttpUtil.sleep(3); |
|
||||
|
|
||||
String html = HttpUtil.get(RANKING_URL, headers); |
|
||||
Document doc = Jsoup.parse(html); |
|
||||
|
|
||||
// 提取表格数据
|
|
||||
Elements rows = doc.select("table tbody tr"); |
|
||||
|
|
||||
if (rows.isEmpty()) { |
|
||||
// 如果第一个选择器失败,尝试其他可能的选择器
|
|
||||
rows = doc.select(".rk-table tbody tr"); |
|
||||
} |
|
||||
|
|
||||
if (rows.isEmpty()) { |
|
||||
// 尝试更通用的选择器
|
|
||||
rows = doc.select("tr"); |
|
||||
} |
|
||||
|
|
||||
int count = 0; |
|
||||
for (Element row : rows) { |
|
||||
try { |
|
||||
UniversityRank ranking = parseRow(row); |
|
||||
if (ranking != null && ranking.getRank() != null) { |
|
||||
rankings.add(ranking); |
|
||||
count++; |
|
||||
|
|
||||
// 最多爬取200条数据
|
|
||||
if (count >= 200) { |
|
||||
break; |
|
||||
} |
|
||||
} |
|
||||
} catch (ParseException e) { |
|
||||
// 跳过解析失败的行
|
|
||||
continue; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("已爬取 " + rankings.size() + " 条大学排名数据"); |
|
||||
return rankings; |
|
||||
|
|
||||
} catch (NetworkException e) { |
|
||||
throw new NetworkException("爬取软科大学排名时网络异常: " + e.getMessage(), e); |
|
||||
} catch (Exception e) { |
|
||||
throw new CrawlException("爬取软科大学排名时发生未知异常: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 解析表格行数据 |
|
||||
*/ |
|
||||
private UniversityRank parseRow(Element row) throws ParseException { |
|
||||
try { |
|
||||
Elements cells = row.select("td"); |
|
||||
|
|
||||
if (cells.size() < 4) { |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
// 第1列:排名
|
|
||||
String rankStr = cells.get(0).text().trim(); |
|
||||
Integer rank = null; |
|
||||
try { |
|
||||
rank = Integer.parseInt(rankStr); |
|
||||
} catch (NumberFormatException e) { |
|
||||
// 如果排名不是数字(如"1-3"这样的范围),尝试提取第一个数字
|
|
||||
String numPart = rankStr.replaceAll("[^0-9]", ""); |
|
||||
if (!numPart.isEmpty()) { |
|
||||
rank = Integer.parseInt(numPart); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (rank == null) { |
|
||||
return null; |
|
||||
} |
|
||||
|
|
||||
// 第2列:学校名称
|
|
||||
String universityName = cells.get(1).text().trim(); |
|
||||
|
|
||||
// 第4列:总分
|
|
||||
String totalScore = ""; |
|
||||
if (cells.size() > 3) { |
|
||||
totalScore = cells.get(3).text().trim(); |
|
||||
} |
|
||||
|
|
||||
// 尝试提取省份和办学层次(第3列可能包含这些信息)
|
|
||||
String province = ""; |
|
||||
String category = ""; |
|
||||
if (cells.size() > 2) { |
|
||||
String thirdColumn = cells.get(2).text().trim(); |
|
||||
// 尝试解析省份和办学层次
|
|
||||
String[] parts = thirdColumn.split("\\s+"); |
|
||||
if (parts.length >= 1) { |
|
||||
province = parts[0]; |
|
||||
} |
|
||||
if (parts.length >= 2) { |
|
||||
category = parts[1]; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
return new UniversityRank(rank, universityName, totalScore, province, category); |
|
||||
} catch (Exception e) { |
|
||||
throw new ParseException("解析大学排名行数据失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getDataSourceName() { |
|
||||
return "软科中国大学排名"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,177 +0,0 @@ |
|||||
package com.example.crawler.strategy; |
|
||||
|
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
import com.example.crawler.constant.CrawlerConstants; |
|
||||
import com.example.crawler.exception.CrawlException; |
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
import com.example.crawler.exception.ParseException; |
|
||||
import com.example.crawler.model.Weather; |
|
||||
import com.example.crawler.util.HttpUtil; |
|
||||
import com.google.gson.JsonArray; |
|
||||
import com.google.gson.JsonElement; |
|
||||
import com.google.gson.JsonObject; |
|
||||
import com.google.gson.JsonParser; |
|
||||
|
|
||||
public class WeatherCrawlStrategy implements CrawlStrategy<Weather> { |
|
||||
|
|
||||
@Override |
|
||||
public List<Weather> crawl() throws CrawlException { |
|
||||
List<Weather> weatherList = new ArrayList<>(); |
|
||||
|
|
||||
try { |
|
||||
for (Map.Entry<String, double[]> entry : CrawlerConstants.CITY_COORDINATES.entrySet()) { |
|
||||
String cityName = entry.getKey(); |
|
||||
double[] coords = entry.getValue(); |
|
||||
double latitude = coords[0]; |
|
||||
double longitude = coords[1]; |
|
||||
|
|
||||
String weatherUrl = buildApiUrl(latitude, longitude); |
|
||||
Map<String, String> headers = Map.of( |
|
||||
"User-Agent", CrawlerConstants.USER_AGENT |
|
||||
); |
|
||||
|
|
||||
String response = HttpUtil.get(weatherUrl, headers); |
|
||||
Weather weather = parseWeatherData(cityName, response); |
|
||||
weatherList.add(weather); |
|
||||
|
|
||||
System.out.println("已获取 " + cityName + " 的天气信息"); |
|
||||
|
|
||||
HttpUtil.sleep(2); |
|
||||
} |
|
||||
|
|
||||
return weatherList; |
|
||||
|
|
||||
} catch (NetworkException e) { |
|
||||
throw new NetworkException("爬取天气数据时网络异常: " + e.getMessage(), e); |
|
||||
} catch (ParseException e) { |
|
||||
throw new ParseException("解析天气数据时异常: " + e.getMessage(), e); |
|
||||
} catch (Exception e) { |
|
||||
throw new CrawlException("爬取天气数据时发生未知异常: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private String buildApiUrl(double latitude, double longitude) { |
|
||||
return CrawlerConstants.URL_WEATHER_API + "?latitude=" + latitude + |
|
||||
"&longitude=" + longitude + |
|
||||
"¤t_weather=true" + |
|
||||
"&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" + |
|
||||
"&forecast_days=1" + |
|
||||
"&timezone=Asia/Shanghai"; |
|
||||
} |
|
||||
|
|
||||
private Weather parseWeatherData(String cityName, String jsonData) throws ParseException { |
|
||||
try { |
|
||||
JsonObject obj = JsonParser.parseString(jsonData).getAsJsonObject(); |
|
||||
|
|
||||
Weather weather = new Weather(); |
|
||||
weather.setCityName(cityName); |
|
||||
|
|
||||
JsonObject currentWeather = obj.getAsJsonObject("current_weather"); |
|
||||
if (currentWeather != null) { |
|
||||
weather.setTemperature(cleanTemperature(getJsonDouble(currentWeather, "temperature", 0))); |
|
||||
weather.setWindSpeed(cleanWindSpeed(getJsonDouble(currentWeather, "windspeed", 0))); |
|
||||
weather.setWeatherCode(String.valueOf(getJsonInt(currentWeather, "weathercode", -1))); |
|
||||
} |
|
||||
|
|
||||
JsonObject hourly = obj.getAsJsonObject("hourly"); |
|
||||
if (hourly != null) { |
|
||||
JsonArray times = hourly.getAsJsonArray("time"); |
|
||||
JsonArray temps = hourly.getAsJsonArray("temperature_2m"); |
|
||||
JsonArray humidities = hourly.getAsJsonArray("relative_humidity_2m"); |
|
||||
JsonArray windSpeeds = hourly.getAsJsonArray("wind_speed_10m"); |
|
||||
|
|
||||
if (times != null && temps != null) { |
|
||||
int count = Math.min(times.size(), 24); |
|
||||
for (int i = 0; i < count; i++) { |
|
||||
weather.getHourlyTimes().add(cleanTimeString(getJsonString(times, i, ""))); |
|
||||
weather.getHourlyTemperatures().add(cleanTemperature(getJsonDouble(temps, i, 0))); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (humidities != null) { |
|
||||
int count = Math.min(humidities.size(), 24); |
|
||||
for (int i = 0; i < count; i++) { |
|
||||
weather.getHourlyHumidities().add(cleanHumidity(getJsonInt(humidities, i, 50))); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (windSpeeds != null) { |
|
||||
int count = Math.min(windSpeeds.size(), 24); |
|
||||
for (int i = 0; i < count; i++) { |
|
||||
weather.getHourlyWindSpeeds().add(cleanWindSpeed(getJsonDouble(windSpeeds, i, 0))); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
if (!weather.getHourlyHumidities().isEmpty()) { |
|
||||
weather.setHumidity(weather.getHourlyHumidities().get(0)); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
return weather; |
|
||||
} catch (Exception e) { |
|
||||
throw new ParseException("解析天气JSON数据失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private String getJsonString(JsonArray arr, int index, String defaultValue) { |
|
||||
if (arr == null || index >= arr.size()) return defaultValue; |
|
||||
JsonElement element = arr.get(index); |
|
||||
return element.isJsonNull() ? defaultValue : element.getAsString(); |
|
||||
} |
|
||||
|
|
||||
private double getJsonDouble(JsonObject obj, String key, double defaultValue) { |
|
||||
JsonElement element = obj.get(key); |
|
||||
if (element == null || element.isJsonNull()) return defaultValue; |
|
||||
return element.getAsDouble(); |
|
||||
} |
|
||||
|
|
||||
private int getJsonInt(JsonObject obj, String key, int defaultValue) { |
|
||||
JsonElement element = obj.get(key); |
|
||||
if (element == null || element.isJsonNull()) return defaultValue; |
|
||||
return element.getAsInt(); |
|
||||
} |
|
||||
|
|
||||
private double getJsonDouble(JsonArray arr, int index, double defaultValue) { |
|
||||
if (arr == null || index >= arr.size()) return defaultValue; |
|
||||
JsonElement element = arr.get(index); |
|
||||
if (element == null || element.isJsonNull()) return defaultValue; |
|
||||
return element.getAsDouble(); |
|
||||
} |
|
||||
|
|
||||
private int getJsonInt(JsonArray arr, int index, int defaultValue) { |
|
||||
if (arr == null || index >= arr.size()) return defaultValue; |
|
||||
JsonElement element = arr.get(index); |
|
||||
if (element == null || element.isJsonNull()) return defaultValue; |
|
||||
return element.getAsInt(); |
|
||||
} |
|
||||
|
|
||||
private double cleanTemperature(double temp) { |
|
||||
return Math.round(temp * 10.0) / 10.0; |
|
||||
} |
|
||||
|
|
||||
private double cleanWindSpeed(double speed) { |
|
||||
return Math.round(speed * 10.0) / 10.0; |
|
||||
} |
|
||||
|
|
||||
private int cleanHumidity(int humidity) { |
|
||||
if (humidity < 0) return 50; |
|
||||
if (humidity > 100) return 100; |
|
||||
return humidity; |
|
||||
} |
|
||||
|
|
||||
private String cleanTimeString(String time) { |
|
||||
if (time == null || time.isEmpty()) return ""; |
|
||||
if (time.contains("T")) { |
|
||||
return time.substring(time.indexOf("T") + 1, time.indexOf("T") + 6); |
|
||||
} |
|
||||
return time; |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String getDataSourceName() { |
|
||||
return "Open-Meteo 实时天气"; |
|
||||
} |
|
||||
} |
|
||||
@ -1,122 +0,0 @@ |
|||||
package com.example.crawler.util; |
|
||||
|
|
||||
import java.time.LocalDateTime; |
|
||||
import java.time.format.DateTimeFormatter; |
|
||||
import java.util.HashMap; |
|
||||
import java.util.Map; |
|
||||
import java.util.regex.Matcher; |
|
||||
import java.util.regex.Pattern; |
|
||||
|
|
||||
/**
 * Data cleaning utilities.
 * Static helpers that normalise scraped text: prices, scores, ratings,
 * titles, timestamps and word lists.
 */
public class DataCleaner {

    /** Matches every character that is neither an ASCII digit nor a dot. */
    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");

    /** Matches every character that is not a CJK ideograph in U+4E00..U+9FA5, an ASCII letter or a digit. */
    private static final Pattern NON_WORD = Pattern.compile("[^\u4e00-\u9fa5a-zA-Z0-9]");

    /** Matches a run of whitespace. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Timestamp layouts tried, in order, by {@link #cleanNewsTime(String)}. */
    private static final DateTimeFormatter[] NEWS_TIME_FORMATS = {
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"),
            DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH:mm")
    };

    /** Stop words; only the key set is consulted. */
    private static final Map<String, String> STOP_WORDS = new HashMap<>();

    static {
        String[] words = {
                "的", "了", "是", "在", "和", "与", "对", "为", "有", "我",
                "你", "他", "她", "它", "这", "那", "就", "也", "都", "要",
                "会", "能", "可", "以", "说", "到", "来", "去", "着", "过"
        };
        for (String word : words) {
            STOP_WORDS.put(word, word);
        }
    }

    /**
     * Extracts the numeric value from price text such as "£51.77".
     *
     * @param price raw price text, may be null
     * @return the parsed value, or 0.0 when the text is null, empty or not numeric
     */
    public static double cleanPrice(String price) {
        return parseNumeric(price);
    }

    /**
     * Maps a star-rating class string containing "One".."Five" to 1..5.
     *
     * @param ratingClass class attribute text, may be null
     * @return the rating, or 0 when null or no rating word is present
     */
    public static int cleanRating(String ratingClass) {
        if (ratingClass == null) {
            return 0;
        }
        if (ratingClass.contains("Five")) {
            return 5;
        }
        if (ratingClass.contains("Four")) {
            return 4;
        }
        if (ratingClass.contains("Three")) {
            return 3;
        }
        if (ratingClass.contains("Two")) {
            return 2;
        }
        if (ratingClass.contains("One")) {
            return 1;
        }
        return 0;
    }

    /**
     * Parses a news timestamp in either "yyyy-MM-dd HH:mm:ss" or
     * "yyyy年MM月dd日 HH:mm" layout; surrounding whitespace is ignored.
     *
     * @param timeStr raw timestamp text, may be null
     * @return the parsed time, or the current time when the text is null,
     *         empty or matches neither layout
     */
    public static LocalDateTime cleanNewsTime(String timeStr) {
        if (timeStr == null || timeStr.isEmpty()) {
            return LocalDateTime.now();
        }
        String trimmed = timeStr.trim();
        for (DateTimeFormatter format : NEWS_TIME_FORMATS) {
            try {
                return LocalDateTime.parse(trimmed, format);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not this layout; try the next one.
            }
        }
        return LocalDateTime.now();
    }

    /**
     * Trims a title and collapses every whitespace run into a single space.
     *
     * @param title raw title, may be null
     * @return the normalised title, or "" for null
     */
    public static String cleanTitle(String title) {
        if (title == null) {
            return "";
        }
        return WHITESPACE_RUN.matcher(title.trim()).replaceAll(" ");
    }

    /**
     * Extracts the numeric value from score text.
     *
     * @param score raw score text, may be null
     * @return the parsed value, or 0.0 when the text is null, empty or not numeric
     */
    public static double cleanScore(String score) {
        return parseNumeric(score);
    }

    /**
     * Splits text into words made of CJK ideographs, ASCII letters and
     * digits; every other character acts as a separator. The result never
     * contains empty strings, even when the text starts with a separator.
     *
     * @param text raw text, may be null
     * @return the words in order of appearance; empty array when there are none
     */
    public static String[] extractWords(String text) {
        if (text == null || text.isEmpty()) {
            return new String[0];
        }
        // Trim after replacing separators, otherwise split() yields a leading "".
        String cleaned = NON_WORD.matcher(text).replaceAll(" ").trim();
        if (cleaned.isEmpty()) {
            return new String[0];
        }
        return WHITESPACE_RUN.split(cleaned);
    }

    /**
     * Tells whether a word should be ignored when counting: null, shorter
     * than two characters, or listed as a stop word.
     */
    public static boolean isStopWord(String word) {
        return word == null || word.length() < 2 || STOP_WORDS.containsKey(word);
    }

    /**
     * Counts occurrences of each non-stop word.
     *
     * @param words words to count, may be null
     * @return word to occurrence count; empty when {@code words} is null
     */
    public static Map<String, Integer> countWordFrequency(String[] words) {
        Map<String, Integer> frequency = new HashMap<>();
        if (words == null) {
            return frequency;
        }
        for (String word : words) {
            if (!isStopWord(word)) {
                frequency.merge(word, 1, Integer::sum);
            }
        }
        return frequency;
    }

    /** Returns the hour-of-day of the given date-time. */
    public static int extractHour(LocalDateTime dateTime) {
        return dateTime.getHour();
    }

    /** Strips everything but digits and dots, then parses; 0.0 when nothing parseable remains. */
    private static double parseNumeric(String raw) {
        if (raw == null || raw.isEmpty()) {
            return 0.0;
        }
        String digits = NON_NUMERIC.matcher(raw).replaceAll("");
        try {
            return Double.parseDouble(digits);
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }
}
|
||||
@ -1,126 +0,0 @@ |
|||||
package com.example.crawler.util; |
|
||||
|
|
||||
import com.example.crawler.exception.NetworkException; |
|
||||
|
|
||||
import java.net.URI; |
|
||||
import java.net.http.HttpClient; |
|
||||
import java.net.http.HttpRequest; |
|
||||
import java.net.http.HttpResponse; |
|
||||
import java.time.Duration; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
/** |
|
||||
* HTTP工具类 |
|
||||
* 封装HTTP请求操作,使用Java 11内置HttpClient |
|
||||
*/ |
|
||||
public class HttpUtil { |
|
||||
|
|
||||
private static final HttpClient httpClient = HttpClient.newBuilder() |
|
||||
.connectTimeout(Duration.ofSeconds(30)) |
|
||||
.followRedirects(HttpClient.Redirect.NORMAL) |
|
||||
.build(); |
|
||||
|
|
||||
private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; |
|
||||
|
|
||||
/**
 * Sends a GET request without any custom headers by delegating to
 * {@code get(url, Map.of())}.
 *
 * @param url request URL
 * @return response body
 * @throws NetworkException if the request fails
 */
public static String get(String url) throws NetworkException {
    return get(url, Map.of());
}
|
||||
|
|
||||
/** |
|
||||
* 发送GET请求(带请求头) |
|
||||
* |
|
||||
* @param url 请求URL |
|
||||
* @param headers 请求头 |
|
||||
* @return 响应内容 |
|
||||
* @throws NetworkException 网络异常 |
|
||||
*/ |
|
||||
public static String get(String url, Map<String, String> headers) throws NetworkException { |
|
||||
try { |
|
||||
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
|
||||
.uri(URI.create(url)) |
|
||||
.timeout(Duration.ofSeconds(30)) |
|
||||
.GET(); |
|
||||
|
|
||||
// 添加默认User-Agent
|
|
||||
if (!headers.containsKey("User-Agent")) { |
|
||||
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
|
||||
} |
|
||||
|
|
||||
// 添加自定义请求头
|
|
||||
headers.forEach(requestBuilder::header); |
|
||||
|
|
||||
HttpRequest request = requestBuilder.build(); |
|
||||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
|
||||
|
|
||||
if (response.statusCode() != 200) { |
|
||||
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
|
||||
} |
|
||||
|
|
||||
return response.body(); |
|
||||
} catch (NetworkException e) { |
|
||||
throw e; |
|
||||
} catch (Exception e) { |
|
||||
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 发送POST请求 |
|
||||
* |
|
||||
* @param url 请求URL |
|
||||
* @param body 请求体 |
|
||||
* @param headers 请求头 |
|
||||
* @return 响应内容 |
|
||||
* @throws NetworkException 网络异常 |
|
||||
*/ |
|
||||
public static String post(String url, String body, Map<String, String> headers) throws NetworkException { |
|
||||
try { |
|
||||
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
|
||||
.uri(URI.create(url)) |
|
||||
.timeout(Duration.ofSeconds(30)) |
|
||||
.header("Content-Type", "application/json") |
|
||||
.POST(HttpRequest.BodyPublishers.ofString(body)); |
|
||||
|
|
||||
// 添加默认User-Agent
|
|
||||
if (!headers.containsKey("User-Agent")) { |
|
||||
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
|
||||
} |
|
||||
|
|
||||
// 添加自定义请求头
|
|
||||
headers.forEach(requestBuilder::header); |
|
||||
|
|
||||
HttpRequest request = requestBuilder.build(); |
|
||||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
|
||||
|
|
||||
if (response.statusCode() != 200) { |
|
||||
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
|
||||
} |
|
||||
|
|
||||
return response.body(); |
|
||||
} catch (NetworkException e) { |
|
||||
throw e; |
|
||||
} catch (Exception e) { |
|
||||
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 设置请求间隔,避免对服务器造成压力 |
|
||||
* |
|
||||
* @param seconds 间隔秒数 |
|
||||
*/ |
|
||||
public static void sleep(int seconds) { |
|
||||
try { |
|
||||
Thread.sleep(seconds * 1000L); |
|
||||
} catch (InterruptedException e) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,95 +0,0 @@ |
|||||
package com.example.crawler.util; |
|
||||
|
|
||||
import com.example.crawler.exception.DataSaveException; |
|
||||
import com.google.gson.Gson; |
|
||||
import com.google.gson.GsonBuilder; |
|
||||
|
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.nio.file.Files; |
|
||||
import java.nio.file.Path; |
|
||||
import java.nio.file.Paths; |
|
||||
import java.util.List; |
|
||||
|
|
||||
/** |
|
||||
* JSON工具类 |
|
||||
* 封装JSON序列化和文件读写操作 |
|
||||
*/ |
|
||||
public class JsonUtil { |
|
||||
|
|
||||
private static final Gson gson = new GsonBuilder() |
|
||||
.setPrettyPrinting() |
|
||||
.disableHtmlEscaping() |
|
||||
.create(); |
|
||||
|
|
||||
/** |
|
||||
* 将对象序列化为JSON字符串 |
|
||||
* |
|
||||
* @param obj 对象 |
|
||||
* @return JSON字符串 |
|
||||
*/ |
|
||||
public static String toJson(Object obj) { |
|
||||
return gson.toJson(obj); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 将JSON字符串反序列化为对象 |
|
||||
* |
|
||||
* @param json JSON字符串 |
|
||||
* @param classOfT 目标类 |
|
||||
* @param <T> 泛型类型 |
|
||||
* @return 反序列化后的对象 |
|
||||
*/ |
|
||||
public static <T> T fromJson(String json, Class<T> classOfT) { |
|
||||
return gson.fromJson(json, classOfT); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 将对象保存为JSON文件 |
|
||||
* |
|
||||
* @param obj 对象 |
|
||||
* @param filePath 文件路径 |
|
||||
* @throws DataSaveException 数据保存异常 |
|
||||
*/ |
|
||||
public static void saveToJsonFile(Object obj, String filePath) throws DataSaveException { |
|
||||
try { |
|
||||
// 确保目录存在
|
|
||||
Path path = Paths.get(filePath); |
|
||||
Path parentDir = path.getParent(); |
|
||||
if (parentDir != null && !Files.exists(parentDir)) { |
|
||||
Files.createDirectories(parentDir); |
|
||||
} |
|
||||
|
|
||||
try (FileWriter writer = new FileWriter(filePath)) { |
|
||||
gson.toJson(obj, writer); |
|
||||
} |
|
||||
} catch (IOException e) { |
|
||||
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 将列表保存为JSON文件 |
|
||||
* |
|
||||
* @param list 列表 |
|
||||
* @param filePath 文件路径 |
|
||||
* @param <T> 泛型类型 |
|
||||
* @throws DataSaveException 数据保存异常 |
|
||||
*/ |
|
||||
public static <T> void saveListToJsonFile(List<T> list, String filePath) throws DataSaveException { |
|
||||
try { |
|
||||
// 确保目录存在
|
|
||||
Path path = Paths.get(filePath); |
|
||||
Path parentDir = path.getParent(); |
|
||||
if (parentDir != null && !Files.exists(parentDir)) { |
|
||||
Files.createDirectories(parentDir); |
|
||||
} |
|
||||
|
|
||||
try (FileWriter writer = new FileWriter(filePath)) { |
|
||||
gson.toJson(list, writer); |
|
||||
} |
|
||||
} catch (IOException e) { |
|
||||
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,72 +0,0 @@ |
|||||
package com.example.crawler.view; |
|
||||
|
|
||||
import java.util.Scanner; |
|
||||
|
|
||||
/**
 * Crawler view class.
 *
 * <p>View layer of the MVC structure: prints the command-line menu and messages, and reads
 * the user's input.
 */
public class CrawlerView {

    /**
     * Prints the main menu followed by the input prompt.
     */
    public void showMenu() {
        System.out.println("\n=== 数据爬取与分析系统 ===");
        System.out.println("1. 爬取书籍信息(toscrape.com)");
        System.out.println("2. 爬取新浪国内新闻");
        System.out.println("3. 爬取软科中国大学排名");
        System.out.println("4. 爬取Open-Meteo实时天气");
        System.out.println("5. 爬取全部数据并保存");
        System.out.println("6. 保存当前数据到文件");
        System.out.println("7. 生成所有数据源的分析报告与图表");
        System.out.println("8. 爬取并分析所有数据(一键完成)");
        System.out.println("9. 退出");
        System.out.print("请选择操作:");
    }

    /**
     * Reads one line of user input and parses it as an integer.
     *
     * @param scanner input scanner
     * @return the number entered by the user, or {@code -1} if the line is not a valid integer
     * @throws java.util.NoSuchElementException if no more input is available
     */
    public int getInput(Scanner scanner) {
        try {
            // strip() also removes Unicode whitespace such as the ideographic space (U+3000)
            // that CJK input methods insert; trim() would leave it and reject the input.
            String input = scanner.nextLine().strip();
            return Integer.parseInt(input);
        } catch (NumberFormatException e) {
            return -1; // signals an invalid choice
        }
    }

    /**
     * Prints an error message to standard error.
     *
     * @param message error message
     */
    public void showError(String message) {
        System.err.println("错误: " + message);
    }

    /**
     * Prints a success message to standard output.
     *
     * @param message success message
     */
    public void showSuccess(String message) {
        System.out.println("成功: " + message);
    }

    /**
     * Waits for the user to press Enter, then clears the terminal.
     *
     * @param scanner input scanner
     */
    public void pause(Scanner scanner) {
        System.out.print("\n按回车键继续...");
        scanner.nextLine();
        // ANSI escape sequence: move the cursor to the home position, then erase the screen.
        System.out.print("\033[H\033[2J");
        System.out.flush();
    }
}
|
||||