@ -0,0 +1,4 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
/**
 * Empty placeholder class; it currently declares no fields or methods.
 *
 * <p>NOTE(review): the name suggests a "history" command is planned here —
 * confirm the intended contract before adding behavior.
 */
public class HistoryCommand {
}
||||
@ -0,0 +1,10 @@ |
|||||
|
# 默认忽略的文件 |
||||
|
/shelf/ |
||||
|
/workspace.xml |
||||
|
# 已忽略包含查询文件的默认文件夹 |
||||
|
/queries/ |
||||
|
# Datasource local storage ignored files |
||||
|
/dataSources/ |
||||
|
/dataSources.local.xml |
||||
|
# 基于编辑器的 HTTP 客户端请求 |
||||
|
/httpRequests/ |
||||
@ -0,0 +1,13 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="CompilerConfiguration"> |
||||
|
<annotationProcessing> |
||||
|
<profile name="Maven default annotation processors profile" enabled="true"> |
||||
|
<sourceOutputDir name="target/generated-sources/annotations" /> |
||||
|
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
||||
|
<outputRelativeToContentRoot value="true" /> |
||||
|
<module name="crawler-project" /> |
||||
|
</profile> |
||||
|
</annotationProcessing> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,7 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="Encoding"> |
||||
|
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
||||
|
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,20 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="RemoteRepositoriesConfiguration"> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="central" /> |
||||
|
<option name="name" value="Central Repository" /> |
||||
|
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
||||
|
</remote-repository> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="central" /> |
||||
|
<option name="name" value="Maven Central repository" /> |
||||
|
<option name="url" value="https://repo1.maven.org/maven2" /> |
||||
|
</remote-repository> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="jboss.community" /> |
||||
|
<option name="name" value="JBoss Community repository" /> |
||||
|
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
||||
|
</remote-repository> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,12 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
||||
|
<component name="MavenProjectsManager"> |
||||
|
<option name="originalFiles"> |
||||
|
<list> |
||||
|
<option value="$PROJECT_DIR$/pom.xml" /> |
||||
|
</list> |
||||
|
</option> |
||||
|
</component> |
||||
|
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK" /> |
||||
|
</project> |
||||
@ -0,0 +1,6 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="VcsDirectoryMappings"> |
||||
|
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> |
||||
|
</component> |
||||
|
</project> |
||||
|
After Width: | Height: | Size: 18 KiB |
|
After Width: | Height: | Size: 35 KiB |
|
After Width: | Height: | Size: 26 KiB |
|
After Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 54 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
After Width: | Height: | Size: 24 KiB |
|
After Width: | Height: | Size: 44 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 32 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
|
|
@ -0,0 +1,82 @@ |
|||||
|
[ |
||||
|
{ |
||||
|
"title": "专栏", |
||||
|
"publishTime": "", |
||||
|
"url": "http://zhuanlan.sina.com.cn/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "导航", |
||||
|
"publishTime": "", |
||||
|
"url": "http://news.sina.com.cn/guide/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "新浪财经", |
||||
|
"publishTime": "", |
||||
|
"url": "https://finance.sina.com.cn/mobile/comfinanceweb.shtml" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "新浪博客", |
||||
|
"publishTime": "", |
||||
|
"url": "https://blog.sina.com.cn/lm/z/app/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "我的收藏", |
||||
|
"publishTime": "", |
||||
|
"url": "http://my.sina.com.cn/#location=fav" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "注册", |
||||
|
"publishTime": "", |
||||
|
"url": "https://login.sina.com.cn/signup/signup?entry=news" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "新闻中心", |
||||
|
"publishTime": "", |
||||
|
"url": "http://news.sina.com.cn/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "新闻排行", |
||||
|
"publishTime": "", |
||||
|
"url": "http://news.sina.com.cn/hotnews/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "联系我们", |
||||
|
"publishTime": "", |
||||
|
"url": "http://www.sina.com.cn/contactus.html" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "广告服务", |
||||
|
"publishTime": "", |
||||
|
"url": "http://emarketing.sina.com.cn/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "通行证注册", |
||||
|
"publishTime": "", |
||||
|
"url": "http://login.sina.com.cn/signup/signup" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "产品答疑", |
||||
|
"publishTime": "", |
||||
|
"url": "http://help.sina.com.cn/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "招聘信息", |
||||
|
"publishTime": "", |
||||
|
"url": "http://career.sina.com.cn/" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "网站律师", |
||||
|
"publishTime": "", |
||||
|
"url": "http://corp.sina.com.cn/lawfirm/sina.htm" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "版权所有", |
||||
|
"publishTime": "", |
||||
|
"url": "https://corp.sina.com.cn/chn/copyright.html" |
||||
|
}, |
||||
|
{ |
||||
|
"title": "意见反馈", |
||||
|
"publishTime": "", |
||||
|
"url": "http://news.sina.com.cn/feedback/post.html" |
||||
|
} |
||||
|
] |
||||
@ -0,0 +1,212 @@ |
|||||
|
[ |
||||
|
{ |
||||
|
"rank": 1, |
||||
|
"universityName": "清华大学 Tsinghua University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 2, |
||||
|
"universityName": "北京大学 Peking University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 3, |
||||
|
"universityName": "浙江大学 Zhejiang University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "浙江", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 4, |
||||
|
"universityName": "上海交通大学 Shanghai Jiao Tong University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "上海", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 5, |
||||
|
"universityName": "复旦大学 Fudan University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "上海", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 6, |
||||
|
"universityName": "南京大学 Nanjing University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "江苏", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 7, |
||||
|
"universityName": "中国科学技术大学 University of Science and Technology of China 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "安徽", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 8, |
||||
|
"universityName": "武汉大学 Wuhan University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "湖北", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 9, |
||||
|
"universityName": "华中科技大学 Huazhong University of Science and Technology 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "湖北", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 10, |
||||
|
"universityName": "西安交通大学 Xi'an Jiaotong University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "陕西", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 11, |
||||
|
"universityName": "北京航空航天大学 Beihang University 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 12, |
||||
|
"universityName": "中山大学 Sun Yat-sen University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "广东", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 13, |
||||
|
"universityName": "北京理工大学 Beijing Institute of Technology 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 14, |
||||
|
"universityName": "哈尔滨工业大学 Harbin Institute of Technology 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "黑龙江", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 15, |
||||
|
"universityName": "四川大学 Sichuan University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "四川", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 16, |
||||
|
"universityName": "东南大学 Southeast University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "江苏", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 17, |
||||
|
"universityName": "中国人民大学 Renmin University of China 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 18, |
||||
|
"universityName": "同济大学 Tongji University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "上海", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 19, |
||||
|
"universityName": "北京师范大学 Beijing Normal University 双一流/985/211", |
||||
|
"totalScore": "师范", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 20, |
||||
|
"universityName": "天津大学 Tianjin University 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "天津", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 21, |
||||
|
"universityName": "西北工业大学 Northwestern Polytechnical University 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "陕西", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 22, |
||||
|
"universityName": "山东大学 Shandong University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "山东", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 23, |
||||
|
"universityName": "南开大学 Nankai University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "天津", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 24, |
||||
|
"universityName": "厦门大学 Xiamen University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "福建", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 25, |
||||
|
"universityName": "中国农业大学 China Agricultural University 双一流/985/211", |
||||
|
"totalScore": "农业", |
||||
|
"province": "北京", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 26, |
||||
|
"universityName": "吉林大学 Jilin University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "吉林", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 27, |
||||
|
"universityName": "中南大学 Central South University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "湖南", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 28, |
||||
|
"universityName": "大连理工大学 Dalian University of Technology 双一流/985/211", |
||||
|
"totalScore": "理工", |
||||
|
"province": "辽宁", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 29, |
||||
|
"universityName": "湖南大学 Hunan University 双一流/985/211", |
||||
|
"totalScore": "综合", |
||||
|
"province": "湖南", |
||||
|
"category": "" |
||||
|
}, |
||||
|
{ |
||||
|
"rank": 30, |
||||
|
"universityName": "华东师范大学 East China Normal University 双一流/985/211", |
||||
|
"totalScore": "师范", |
||||
|
"province": "上海", |
||||
|
"category": "" |
||||
|
} |
||||
|
] |
||||
@ -0,0 +1,335 @@ |
|||||
|
[ |
||||
|
{ |
||||
|
"cityName": "上海", |
||||
|
"temperature": 22.7, |
||||
|
"humidity": 83.0, |
||||
|
"windSpeed": 7.8, |
||||
|
"weatherCode": "3", |
||||
|
"hourlyTimes": [ |
||||
|
"00:00", |
||||
|
"01:00", |
||||
|
"02:00", |
||||
|
"03:00", |
||||
|
"04:00", |
||||
|
"05:00", |
||||
|
"06:00", |
||||
|
"07:00", |
||||
|
"08:00", |
||||
|
"09:00", |
||||
|
"10:00", |
||||
|
"11:00", |
||||
|
"12:00", |
||||
|
"13:00", |
||||
|
"14:00", |
||||
|
"15:00", |
||||
|
"16:00", |
||||
|
"17:00", |
||||
|
"18:00", |
||||
|
"19:00", |
||||
|
"20:00", |
||||
|
"21:00", |
||||
|
"22:00", |
||||
|
"23:00" |
||||
|
], |
||||
|
"hourlyTemperatures": [ |
||||
|
19.2, |
||||
|
19.0, |
||||
|
18.9, |
||||
|
18.3, |
||||
|
18.1, |
||||
|
17.8, |
||||
|
18.7, |
||||
|
20.9, |
||||
|
23.5, |
||||
|
24.9, |
||||
|
26.2, |
||||
|
27.0, |
||||
|
27.5, |
||||
|
28.1, |
||||
|
28.2, |
||||
|
27.4, |
||||
|
26.7, |
||||
|
25.0, |
||||
|
23.8, |
||||
|
22.7, |
||||
|
22.0, |
||||
|
20.6, |
||||
|
19.9, |
||||
|
19.4 |
||||
|
], |
||||
|
"hourlyHumidities": [ |
||||
|
83, |
||||
|
84, |
||||
|
85, |
||||
|
87, |
||||
|
89, |
||||
|
92, |
||||
|
90, |
||||
|
79, |
||||
|
55, |
||||
|
43, |
||||
|
38, |
||||
|
34, |
||||
|
33, |
||||
|
31, |
||||
|
30, |
||||
|
32, |
||||
|
35, |
||||
|
45, |
||||
|
54, |
||||
|
63, |
||||
|
67, |
||||
|
73, |
||||
|
76, |
||||
|
78 |
||||
|
], |
||||
|
"hourlyWindSpeeds": [ |
||||
|
3.8, |
||||
|
3.3, |
||||
|
2.6, |
||||
|
1.9, |
||||
|
1.0, |
||||
|
0.6, |
||||
|
2.3, |
||||
|
0.6, |
||||
|
1.8, |
||||
|
2.7, |
||||
|
3.0, |
||||
|
3.5, |
||||
|
5.4, |
||||
|
5.4, |
||||
|
6.0, |
||||
|
7.8, |
||||
|
9.2, |
||||
|
9.0, |
||||
|
8.1, |
||||
|
7.8, |
||||
|
7.2, |
||||
|
7.1, |
||||
|
7.1, |
||||
|
7.1 |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cityName": "广州", |
||||
|
"temperature": 25.9, |
||||
|
"humidity": 85.0, |
||||
|
"windSpeed": 5.3, |
||||
|
"weatherCode": "81", |
||||
|
"hourlyTimes": [ |
||||
|
"00:00", |
||||
|
"01:00", |
||||
|
"02:00", |
||||
|
"03:00", |
||||
|
"04:00", |
||||
|
"05:00", |
||||
|
"06:00", |
||||
|
"07:00", |
||||
|
"08:00", |
||||
|
"09:00", |
||||
|
"10:00", |
||||
|
"11:00", |
||||
|
"12:00", |
||||
|
"13:00", |
||||
|
"14:00", |
||||
|
"15:00", |
||||
|
"16:00", |
||||
|
"17:00", |
||||
|
"18:00", |
||||
|
"19:00", |
||||
|
"20:00", |
||||
|
"21:00", |
||||
|
"22:00", |
||||
|
"23:00" |
||||
|
], |
||||
|
"hourlyTemperatures": [ |
||||
|
27.7, |
||||
|
27.2, |
||||
|
26.0, |
||||
|
25.5, |
||||
|
25.4, |
||||
|
25.0, |
||||
|
25.0, |
||||
|
26.0, |
||||
|
28.1, |
||||
|
29.3, |
||||
|
30.6, |
||||
|
31.9, |
||||
|
33.0, |
||||
|
33.8, |
||||
|
33.9, |
||||
|
33.6, |
||||
|
34.2, |
||||
|
30.5, |
||||
|
29.4, |
||||
|
25.9, |
||||
|
26.4, |
||||
|
26.5, |
||||
|
26.3, |
||||
|
26.2 |
||||
|
], |
||||
|
"hourlyHumidities": [ |
||||
|
85, |
||||
|
87, |
||||
|
82, |
||||
|
84, |
||||
|
85, |
||||
|
90, |
||||
|
92, |
||||
|
87, |
||||
|
76, |
||||
|
70, |
||||
|
63, |
||||
|
57, |
||||
|
54, |
||||
|
53, |
||||
|
53, |
||||
|
54, |
||||
|
51, |
||||
|
69, |
||||
|
72, |
||||
|
95, |
||||
|
97, |
||||
|
96, |
||||
|
98, |
||||
|
98 |
||||
|
], |
||||
|
"hourlyWindSpeeds": [ |
||||
|
5.8, |
||||
|
4.9, |
||||
|
4.4, |
||||
|
3.3, |
||||
|
3.4, |
||||
|
3.8, |
||||
|
4.1, |
||||
|
5.6, |
||||
|
4.0, |
||||
|
3.8, |
||||
|
4.0, |
||||
|
2.8, |
||||
|
1.3, |
||||
|
3.3, |
||||
|
5.1, |
||||
|
5.2, |
||||
|
5.1, |
||||
|
12.3, |
||||
|
3.1, |
||||
|
5.3, |
||||
|
3.6, |
||||
|
1.7, |
||||
|
2.0, |
||||
|
1.4 |
||||
|
] |
||||
|
}, |
||||
|
{ |
||||
|
"cityName": "北京", |
||||
|
"temperature": 32.3, |
||||
|
"humidity": 56.0, |
||||
|
"windSpeed": 17.1, |
||||
|
"weatherCode": "0", |
||||
|
"hourlyTimes": [ |
||||
|
"00:00", |
||||
|
"01:00", |
||||
|
"02:00", |
||||
|
"03:00", |
||||
|
"04:00", |
||||
|
"05:00", |
||||
|
"06:00", |
||||
|
"07:00", |
||||
|
"08:00", |
||||
|
"09:00", |
||||
|
"10:00", |
||||
|
"11:00", |
||||
|
"12:00", |
||||
|
"13:00", |
||||
|
"14:00", |
||||
|
"15:00", |
||||
|
"16:00", |
||||
|
"17:00", |
||||
|
"18:00", |
||||
|
"19:00", |
||||
|
"20:00", |
||||
|
"21:00", |
||||
|
"22:00", |
||||
|
"23:00" |
||||
|
], |
||||
|
"hourlyTemperatures": [ |
||||
|
22.8, |
||||
|
21.9, |
||||
|
21.2, |
||||
|
20.1, |
||||
|
19.6, |
||||
|
18.8, |
||||
|
19.2, |
||||
|
20.7, |
||||
|
23.7, |
||||
|
27.0, |
||||
|
29.9, |
||||
|
32.5, |
||||
|
34.5, |
||||
|
35.8, |
||||
|
36.3, |
||||
|
36.6, |
||||
|
36.2, |
||||
|
35.7, |
||||
|
34.2, |
||||
|
32.3, |
||||
|
30.9, |
||||
|
29.9, |
||||
|
29.1, |
||||
|
28.6 |
||||
|
], |
||||
|
"hourlyHumidities": [ |
||||
|
56, |
||||
|
60, |
||||
|
63, |
||||
|
69, |
||||
|
71, |
||||
|
75, |
||||
|
74, |
||||
|
67, |
||||
|
57, |
||||
|
45, |
||||
|
37, |
||||
|
28, |
||||
|
21, |
||||
|
18, |
||||
|
20, |
||||
|
21, |
||||
|
26, |
||||
|
26, |
||||
|
30, |
||||
|
33, |
||||
|
35, |
||||
|
36, |
||||
|
35, |
||||
|
34 |
||||
|
], |
||||
|
"hourlyWindSpeeds": [ |
||||
|
11.6, |
||||
|
10.6, |
||||
|
7.6, |
||||
|
4.5, |
||||
|
3.9, |
||||
|
2.3, |
||||
|
2.3, |
||||
|
0.6, |
||||
|
0.8, |
||||
|
2.2, |
||||
|
2.4, |
||||
|
4.9, |
||||
|
7.6, |
||||
|
10.4, |
||||
|
12.2, |
||||
|
13.4, |
||||
|
14.7, |
||||
|
15.1, |
||||
|
14.5, |
||||
|
17.1, |
||||
|
16.9, |
||||
|
18.1, |
||||
|
19.7, |
||||
|
20.1 |
||||
|
] |
||||
|
} |
||||
|
] |
||||
@ -0,0 +1,14 @@ |
|||||
|
========== 书籍数据分析报告 ========== |
||||
|
生成时间: 2026-05-30T17:47:42.026682900 |
||||
|
分析书籍总数: 600 |
||||
|
|
||||
|
【价格统计】 |
||||
|
最高价: £59.92 |
||||
|
最低价: £10.01 |
||||
|
平均价: £35.29 |
||||
|
|
||||
|
【库存统计】 |
||||
|
有库存: 600 本 |
||||
|
缺货: 0 本 |
||||
|
|
||||
|
报告生成完成 |
||||
@ -0,0 +1,31 @@ |
|||||
|
========== 新闻数据分析报告 ========== |
||||
|
生成时间: 2026-05-30T17:47:42.145591 |
||||
|
分析新闻总数: 16 |
||||
|
|
||||
|
【发布时间分布】 |
||||
|
00:00 - 01:00: 0 条 |
||||
|
01:00 - 02:00: 0 条 |
||||
|
02:00 - 03:00: 0 条 |
||||
|
03:00 - 04:00: 0 条 |
||||
|
04:00 - 05:00: 0 条 |
||||
|
05:00 - 06:00: 0 条 |
||||
|
06:00 - 07:00: 0 条 |
||||
|
07:00 - 08:00: 0 条 |
||||
|
08:00 - 09:00: 0 条 |
||||
|
09:00 - 10:00: 0 条 |
||||
|
10:00 - 11:00: 0 条 |
||||
|
11:00 - 12:00: 0 条 |
||||
|
12:00 - 13:00: 0 条 |
||||
|
13:00 - 14:00: 0 条 |
||||
|
14:00 - 15:00: 0 条 |
||||
|
15:00 - 16:00: 0 条 |
||||
|
16:00 - 17:00: 0 条 |
||||
|
17:00 - 18:00: 16 条 |
||||
|
18:00 - 19:00: 0 条 |
||||
|
19:00 - 20:00: 0 条 |
||||
|
20:00 - 21:00: 0 条 |
||||
|
21:00 - 22:00: 0 条 |
||||
|
22:00 - 23:00: 0 条 |
||||
|
23:00 - 00:00: 0 条 |
||||
|
|
||||
|
报告生成完成 |
||||
@ -0,0 +1,17 @@ |
|||||
|
========== 大学排名数据分析报告 ========== |
||||
|
生成时间: 2026-05-30T17:47:42.272388 |
||||
|
分析大学总数: 30 |
||||
|
|
||||
|
【省份排行榜 TOP 10】 |
||||
|
北京: 7 所大学 |
||||
|
上海: 4 所大学 |
||||
|
湖北: 2 所大学 |
||||
|
湖南: 2 所大学 |
||||
|
天津: 2 所大学 |
||||
|
陕西: 2 所大学 |
||||
|
江苏: 2 所大学 |
||||
|
山东: 1 所大学 |
||||
|
福建: 1 所大学 |
||||
|
吉林: 1 所大学 |
||||
|
|
||||
|
报告生成完成 |
||||
@ -0,0 +1,29 @@ |
|||||
|
========== 天气数据分析报告 ========== |
||||
|
生成时间: 2026-05-30T17:47:42.585539200 |
||||
|
分析城市数量: 3 |
||||
|
数据来源: Open-Meteo API (CC BY 4.0) |
||||
|
|
||||
|
【多城市天气对比】 |
||||
|
|
||||
|
城市: 上海 |
||||
|
当前温度: 24.0°C |
||||
|
当前湿度: 83% |
||||
|
风速: 8.3 km/h |
||||
|
天气: 多云 |
||||
|
24小时平均温度: 22.7°C |
||||
|
|
||||
|
城市: 广州 |
||||
|
当前温度: 29.8°C |
||||
|
当前湿度: 85% |
||||
|
风速: 2.4 km/h |
||||
|
天气: 小毛毛雨 |
||||
|
24小时平均温度: 28.6°C |
||||
|
|
||||
|
城市: 北京 |
||||
|
当前温度: 34.6°C |
||||
|
当前湿度: 56% |
||||
|
风速: 14.4 km/h |
||||
|
天气: 晴 |
||||
|
24小时平均温度: 28.2°C |
||||
|
|
||||
|
报告生成完成 |
||||
@ -0,0 +1,15 @@ |
|||||
|
package com.example.crawler; |
||||
|
|
||||
|
import com.example.crawler.controller.CrawlerController; |
||||
|
|
||||
|
/** |
||||
|
* 爬虫项目主入口类 |
||||
|
*/ |
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
// 创建控制器并启动CLI界面
|
||||
|
CrawlerController controller = new CrawlerController(); |
||||
|
controller.start(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,229 @@ |
|||||
|
package com.example.crawler.chart; |
||||
|
|
||||
|
import java.awt.Color; |
||||
|
import java.awt.Font; |
||||
|
import java.io.File; |
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import org.jfree.chart.ChartFactory; |
||||
|
import org.jfree.chart.ChartUtils; |
||||
|
import org.jfree.chart.JFreeChart; |
||||
|
import org.jfree.chart.axis.CategoryAxis; |
||||
|
import org.jfree.chart.axis.NumberAxis; |
||||
|
import org.jfree.chart.plot.CategoryPlot; |
||||
|
import org.jfree.chart.plot.PiePlot; |
||||
|
import org.jfree.chart.plot.XYPlot; |
||||
|
import org.jfree.chart.renderer.category.BarRenderer; |
||||
|
import org.jfree.chart.renderer.category.LineAndShapeRenderer; |
||||
|
import org.jfree.data.category.DefaultCategoryDataset; |
||||
|
import org.jfree.data.general.DefaultPieDataset; |
||||
|
import org.jfree.data.xy.XYDataset; |
||||
|
import org.jfree.data.xy.XYSeries; |
||||
|
import org.jfree.data.xy.XYSeriesCollection; |
||||
|
|
||||
|
public class ChartGenerator { |
||||
|
|
||||
|
static { |
||||
|
File dir = new File(CrawlerConstants.CHARTS_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static void generatePriceHistogram(Map<String, Integer> priceDistribution, String fileName) { |
||||
|
DefaultCategoryDataset dataset = createCategoryDataset(priceDistribution); |
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"书籍价格分布", |
||||
|
"价格区间(£)", |
||||
|
"书籍数量", |
||||
|
dataset |
||||
|
); |
||||
|
customizeBarChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateRatingPieChart(Map<String, Integer> ratingDistribution, String fileName) { |
||||
|
DefaultPieDataset<String> dataset = new DefaultPieDataset<>(); |
||||
|
for (Map.Entry<String, Integer> entry : ratingDistribution.entrySet()) { |
||||
|
dataset.setValue(entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
JFreeChart chart = ChartFactory.createPieChart( |
||||
|
"书籍评分分布", |
||||
|
dataset, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
customizePieChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateNewsTimeTrend(Map<Integer, Integer> hourDistribution, String fileName) { |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
for (int i = 0; i < 24; i++) { |
||||
|
int count = hourDistribution.getOrDefault(i, 0); |
||||
|
dataset.addValue(count, "新闻数量", String.format("%02d:00", i)); |
||||
|
} |
||||
|
JFreeChart chart = ChartFactory.createLineChart( |
||||
|
"新闻发布时间分布", |
||||
|
"小时", |
||||
|
"新闻数量", |
||||
|
dataset |
||||
|
); |
||||
|
customizeLineChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateWordFrequencyBarChart(Map<String, Integer> wordFrequency, String fileName) { |
||||
|
Map<String, Integer> top10 = wordFrequency.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
||||
|
|
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
for (Map.Entry<String, Integer> entry : top10.entrySet()) { |
||||
|
dataset.addValue(entry.getValue(), "词频", entry.getKey()); |
||||
|
} |
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"新闻高频词 TOP 10", |
||||
|
"关键词", |
||||
|
"出现次数", |
||||
|
dataset |
||||
|
); |
||||
|
customizeBarChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateProvinceBarChart(Map<String, Integer> provinceDistribution, String fileName) { |
||||
|
Map<String, Integer> top10 = provinceDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
||||
|
|
||||
|
DefaultCategoryDataset dataset = createCategoryDataset(top10); |
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"各省上榜大学数量 TOP 10", |
||||
|
"省份", |
||||
|
"大学数量", |
||||
|
dataset |
||||
|
); |
||||
|
customizeBarChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateScoreHistogram(Map<String, Integer> scoreDistribution, String fileName) { |
||||
|
DefaultCategoryDataset dataset = createCategoryDataset(scoreDistribution); |
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"大学总分分布", |
||||
|
"分数区间", |
||||
|
"大学数量", |
||||
|
dataset |
||||
|
); |
||||
|
customizeBarChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateTemperatureTrend(List<String> times, List<Double> temperatures, String cityName, String fileName) { |
||||
|
XYSeries series = new XYSeries(cityName); |
||||
|
for (int i = 0; i < Math.min(times.size(), temperatures.size()); i++) { |
||||
|
series.add(i, temperatures.get(i)); |
||||
|
} |
||||
|
XYDataset dataset = new XYSeriesCollection(series); |
||||
|
JFreeChart chart = ChartFactory.createXYLineChart( |
||||
|
cityName + " 未来24小时温度变化", |
||||
|
"小时", |
||||
|
"温度(°C)", |
||||
|
dataset |
||||
|
); |
||||
|
customizeXYLineChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
public static void generateMultiCityTemperatureComparison(Map<String, List<Double>> cityTemperatures, String fileName) { |
||||
|
XYSeriesCollection dataset = new XYSeriesCollection(); |
||||
|
for (Map.Entry<String, List<Double>> entry : cityTemperatures.entrySet()) { |
||||
|
XYSeries series = new XYSeries(entry.getKey()); |
||||
|
List<Double> temps = entry.getValue(); |
||||
|
for (int i = 0; i < Math.min(temps.size(), 24); i++) { |
||||
|
series.add(i, temps.get(i)); |
||||
|
} |
||||
|
dataset.addSeries(series); |
||||
|
} |
||||
|
JFreeChart chart = ChartFactory.createXYLineChart( |
||||
|
"多城市未来24小时温度对比", |
||||
|
"小时", |
||||
|
"温度(°C)", |
||||
|
dataset |
||||
|
); |
||||
|
customizeXYLineChart(chart); |
||||
|
saveChart(chart, fileName); |
||||
|
} |
||||
|
|
||||
|
private static DefaultCategoryDataset createCategoryDataset(Map<String, Integer> data) { |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
for (Map.Entry<String, Integer> entry : data.entrySet()) { |
||||
|
dataset.addValue(entry.getValue(), "数值", entry.getKey()); |
||||
|
} |
||||
|
return dataset; |
||||
|
} |
||||
|
|
||||
|
private static void customizeBarChart(JFreeChart chart) { |
||||
|
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
||||
|
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
CategoryPlot plot = chart.getCategoryPlot(); |
||||
|
CategoryAxis domainAxis = plot.getDomainAxis(); |
||||
|
domainAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
domainAxis.setTickLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 10)); |
||||
|
|
||||
|
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
||||
|
rangeAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
BarRenderer renderer = (BarRenderer) plot.getRenderer(); |
||||
|
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
||||
|
} |
||||
|
|
||||
|
private static void customizePieChart(JFreeChart chart) { |
||||
|
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
||||
|
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
PiePlot plot = (PiePlot) chart.getPlot(); |
||||
|
plot.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
} |
||||
|
|
||||
|
private static void customizeLineChart(JFreeChart chart) { |
||||
|
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
||||
|
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
CategoryPlot plot = chart.getCategoryPlot(); |
||||
|
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); |
||||
|
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
||||
|
} |
||||
|
|
||||
|
private static void customizeXYLineChart(JFreeChart chart) { |
||||
|
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
||||
|
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
XYPlot plot = chart.getXYPlot(); |
||||
|
|
||||
|
NumberAxis xAxis = (NumberAxis) plot.getDomainAxis(); |
||||
|
xAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
|
||||
|
NumberAxis yAxis = (NumberAxis) plot.getRangeAxis(); |
||||
|
yAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
||||
|
} |
||||
|
|
||||
|
private static void saveChart(JFreeChart chart, String fileName) { |
||||
|
try { |
||||
|
File file = new File(CrawlerConstants.CHARTS_DIR, fileName); |
||||
|
ChartUtils.saveChartAsPNG(file, chart, 800, 500); |
||||
|
System.out.println("图表已保存: " + file.getAbsolutePath()); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("保存图表失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,60 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.strategy.CrawlStrategy; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public abstract class BaseCrawlCommand implements Command { |
||||
|
|
||||
|
protected static final Logger logger = LoggerFactory.getLogger(BaseCrawlCommand.class); |
||||
|
|
||||
|
protected DataRepository repository; |
||||
|
protected int maxRetries; |
||||
|
protected long retryDelayMs; |
||||
|
|
||||
|
public BaseCrawlCommand(DataRepository repository) { |
||||
|
this.repository = repository; |
||||
|
this.maxRetries = CrawlerConstants.MAX_RETRIES; |
||||
|
this.retryDelayMs = 2000; |
||||
|
} |
||||
|
|
||||
|
protected abstract CrawlStrategy<?> getStrategy(); |
||||
|
|
||||
|
protected abstract void saveToRepository(Object data); |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
try { |
||||
|
Object data = crawlWithRetry(); |
||||
|
saveToRepository(data); |
||||
|
logger.info("Crawling completed and saved to repository"); |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Crawling failed", e); |
||||
|
System.err.println("爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
protected Object crawlWithRetry() throws Exception { |
||||
|
int attempts = 0; |
||||
|
while (attempts < maxRetries) { |
||||
|
try { |
||||
|
CrawlStrategy<?> strategy = getStrategy(); |
||||
|
return strategy.crawl(); |
||||
|
} catch (NetworkException e) { |
||||
|
attempts++; |
||||
|
if (attempts < maxRetries) { |
||||
|
logger.warn("Network error, retrying in {}ms (attempt {}/{})", retryDelayMs, attempts, maxRetries); |
||||
|
Thread.sleep(retryDelayMs); |
||||
|
} else { |
||||
|
logger.error("Max retries reached, giving up"); |
||||
|
throw e; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
throw new CrawlException("Max retries exceeded"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,32 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.strategy.BookCrawlStrategy; |
||||
|
import com.example.crawler.strategy.CrawlStrategy; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BookCommand extends BaseCrawlCommand { |
||||
|
|
||||
|
public BookCommand(DataRepository repository) { |
||||
|
super(repository); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
protected CrawlStrategy<?> getStrategy() { |
||||
|
return new BookCrawlStrategy(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
@SuppressWarnings("unchecked") |
||||
|
protected void saveToRepository(Object data) { |
||||
|
repository.saveBooks((List<Book>) data); |
||||
|
System.out.println("成功爬取 " + ((List<Book>) data).size() + " 本书籍信息"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取书籍信息"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,20 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
/**
 * Command interface.
 *
 * <p>Defines the standard operations every command offers (Command pattern).
 */
public interface Command {

    /**
     * Executes the command.
     */
    void execute();

    /**
     * Returns the name of the command.
     *
     * @return the command name
     */
    String getName();
}
||||
@ -0,0 +1,45 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.controller.CrawlerController; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
|
||||
|
public class CrawlAllCommand implements Command { |
||||
|
|
||||
|
private final DataRepository repository; |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public CrawlAllCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
this.repository = controller.getRepository(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
System.out.println("\n=== 开始爬取全部数据源 ==="); |
||||
|
|
||||
|
Command[] commands = { |
||||
|
new BookCommand(repository), |
||||
|
new NewsCommand(repository), |
||||
|
new CrawlRankingCommand(repository), |
||||
|
new WeatherCommand(repository) |
||||
|
}; |
||||
|
|
||||
|
for (Command command : commands) { |
||||
|
command.execute(); |
||||
|
try { |
||||
|
Thread.sleep(2000); |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
new SaveCommand(controller).execute(); |
||||
|
|
||||
|
System.out.println("\n=== 全部数据爬取完成 ==="); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取全部数据并保存"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,104 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.controller.CrawlerController; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.service.BookAnalysisService; |
||||
|
import com.example.crawler.service.NewsAnalysisService; |
||||
|
import com.example.crawler.service.RankingAnalysisService; |
||||
|
import com.example.crawler.service.WeatherAnalysisService; |
||||
|
|
||||
|
public class CrawlAndAnalyzeAllCommand implements Command { |
||||
|
|
||||
|
private final DataRepository repository; |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public CrawlAndAnalyzeAllCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
this.repository = controller.getRepository(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
System.out.println("\n========== 爬取全部数据并生成分析 ==========\n"); |
||||
|
|
||||
|
System.out.println("第1步:爬取书籍信息..."); |
||||
|
try { |
||||
|
BookCommand bookCommand = new BookCommand(repository); |
||||
|
bookCommand.execute(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("书籍爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n第2步:爬取新闻信息..."); |
||||
|
try { |
||||
|
NewsCommand newsCommand = new NewsCommand(repository); |
||||
|
newsCommand.execute(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("新闻爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n第3步:爬取大学排名..."); |
||||
|
try { |
||||
|
CrawlRankingCommand rankingCommand = new CrawlRankingCommand(repository); |
||||
|
rankingCommand.execute(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("大学排名爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n第4步:爬取天气数据..."); |
||||
|
try { |
||||
|
WeatherCommand weatherCommand = new WeatherCommand(repository); |
||||
|
weatherCommand.execute(); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("天气数据爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 数据爬取完成,开始分析 ==========\n"); |
||||
|
|
||||
|
try { |
||||
|
BookAnalysisService bookService = new BookAnalysisService(); |
||||
|
if (!repository.getBooks().isEmpty()) { |
||||
|
bookService.analyze(repository.getBooks()); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("书籍分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
NewsAnalysisService newsService = new NewsAnalysisService(); |
||||
|
if (!repository.getNewsList().isEmpty()) { |
||||
|
newsService.analyze(repository.getNewsList()); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("新闻分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
RankingAnalysisService rankingService = new RankingAnalysisService(); |
||||
|
if (!repository.getRankings().isEmpty()) { |
||||
|
rankingService.analyze(repository.getRankings()); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("大学排名分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
||||
|
if (!repository.getWeatherList().isEmpty()) { |
||||
|
weatherService.analyze(repository.getWeatherList()); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("天气分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 全部完成 =========="); |
||||
|
System.out.println("原始数据已保存到 output/ 目录"); |
||||
|
System.out.println("分析报告已保存到 reports/ 目录"); |
||||
|
System.out.println("图表已保存到 charts/ 目录"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取并分析全部数据"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,32 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.strategy.CrawlStrategy; |
||||
|
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlRankingCommand extends BaseCrawlCommand { |
||||
|
|
||||
|
public CrawlRankingCommand(DataRepository repository) { |
||||
|
super(repository); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
protected CrawlStrategy<?> getStrategy() { |
||||
|
return new UniversityRankCrawlStrategy(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
@SuppressWarnings("unchecked") |
||||
|
protected void saveToRepository(Object data) { |
||||
|
repository.saveRankings((List<UniversityRank>) data); |
||||
|
System.out.println("成功爬取 " + ((List<UniversityRank>) data).size() + " 条大学排名数据"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取软科中国大学排名"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,19 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
/** |
||||
|
* 退出命令 |
||||
|
* // Command模式:退出命令
|
||||
|
*/ |
||||
|
public class ExitCommand implements Command { |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
System.out.println("\n=== 感谢使用数据爬取系统 ==="); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "退出"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,77 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.controller.CrawlerController; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.service.BookAnalysisService; |
||||
|
import com.example.crawler.service.NewsAnalysisService; |
||||
|
import com.example.crawler.service.RankingAnalysisService; |
||||
|
import com.example.crawler.service.WeatherAnalysisService; |
||||
|
|
||||
|
public class GenerateAllAnalysisCommand implements Command { |
||||
|
|
||||
|
private final DataRepository repository; |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public GenerateAllAnalysisCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
this.repository = controller.getRepository(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
System.out.println("\n========== 生成所有数据源分析报告 ==========\n"); |
||||
|
|
||||
|
try { |
||||
|
BookAnalysisService bookService = new BookAnalysisService(); |
||||
|
if (!repository.getBooks().isEmpty()) { |
||||
|
bookService.analyze(repository.getBooks()); |
||||
|
} else { |
||||
|
System.out.println("没有书籍数据,跳过书籍分析"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("书籍分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
NewsAnalysisService newsService = new NewsAnalysisService(); |
||||
|
if (!repository.getNewsList().isEmpty()) { |
||||
|
newsService.analyze(repository.getNewsList()); |
||||
|
} else { |
||||
|
System.out.println("没有新闻数据,跳过新闻分析"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("新闻分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
RankingAnalysisService rankingService = new RankingAnalysisService(); |
||||
|
if (!repository.getRankings().isEmpty()) { |
||||
|
rankingService.analyze(repository.getRankings()); |
||||
|
} else { |
||||
|
System.out.println("没有大学排名数据,跳过排名分析"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("大学排名分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
||||
|
if (!repository.getWeatherList().isEmpty()) { |
||||
|
weatherService.analyze(repository.getWeatherList()); |
||||
|
} else { |
||||
|
System.out.println("没有天气数据,跳过天气分析"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("天气分析失败: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 分析完成 =========="); |
||||
|
System.out.println("报告已保存到 reports/ 目录"); |
||||
|
System.out.println("图表已保存到 charts/ 目录"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "生成所有分析报告"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,32 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.strategy.CrawlStrategy; |
||||
|
import com.example.crawler.strategy.NewsCrawlStrategy; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsCommand extends BaseCrawlCommand { |
||||
|
|
||||
|
public NewsCommand(DataRepository repository) { |
||||
|
super(repository); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
protected CrawlStrategy<?> getStrategy() { |
||||
|
return new NewsCrawlStrategy(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
@SuppressWarnings("unchecked") |
||||
|
protected void saveToRepository(Object data) { |
||||
|
repository.saveNewsList((List<News>) data); |
||||
|
System.out.println("成功爬取 " + ((List<News>) data).size() + " 条新闻"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取新浪国内新闻"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,74 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import java.time.LocalDateTime; |
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.List; |
||||
|
|
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.controller.CrawlerController; |
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.model.Weather; |
||||
|
import com.example.crawler.util.JsonUtil; |
||||
|
|
||||
|
public class SaveCommand implements Command { |
||||
|
|
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public SaveCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
System.out.println("\n=== 开始保存数据 ==="); |
||||
|
|
||||
|
try { |
||||
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); |
||||
|
|
||||
|
// 保存书籍数据
|
||||
|
List<Book> books = controller.getBooks(); |
||||
|
if (books != null && !books.isEmpty()) { |
||||
|
String bookFileName = CrawlerConstants.OUTPUT_DIR + "/books_" + timestamp + ".json"; |
||||
|
JsonUtil.saveListToJsonFile(books, bookFileName); |
||||
|
System.out.println("书籍数据已保存到: " + bookFileName); |
||||
|
} |
||||
|
|
||||
|
// 保存新闻数据
|
||||
|
List<News> newsList = controller.getNewsList(); |
||||
|
if (newsList != null && !newsList.isEmpty()) { |
||||
|
String newsFileName = CrawlerConstants.OUTPUT_DIR + "/news_" + timestamp + ".json"; |
||||
|
JsonUtil.saveListToJsonFile(newsList, newsFileName); |
||||
|
System.out.println("新闻数据已保存到: " + newsFileName); |
||||
|
} |
||||
|
|
||||
|
// 保存大学排名数据
|
||||
|
List<UniversityRank> universityRankList = controller.getUniversityRankList(); |
||||
|
if (universityRankList != null && !universityRankList.isEmpty()) { |
||||
|
String rankingFileName = CrawlerConstants.OUTPUT_DIR + "/university_ranking_" + timestamp + ".json"; |
||||
|
JsonUtil.saveListToJsonFile(universityRankList, rankingFileName); |
||||
|
System.out.println("大学排名数据已保存到: " + rankingFileName); |
||||
|
} |
||||
|
|
||||
|
// 保存天气数据
|
||||
|
List<Weather> weatherList = controller.getWeatherList(); |
||||
|
if (weatherList != null && !weatherList.isEmpty()) { |
||||
|
String weatherFileName = CrawlerConstants.OUTPUT_DIR + "/weather_" + timestamp + ".json"; |
||||
|
JsonUtil.saveListToJsonFile(weatherList, weatherFileName); |
||||
|
System.out.println("天气数据已保存到: " + weatherFileName); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n=== 数据保存完成 ==="); |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
System.err.println("保存数据失败: " + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "保存当前数据到文件"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,32 @@ |
|||||
|
package com.example.crawler.command; |
||||
|
|
||||
|
import com.example.crawler.model.Weather; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.strategy.CrawlStrategy; |
||||
|
import com.example.crawler.strategy.WeatherCrawlStrategy; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class WeatherCommand extends BaseCrawlCommand { |
||||
|
|
||||
|
public WeatherCommand(DataRepository repository) { |
||||
|
super(repository); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
protected CrawlStrategy<?> getStrategy() { |
||||
|
return new WeatherCrawlStrategy(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
@SuppressWarnings("unchecked") |
||||
|
protected void saveToRepository(Object data) { |
||||
|
repository.saveWeatherList((List<Weather>) data); |
||||
|
System.out.println("成功爬取 " + ((List<Weather>) data).size() + " 个城市的天气信息"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "爬取天气数据"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,31 @@ |
|||||
|
package com.example.crawler.constant; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/**
 * Shared constants for the crawler: HTTP request headers, timing and retry settings,
 * source URLs, output directory names and the coordinates of the cities whose weather
 * is fetched.
 */
public class CrawlerConstants {

    /** User-Agent header value sent with requests. */
    public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
    /** Referer header value sent with requests. */
    public static final String REFERER = "https://www.baidu.com";

    /** Timeout in milliseconds. */
    public static final int TIMEOUT_MS = 10000;
    /** Maximum number of attempts used by the crawl commands' retry loop. */
    public static final int MAX_RETRIES = 3;
    /** Delay in milliseconds. */
    public static final long DELAY_MS = 3000;

    public static final String URL_BOOKS = "https://books.toscrape.com/";
    public static final String URL_NEWS = "https://news.sina.com.cn/china/";
    public static final String URL_RANKING = "https://www.shanghairanking.cn/rankings/bcur/202310";
    public static final String URL_WEATHER_API = "https://api.open-meteo.com/v1/forecast";

    public static final String OUTPUT_DIR = "output";
    public static final String REPORTS_DIR = "reports";
    public static final String CHARTS_DIR = "charts";

    /**
     * City name mapped to a two-element coordinate array (values look like
     * latitude, longitude — confirm against the weather strategy).
     *
     * <p>The map itself is unmodifiable so this shared constant cannot be altered at
     * runtime; the coordinate arrays are still ordinary mutable arrays and must not be
     * written to.
     */
    public static final Map<String, double[]> CITY_COORDINATES;

    static {
        Map<String, double[]> coordinates = new HashMap<>();
        coordinates.put("北京", new double[]{39.9042, 116.4074});
        coordinates.put("上海", new double[]{31.2304, 121.4737});
        coordinates.put("广州", new double[]{23.1291, 113.2644});
        // Wrap the same HashMap so iteration order is unchanged while writes are rejected.
        CITY_COORDINATES = java.util.Collections.unmodifiableMap(coordinates);
    }
}
||||
@ -0,0 +1,90 @@ |
|||||
|
package com.example.crawler.controller; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
import com.example.crawler.command.BookCommand; |
||||
|
import com.example.crawler.command.Command; |
||||
|
import com.example.crawler.command.CrawlAllCommand; |
||||
|
import com.example.crawler.command.CrawlAndAnalyzeAllCommand; |
||||
|
import com.example.crawler.command.CrawlRankingCommand; |
||||
|
import com.example.crawler.command.ExitCommand; |
||||
|
import com.example.crawler.command.GenerateAllAnalysisCommand; |
||||
|
import com.example.crawler.command.NewsCommand; |
||||
|
import com.example.crawler.command.SaveCommand; |
||||
|
import com.example.crawler.command.WeatherCommand; |
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.model.Weather; |
||||
|
import com.example.crawler.repository.DataRepository; |
||||
|
import com.example.crawler.view.CrawlerView; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
|
||||
|
private final CrawlerView view; |
||||
|
private final Map<Integer, Command> commandMap; |
||||
|
private final DataRepository repository; |
||||
|
|
||||
|
public CrawlerController() { |
||||
|
this.view = new CrawlerView(); |
||||
|
this.repository = DataRepository.getInstance(); |
||||
|
this.commandMap = new HashMap<>(); |
||||
|
initCommands(); |
||||
|
} |
||||
|
|
||||
|
private void initCommands() { |
||||
|
commandMap.put(1, new BookCommand(repository)); |
||||
|
commandMap.put(2, new NewsCommand(repository)); |
||||
|
commandMap.put(3, new CrawlRankingCommand(repository)); |
||||
|
commandMap.put(4, new WeatherCommand(repository)); |
||||
|
commandMap.put(5, new CrawlAllCommand(this)); |
||||
|
commandMap.put(6, new SaveCommand(this)); |
||||
|
commandMap.put(7, new GenerateAllAnalysisCommand(this)); |
||||
|
commandMap.put(8, new CrawlAndAnalyzeAllCommand(this)); |
||||
|
commandMap.put(9, new ExitCommand()); |
||||
|
} |
||||
|
|
||||
|
public void start() { |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
while (true) { |
||||
|
view.showMenu(); |
||||
|
|
||||
|
int choice = view.getInput(scanner); |
||||
|
|
||||
|
Command command = commandMap.get(choice); |
||||
|
if (command != null) { |
||||
|
command.execute(); |
||||
|
} else { |
||||
|
view.showError("无效的选择,请输入1-9之间的数字"); |
||||
|
} |
||||
|
|
||||
|
if (choice != 9) { |
||||
|
view.pause(scanner); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public List<Book> getBooks() { |
||||
|
return repository.getBooks(); |
||||
|
} |
||||
|
|
||||
|
public List<News> getNewsList() { |
||||
|
return repository.getNewsList(); |
||||
|
} |
||||
|
|
||||
|
public List<UniversityRank> getUniversityRankList() { |
||||
|
return repository.getRankings(); |
||||
|
} |
||||
|
|
||||
|
public List<Weather> getWeatherList() { |
||||
|
return repository.getWeatherList(); |
||||
|
} |
||||
|
|
||||
|
public DataRepository getRepository() { |
||||
|
return repository; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,16 @@ |
|||||
|
package com.example.crawler.exception; |
||||
|
|
||||
|
/**
 * Base class for crawler exceptions.
 *
 * <p>All crawler-related exceptions extend this class.
 */
public class CrawlException extends Exception {

    /**
     * @param message description of the failure
     */
    public CrawlException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlException(String message, Throwable cause) {
        super(message, cause);
    }
}
||||
@ -0,0 +1,16 @@ |
|||||
|
package com.example.crawler.exception; |
||||
|
|
||||
|
/** |
||||
|
* 数据保存异常 |
||||
|
* 用于处理文件写入失败、JSON序列化失败等数据保存相关错误 |
||||
|
*/ |
||||
|
public class DataSaveException extends CrawlException { |
||||
|
|
||||
|
public DataSaveException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public DataSaveException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,16 @@ |
|||||
|
package com.example.crawler.exception; |
||||
|
|
||||
|
/** |
||||
|
* 网络异常 |
||||
|
* 用于处理HTTP请求失败、连接超时等网络相关错误 |
||||
|
*/ |
||||
|
public class NetworkException extends CrawlException { |
||||
|
|
||||
|
public NetworkException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public NetworkException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,16 @@ |
|||||
|
package com.example.crawler.exception; |
||||
|
|
||||
|
/** |
||||
|
* 解析异常 |
||||
|
* 用于处理HTML解析失败、JSON解析失败等数据解析相关错误 |
||||
|
*/ |
||||
|
public class ParseException extends CrawlException { |
||||
|
|
||||
|
public ParseException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
|
||||
|
public ParseException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,65 @@ |
|||||
|
package com.example.crawler.model; |
||||
|
|
||||
|
/**
 * Book data model.
 *
 * <p>Holds the book information scraped from the toscrape.com site. All values are kept
 * as the strings they were scraped as.
 */
public class Book {

    private String title;
    private String price;
    private String availability;
    private String rating;

    /** Creates a book with all fields unset. */
    public Book() {
    }

    /** Creates a book with every field supplied. */
    public Book(String title, String price, String availability, String rating) {
        this.title = title;
        this.price = price;
        this.availability = availability;
        this.rating = rating;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getAvailability() {
        return availability;
    }

    public void setAvailability(String availability) {
        this.availability = availability;
    }

    public String getRating() {
        return rating;
    }

    public void setRating(String rating) {
        this.rating = rating;
    }

    /** Renders all fields as {@code Book{title='...', price='...', ...}}. */
    @Override
    public String toString() {
        return String.format(
                "Book{title='%s', price='%s', availability='%s', rating='%s'}",
                title, price, availability, rating);
    }
}
||||
@ -0,0 +1,54 @@ |
|||||
|
package com.example.crawler.model; |
||||
|
|
||||
|
/**
 * News data model.
 *
 * <p>Holds one domestic news item from Sina News: title, publish time and link.
 */
public class News {

    private String title;
    private String publishTime;
    private String url;

    /** Creates a news item with all fields unset. */
    public News() {
    }

    /** Creates a news item with every field supplied. */
    public News(String title, String publishTime, String url) {
        this.title = title;
        this.publishTime = publishTime;
        this.url = url;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPublishTime() {
        return publishTime;
    }

    public void setPublishTime(String publishTime) {
        this.publishTime = publishTime;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /** Renders all fields as {@code News{title='...', publishTime='...', url='...'}}. */
    @Override
    public String toString() {
        return String.format("News{title='%s', publishTime='%s', url='%s'}", title, publishTime, url);
    }
}
||||
@ -0,0 +1,76 @@ |
|||||
|
package com.example.crawler.model; |
||||
|
|
||||
|
/**
 * University ranking data model.
 *
 * <p>Holds one entry of the ShanghaiRanking (软科) Chinese university ranking.
 */
public class UniversityRank {

    private Integer rank;
    private String universityName;
    private String totalScore;
    private String province;
    private String category;

    /** Creates an entry with all fields unset. */
    public UniversityRank() {
    }

    /** Creates an entry with every field supplied. */
    public UniversityRank(Integer rank, String universityName, String totalScore, String province, String category) {
        this.rank = rank;
        this.universityName = universityName;
        this.totalScore = totalScore;
        this.province = province;
        this.category = category;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public String getTotalScore() {
        return totalScore;
    }

    public void setTotalScore(String totalScore) {
        this.totalScore = totalScore;
    }

    public String getProvince() {
        return province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    public String getCategory() {
        return category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    /** Renders all fields as {@code UniversityRank{rank=..., universityName='...', ...}}. */
    @Override
    public String toString() {
        return String.format(
                "UniversityRank{rank=%s, universityName='%s', totalScore='%s', province='%s', category='%s'}",
                rank, universityName, totalScore, province, category);
    }
}
||||
@ -0,0 +1,140 @@ |
|||||
|
package com.example.crawler.model; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/**
 * Weather data model.
 *
 * <p>Holds the weather information of one city as obtained from the Open-Meteo API:
 * current values plus parallel hourly series.
 * Data source: Open-Meteo (CC BY 4.0).
 */
public class Weather {

    private String cityName;
    private double temperature;
    private double humidity;
    private double windSpeed;
    private String weatherCode;
    private List<String> hourlyTimes;
    private List<Double> hourlyTemperatures;
    private List<Integer> hourlyHumidities;
    private List<Double> hourlyWindSpeeds;

    /** Creates an empty entry; every hourly series starts as an empty list. */
    public Weather() {
        this.hourlyTimes = new ArrayList<>();
        this.hourlyTemperatures = new ArrayList<>();
        this.hourlyHumidities = new ArrayList<>();
        this.hourlyWindSpeeds = new ArrayList<>();
    }

    /**
     * Creates an entry with the current values set; every hourly series starts as an
     * empty list.
     *
     * @param cityName    name of the city
     * @param temperature current temperature
     * @param humidity    current humidity
     * @param windSpeed   current wind speed
     * @param weatherCode weather code as a string (see {@link #getWeatherDescription()})
     */
    public Weather(String cityName, double temperature, double humidity, double windSpeed, String weatherCode) {
        this(); // shares the hourly-list initialisation with the no-arg constructor
        this.cityName = cityName;
        this.temperature = temperature;
        this.humidity = humidity;
        this.windSpeed = windSpeed;
        this.weatherCode = weatherCode;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    public double getTemperature() {
        return temperature;
    }

    public void setTemperature(double temperature) {
        this.temperature = temperature;
    }

    public double getHumidity() {
        return humidity;
    }

    public void setHumidity(double humidity) {
        this.humidity = humidity;
    }

    public double getWindSpeed() {
        return windSpeed;
    }

    public void setWindSpeed(double windSpeed) {
        this.windSpeed = windSpeed;
    }

    public String getWeatherCode() {
        return weatherCode;
    }

    public void setWeatherCode(String weatherCode) {
        this.weatherCode = weatherCode;
    }

    public List<String> getHourlyTimes() {
        return hourlyTimes;
    }

    public void setHourlyTimes(List<String> hourlyTimes) {
        this.hourlyTimes = hourlyTimes;
    }

    public List<Double> getHourlyTemperatures() {
        return hourlyTemperatures;
    }

    public void setHourlyTemperatures(List<Double> hourlyTemperatures) {
        this.hourlyTemperatures = hourlyTemperatures;
    }

    public List<Integer> getHourlyHumidities() {
        return hourlyHumidities;
    }

    public void setHourlyHumidities(List<Integer> hourlyHumidities) {
        this.hourlyHumidities = hourlyHumidities;
    }

    public List<Double> getHourlyWindSpeeds() {
        return hourlyWindSpeeds;
    }

    public void setHourlyWindSpeeds(List<Double> hourlyWindSpeeds) {
        this.hourlyWindSpeeds = hourlyWindSpeeds;
    }

    /**
     * Maps the WMO weather code to a short Chinese description.
     *
     * <p>Covers the codes Open-Meteo documents, including freezing drizzle/rain and the
     * snow codes; a {@code null} or unrecognised code yields "未知".
     *
     * @return the description for {@code weatherCode}
     */
    public String getWeatherDescription() {
        if (weatherCode == null) {
            return "未知";
        }
        switch (weatherCode) {
            case "0":
                return "晴";
            case "1": case "2": case "3":
                return "多云";
            case "45": case "48":
                return "雾";
            case "51": case "53": case "55":
                return "小毛毛雨";
            case "56": case "57":
                return "冻毛毛雨";
            case "61": case "63": case "65":
                return "小雨";
            case "66": case "67":
                return "冻雨";
            case "71": case "73": case "75": case "77":
                return "降雪";
            case "80": case "81": case "82":
                return "阵雨";
            case "85": case "86":
                return "阵雪";
            case "95":
                return "雷暴";
            case "96": case "99":
                return "雷暴加冰雹";
            default:
                return "未知";
        }
    }

    /** Renders the current values and the derived description; hourly series are omitted. */
    @Override
    public String toString() {
        return "Weather{" +
                "cityName='" + cityName + '\'' +
                ", temperature=" + temperature +
                ", humidity=" + humidity +
                ", windSpeed=" + windSpeed +
                ", weatherCode='" + weatherCode + '\'' +
                ", weather='" + getWeatherDescription() + '\'' +
                '}';
    }
}
||||
@ -0,0 +1,75 @@ |
|||||
|
package com.example.crawler.repository; |
||||
|
|
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.model.Weather; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DataRepository { |
||||
|
|
||||
|
private static DataRepository instance; |
||||
|
|
||||
|
private List<Book> books; |
||||
|
private List<News> newsList; |
||||
|
private List<UniversityRank> rankings; |
||||
|
private List<Weather> weatherList; |
||||
|
|
||||
|
private DataRepository() { |
||||
|
this.books = new ArrayList<>(); |
||||
|
this.newsList = new ArrayList<>(); |
||||
|
this.rankings = new ArrayList<>(); |
||||
|
this.weatherList = new ArrayList<>(); |
||||
|
} |
||||
|
|
||||
|
public static synchronized DataRepository getInstance() { |
||||
|
if (instance == null) { |
||||
|
instance = new DataRepository(); |
||||
|
} |
||||
|
return instance; |
||||
|
} |
||||
|
|
||||
|
public List<Book> getBooks() { |
||||
|
return new ArrayList<>(books); |
||||
|
} |
||||
|
|
||||
|
public void saveBooks(List<Book> books) { |
||||
|
this.books.clear(); |
||||
|
this.books.addAll(books); |
||||
|
} |
||||
|
|
||||
|
public List<News> getNewsList() { |
||||
|
return new ArrayList<>(newsList); |
||||
|
} |
||||
|
|
||||
|
public void saveNewsList(List<News> newsList) { |
||||
|
this.newsList.clear(); |
||||
|
this.newsList.addAll(newsList); |
||||
|
} |
||||
|
|
||||
|
public List<UniversityRank> getRankings() { |
||||
|
return new ArrayList<>(rankings); |
||||
|
} |
||||
|
|
||||
|
public void saveRankings(List<UniversityRank> rankings) { |
||||
|
this.rankings.clear(); |
||||
|
this.rankings.addAll(rankings); |
||||
|
} |
||||
|
|
||||
|
public List<Weather> getWeatherList() { |
||||
|
return new ArrayList<>(weatherList); |
||||
|
} |
||||
|
|
||||
|
public void saveWeatherList(List<Weather> weatherList) { |
||||
|
this.weatherList.clear(); |
||||
|
this.weatherList.addAll(weatherList); |
||||
|
} |
||||
|
|
||||
|
public void clearAll() { |
||||
|
books.clear(); |
||||
|
newsList.clear(); |
||||
|
rankings.clear(); |
||||
|
weatherList.clear(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,171 @@ |
|||||
|
package com.example.crawler.service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.io.PrintWriter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
import com.example.crawler.chart.ChartGenerator; |
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.util.DataCleaner; |
||||
|
|
||||
|
public class BookAnalysisService { |
||||
|
|
||||
|
static { |
||||
|
File dir = new File(CrawlerConstants.REPORTS_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void analyze(List<Book> books) { |
||||
|
if (books == null || books.isEmpty()) { |
||||
|
System.out.println("没有书籍数据可分析"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 书籍数据分析 =========="); |
||||
|
System.out.println("共分析 " + books.size() + " 本书\n"); |
||||
|
|
||||
|
analyzePriceDistribution(books); |
||||
|
analyzeRatingDistribution(books); |
||||
|
analyzeStockStatus(books); |
||||
|
|
||||
|
generateReport(books); |
||||
|
} |
||||
|
|
||||
|
private void analyzePriceDistribution(List<Book> books) { |
||||
|
System.out.println("【价格分析】"); |
||||
|
List<Double> prices = new ArrayList<>(); |
||||
|
for (Book book : books) { |
||||
|
double price = DataCleaner.cleanPrice(book.getPrice()); |
||||
|
if (price > 0) { |
||||
|
prices.add(price); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (prices.isEmpty()) { |
||||
|
System.out.println("无法获取有效价格数据"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
double maxPrice = prices.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
||||
|
double minPrice = prices.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
||||
|
double avgPrice = prices.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
||||
|
|
||||
|
System.out.println("最高价: £" + String.format("%.2f", maxPrice)); |
||||
|
System.out.println("最低价: £" + String.format("%.2f", minPrice)); |
||||
|
System.out.println("平均价: £" + String.format("%.2f", avgPrice)); |
||||
|
|
||||
|
Map<String, Integer> priceRanges = new HashMap<>(); |
||||
|
String[] ranges = {"0-10", "10-20", "20-30", "30-40", "40-50", "50+"}; |
||||
|
for (String range : ranges) { |
||||
|
priceRanges.put(range, 0); |
||||
|
} |
||||
|
|
||||
|
for (Double price : prices) { |
||||
|
if (price < 10) priceRanges.put("0-10", priceRanges.get("0-10") + 1); |
||||
|
else if (price < 20) priceRanges.put("10-20", priceRanges.get("10-20") + 1); |
||||
|
else if (price < 30) priceRanges.put("20-30", priceRanges.get("20-30") + 1); |
||||
|
else if (price < 40) priceRanges.put("30-40", priceRanges.get("30-40") + 1); |
||||
|
else if (price < 50) priceRanges.put("40-50", priceRanges.get("40-50") + 1); |
||||
|
else priceRanges.put("50+", priceRanges.get("50+") + 1); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n价格区间分布:"); |
||||
|
for (Map.Entry<String, Integer> entry : priceRanges.entrySet()) { |
||||
|
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本"); |
||||
|
} |
||||
|
|
||||
|
ChartGenerator.generatePriceHistogram(priceRanges, "price_histogram.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeRatingDistribution(List<Book> books) { |
||||
|
System.out.println("\n【评分分析】"); |
||||
|
Map<String, Integer> ratingCounts = new HashMap<>(); |
||||
|
ratingCounts.put("5星", 0); |
||||
|
ratingCounts.put("4星", 0); |
||||
|
ratingCounts.put("3星", 0); |
||||
|
ratingCounts.put("2星", 0); |
||||
|
ratingCounts.put("1星", 0); |
||||
|
ratingCounts.put("未知", 0); |
||||
|
|
||||
|
for (Book book : books) { |
||||
|
int rating = DataCleaner.cleanRating(book.getRating()); |
||||
|
switch (rating) { |
||||
|
case 5: ratingCounts.put("5星", ratingCounts.get("5星") + 1); break; |
||||
|
case 4: ratingCounts.put("4星", ratingCounts.get("4星") + 1); break; |
||||
|
case 3: ratingCounts.put("3星", ratingCounts.get("3星") + 1); break; |
||||
|
case 2: ratingCounts.put("2星", ratingCounts.get("2星") + 1); break; |
||||
|
case 1: ratingCounts.put("1星", ratingCounts.get("1星") + 1); break; |
||||
|
default: ratingCounts.put("未知", ratingCounts.get("未知") + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
int total = books.size(); |
||||
|
System.out.println("评分分布:"); |
||||
|
for (Map.Entry<String, Integer> entry : ratingCounts.entrySet()) { |
||||
|
double percentage = (entry.getValue() * 100.0) / total; |
||||
|
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本 (" + String.format("%.1f", percentage) + "%)"); |
||||
|
} |
||||
|
|
||||
|
ChartGenerator.generateRatingPieChart(ratingCounts, "rating_pie.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeStockStatus(List<Book> books) { |
||||
|
System.out.println("\n【库存分析】"); |
||||
|
int inStock = 0; |
||||
|
int outOfStock = 0; |
||||
|
|
||||
|
for (Book book : books) { |
||||
|
String availability = book.getAvailability(); |
||||
|
if (availability != null && availability.toLowerCase().contains("in stock")) { |
||||
|
inStock++; |
||||
|
} else { |
||||
|
outOfStock++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("有库存: " + inStock + " 本"); |
||||
|
System.out.println("缺货: " + outOfStock + " 本"); |
||||
|
} |
||||
|
|
||||
|
private void generateReport(List<Book> books) { |
||||
|
String fileName = CrawlerConstants.REPORTS_DIR + "/book_analysis_report.txt"; |
||||
|
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
||||
|
writer.println("========== 书籍数据分析报告 =========="); |
||||
|
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
||||
|
writer.println("分析书籍总数: " + books.size()); |
||||
|
writer.println(); |
||||
|
|
||||
|
List<Double> prices = books.stream() |
||||
|
.map(b -> DataCleaner.cleanPrice(b.getPrice())) |
||||
|
.filter(p -> p > 0) |
||||
|
.collect(Collectors.toList()); |
||||
|
|
||||
|
if (!prices.isEmpty()) { |
||||
|
writer.println("【价格统计】"); |
||||
|
writer.println("最高价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
||||
|
writer.println("最低价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
||||
|
writer.println("平均价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
||||
|
writer.println(); |
||||
|
} |
||||
|
|
||||
|
writer.println("【库存统计】"); |
||||
|
long inStock = books.stream().filter(b -> b.getAvailability() != null && b.getAvailability().toLowerCase().contains("in stock")).count(); |
||||
|
writer.println("有库存: " + inStock + " 本"); |
||||
|
writer.println("缺货: " + (books.size() - inStock) + " 本"); |
||||
|
|
||||
|
writer.println("\n报告生成完成"); |
||||
|
System.out.println("\n报告已保存: " + fileName); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("生成报告失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,138 @@ |
|||||
|
package com.example.crawler.service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.io.PrintWriter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
import com.example.crawler.chart.ChartGenerator; |
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.util.DataCleaner; |
||||
|
|
||||
|
public class NewsAnalysisService { |
||||
|
|
||||
|
static { |
||||
|
File dir = new File(CrawlerConstants.REPORTS_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void analyze(List<News> newsList) { |
||||
|
if (newsList == null || newsList.isEmpty()) { |
||||
|
System.out.println("没有新闻数据可分析"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 新闻数据分析 =========="); |
||||
|
System.out.println("共分析 " + newsList.size() + " 条新闻\n"); |
||||
|
|
||||
|
analyzeTimeDistribution(newsList); |
||||
|
analyzeKeywords(newsList); |
||||
|
|
||||
|
generateReport(newsList); |
||||
|
} |
||||
|
|
||||
|
private void analyzeTimeDistribution(List<News> newsList) { |
||||
|
System.out.println("【发布时间分布】"); |
||||
|
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
||||
|
for (int i = 0; i < 24; i++) { |
||||
|
hourDistribution.put(i, 0); |
||||
|
} |
||||
|
|
||||
|
for (News news : newsList) { |
||||
|
try { |
||||
|
java.time.LocalDateTime dateTime = DataCleaner.cleanNewsTime(news.getPublishTime()); |
||||
|
int hour = DataCleaner.extractHour(dateTime); |
||||
|
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
||||
|
} catch (Exception e) { |
||||
|
// 忽略解析失败的数据
|
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n按小时统计:"); |
||||
|
for (int i = 0; i < 24; i++) { |
||||
|
int count = hourDistribution.get(i); |
||||
|
String bar = "*".repeat(Math.max(1, count)); |
||||
|
System.out.printf(" %02d:00 - %02d:00: %3d %s%n", i, (i + 1) % 24, count, bar); |
||||
|
} |
||||
|
|
||||
|
int peakHour = 0; |
||||
|
int peakCount = 0; |
||||
|
for (Map.Entry<Integer, Integer> entry : hourDistribution.entrySet()) { |
||||
|
if (entry.getValue() > peakCount) { |
||||
|
peakCount = entry.getValue(); |
||||
|
peakHour = entry.getKey(); |
||||
|
} |
||||
|
} |
||||
|
System.out.println("\n高峰时段: " + String.format("%02d:00", peakHour) + " (发布 " + peakCount + " 条新闻)"); |
||||
|
|
||||
|
ChartGenerator.generateNewsTimeTrend(hourDistribution, "news_time_trend.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeKeywords(List<News> newsList) { |
||||
|
System.out.println("\n【关键词分析】"); |
||||
|
Map<String, Integer> allWords = new HashMap<>(); |
||||
|
|
||||
|
for (News news : newsList) { |
||||
|
String title = DataCleaner.cleanTitle(news.getTitle()); |
||||
|
String[] words = DataCleaner.extractWords(title); |
||||
|
Map<String, Integer> wordFreq = DataCleaner.countWordFrequency(words); |
||||
|
for (Map.Entry<String, Integer> entry : wordFreq.entrySet()) { |
||||
|
allWords.put(entry.getKey(), allWords.getOrDefault(entry.getKey(), 0) + entry.getValue()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
List<Map.Entry<String, Integer>> sortedWords = allWords.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(20) |
||||
|
.collect(Collectors.toList()); |
||||
|
|
||||
|
System.out.println("\n高频词 TOP 10:"); |
||||
|
for (int i = 0; i < Math.min(10, sortedWords.size()); i++) { |
||||
|
Map.Entry<String, Integer> entry = sortedWords.get(i); |
||||
|
System.out.printf(" %2d. %s: %d%n", i + 1, entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
|
||||
|
Map<String, Integer> top10 = sortedWords.stream() |
||||
|
.limit(10) |
||||
|
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
||||
|
|
||||
|
ChartGenerator.generateWordFrequencyBarChart(top10, "news_top_words.png"); |
||||
|
} |
||||
|
|
||||
|
private void generateReport(List<News> newsList) { |
||||
|
String fileName = CrawlerConstants.REPORTS_DIR + "/news_analysis_report.txt"; |
||||
|
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
||||
|
writer.println("========== 新闻数据分析报告 =========="); |
||||
|
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
||||
|
writer.println("分析新闻总数: " + newsList.size()); |
||||
|
writer.println(); |
||||
|
|
||||
|
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
||||
|
for (int i = 0; i < 24; i++) hourDistribution.put(i, 0); |
||||
|
for (News news : newsList) { |
||||
|
try { |
||||
|
int hour = DataCleaner.extractHour(DataCleaner.cleanNewsTime(news.getPublishTime())); |
||||
|
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
||||
|
} catch (Exception e) {} |
||||
|
} |
||||
|
|
||||
|
writer.println("【发布时间分布】"); |
||||
|
for (int i = 0; i < 24; i++) { |
||||
|
writer.println(String.format(" %02d:00 - %02d:00: %d 条", i, (i + 1) % 24, hourDistribution.get(i))); |
||||
|
} |
||||
|
|
||||
|
writer.println("\n报告生成完成"); |
||||
|
System.out.println("\n报告已保存: " + fileName); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("生成报告失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,189 @@ |
|||||
|
package com.example.crawler.service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.io.PrintWriter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
import com.example.crawler.chart.ChartGenerator; |
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.util.DataCleaner; |
||||
|
|
||||
|
public class RankingAnalysisService { |
||||
|
|
||||
|
static { |
||||
|
File dir = new File(CrawlerConstants.REPORTS_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void analyze(List<UniversityRank> ranks) { |
||||
|
if (ranks == null || ranks.isEmpty()) { |
||||
|
System.out.println("没有大学排名数据可分析"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 大学排名数据分析 =========="); |
||||
|
System.out.println("共分析 " + ranks.size() + " 所大学\n"); |
||||
|
|
||||
|
analyzeProvinceDistribution(ranks); |
||||
|
analyzeScoreDistribution(ranks); |
||||
|
analyzeCategoryDistribution(ranks); |
||||
|
|
||||
|
generateReport(ranks); |
||||
|
} |
||||
|
|
||||
|
private void analyzeProvinceDistribution(List<UniversityRank> ranks) { |
||||
|
System.out.println("【各省份上榜大学数量】"); |
||||
|
Map<String, Integer> provinceCounts = new HashMap<>(); |
||||
|
|
||||
|
for (UniversityRank rank : ranks) { |
||||
|
String province = rank.getProvince(); |
||||
|
if (province != null && !province.isEmpty()) { |
||||
|
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
List<Map.Entry<String, Integer>> sorted = provinceCounts.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.collect(Collectors.toList()); |
||||
|
|
||||
|
System.out.println("\n省份排行榜 TOP 10:"); |
||||
|
int rankNum = 1; |
||||
|
for (Map.Entry<String, Integer> entry : sorted) { |
||||
|
if (rankNum > 10) break; |
||||
|
System.out.printf(" %2d. %s: %d 所大学%n", rankNum++, entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
|
||||
|
Map<String, Integer> top10 = sorted.stream() |
||||
|
.limit(10) |
||||
|
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
||||
|
|
||||
|
ChartGenerator.generateProvinceBarChart(top10, "province_bar.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeScoreDistribution(List<UniversityRank> ranks) { |
||||
|
System.out.println("\n【总分分析】"); |
||||
|
List<Double> scores = new ArrayList<>(); |
||||
|
|
||||
|
for (UniversityRank rank : ranks) { |
||||
|
double score = DataCleaner.cleanScore(rank.getTotalScore()); |
||||
|
if (score > 0) { |
||||
|
scores.add(score); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (scores.isEmpty()) { |
||||
|
System.out.println("无法获取有效分数数据"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
double maxScore = scores.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
||||
|
double minScore = scores.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
||||
|
double avgScore = scores.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
||||
|
|
||||
|
List<Double> sortedScores = scores.stream().sorted().collect(Collectors.toList()); |
||||
|
double median = sortedScores.get(sortedScores.size() / 2); |
||||
|
|
||||
|
System.out.println("最高分: " + String.format("%.2f", maxScore)); |
||||
|
System.out.println("最低分: " + String.format("%.2f", minScore)); |
||||
|
System.out.println("平均分: " + String.format("%.2f", avgScore)); |
||||
|
System.out.println("中位数: " + String.format("%.2f", median)); |
||||
|
|
||||
|
Map<String, Integer> scoreRanges = new HashMap<>(); |
||||
|
String[] ranges = {"0-20", "20-40", "40-60", "60-80", "80-100"}; |
||||
|
for (String range : ranges) { |
||||
|
scoreRanges.put(range, 0); |
||||
|
} |
||||
|
|
||||
|
for (Double score : scores) { |
||||
|
if (score < 20) scoreRanges.put("0-20", scoreRanges.get("0-20") + 1); |
||||
|
else if (score < 40) scoreRanges.put("20-40", scoreRanges.get("20-40") + 1); |
||||
|
else if (score < 60) scoreRanges.put("40-60", scoreRanges.get("40-60") + 1); |
||||
|
else if (score < 80) scoreRanges.put("60-80", scoreRanges.get("60-80") + 1); |
||||
|
else scoreRanges.put("80-100", scoreRanges.get("80-100") + 1); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n分数区间分布:"); |
||||
|
for (Map.Entry<String, Integer> entry : scoreRanges.entrySet()) { |
||||
|
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 所"); |
||||
|
} |
||||
|
|
||||
|
ChartGenerator.generateScoreHistogram(scoreRanges, "score_boxplot.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeCategoryDistribution(List<UniversityRank> ranks) { |
||||
|
System.out.println("\n【办学层次统计】"); |
||||
|
Map<String, Integer> categoryCounts = new HashMap<>(); |
||||
|
|
||||
|
for (UniversityRank rank : ranks) { |
||||
|
String category = rank.getCategory(); |
||||
|
if (category != null && !category.isEmpty()) { |
||||
|
categoryCounts.put(category, categoryCounts.getOrDefault(category, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (categoryCounts.isEmpty()) { |
||||
|
System.out.println("没有办学层次数据"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
List<Map.Entry<String, Integer>> sorted = categoryCounts.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.collect(Collectors.toList()); |
||||
|
|
||||
|
System.out.println("\n办学层次分布:"); |
||||
|
for (Map.Entry<String, Integer> entry : sorted) { |
||||
|
System.out.printf(" %s: %d 所%n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void generateReport(List<UniversityRank> ranks) { |
||||
|
String fileName = CrawlerConstants.REPORTS_DIR + "/ranking_analysis_report.txt"; |
||||
|
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
||||
|
writer.println("========== 大学排名数据分析报告 =========="); |
||||
|
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
||||
|
writer.println("分析大学总数: " + ranks.size()); |
||||
|
writer.println(); |
||||
|
|
||||
|
Map<String, Integer> provinceCounts = new HashMap<>(); |
||||
|
for (UniversityRank rank : ranks) { |
||||
|
String province = rank.getProvince(); |
||||
|
if (province != null && !province.isEmpty()) { |
||||
|
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.println("【省份排行榜 TOP 10】"); |
||||
|
provinceCounts.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.forEach(e -> writer.println(" " + e.getKey() + ": " + e.getValue() + " 所大学")); |
||||
|
|
||||
|
List<Double> scores = ranks.stream() |
||||
|
.map(r -> DataCleaner.cleanScore(r.getTotalScore())) |
||||
|
.filter(s -> s > 0) |
||||
|
.collect(Collectors.toList()); |
||||
|
|
||||
|
if (!scores.isEmpty()) { |
||||
|
writer.println(); |
||||
|
writer.println("【分数统计】"); |
||||
|
writer.println("最高分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
||||
|
writer.println("最低分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
||||
|
writer.println("平均分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
||||
|
} |
||||
|
|
||||
|
writer.println("\n报告生成完成"); |
||||
|
System.out.println("\n报告已保存: " + fileName); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("生成报告失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,163 @@ |
|||||
|
package com.example.crawler.service; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.io.PrintWriter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
import com.example.crawler.chart.ChartGenerator; |
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.model.Weather; |
||||
|
|
||||
|
public class WeatherAnalysisService { |
||||
|
|
||||
|
static { |
||||
|
File dir = new File(CrawlerConstants.REPORTS_DIR); |
||||
|
if (!dir.exists()) { |
||||
|
dir.mkdirs(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void analyze(List<Weather> weatherList) { |
||||
|
if (weatherList == null || weatherList.isEmpty()) { |
||||
|
System.out.println("没有天气数据可分析"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n========== 天气数据分析 =========="); |
||||
|
System.out.println("共分析 " + weatherList.size() + " 个城市\n"); |
||||
|
|
||||
|
analyzeCurrentWeather(weatherList); |
||||
|
analyzeTemperatureTrend(weatherList); |
||||
|
analyzeHumidityTrend(weatherList); |
||||
|
analyzeComfortIndex(weatherList); |
||||
|
|
||||
|
generateReport(weatherList); |
||||
|
} |
||||
|
|
||||
|
private void analyzeCurrentWeather(List<Weather> weatherList) { |
||||
|
System.out.println("【当前天气对比】"); |
||||
|
System.out.println("┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐"); |
||||
|
System.out.println("│ 城市名称 │ 温度(°C)│ 湿度(%) │ 风速(km/h)│ 天气状况 │ 舒适度 │"); |
||||
|
System.out.println("├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤"); |
||||
|
|
||||
|
for (Weather weather : weatherList) { |
||||
|
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
||||
|
String comfortDesc = getComfortDescription(comfort); |
||||
|
System.out.printf("│ %-8s │ %8.1f │ %8.0f │ %8.1f │ %-8s │ %-8s │%n", |
||||
|
weather.getCityName(), |
||||
|
weather.getTemperature(), |
||||
|
weather.getHumidity(), |
||||
|
weather.getWindSpeed(), |
||||
|
weather.getWeatherDescription(), |
||||
|
comfortDesc); |
||||
|
} |
||||
|
System.out.println("└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeTemperatureTrend(List<Weather> weatherList) { |
||||
|
System.out.println("\n【未来24小时温度分析】"); |
||||
|
|
||||
|
Map<String, List<Double>> cityTemperatures = new HashMap<>(); |
||||
|
for (Weather weather : weatherList) { |
||||
|
cityTemperatures.put(weather.getCityName(), weather.getHourlyTemperatures()); |
||||
|
|
||||
|
List<Double> temps = weather.getHourlyTemperatures(); |
||||
|
if (!temps.isEmpty()) { |
||||
|
double maxTemp = temps.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
||||
|
double minTemp = temps.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
||||
|
double avgTemp = temps.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
||||
|
|
||||
|
int maxIndex = temps.indexOf(maxTemp); |
||||
|
int minIndex = temps.indexOf(minTemp); |
||||
|
|
||||
|
String maxTime = maxIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(maxIndex) : ""; |
||||
|
String minTime = minIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(minIndex) : ""; |
||||
|
|
||||
|
System.out.printf(" %s: 最高 %.1f°C(%s) 最低 %.1f°C(%s) 平均 %.1f°C%n", |
||||
|
weather.getCityName(), maxTemp, maxTime, minTemp, minTime, avgTemp); |
||||
|
} |
||||
|
|
||||
|
ChartGenerator.generateTemperatureTrend( |
||||
|
weather.getHourlyTimes(), |
||||
|
weather.getHourlyTemperatures(), |
||||
|
weather.getCityName(), |
||||
|
"temperature_" + weather.getCityName() + ".png" |
||||
|
); |
||||
|
} |
||||
|
|
||||
|
ChartGenerator.generateMultiCityTemperatureComparison(cityTemperatures, "temperature_comparison.png"); |
||||
|
} |
||||
|
|
||||
|
private void analyzeHumidityTrend(List<Weather> weatherList) { |
||||
|
System.out.println("\n【未来24小时湿度分析】"); |
||||
|
for (Weather weather : weatherList) { |
||||
|
List<Integer> humidities = weather.getHourlyHumidities(); |
||||
|
if (!humidities.isEmpty()) { |
||||
|
double avgHumidity = humidities.stream().mapToInt(Integer::intValue).average().orElse(0); |
||||
|
System.out.printf(" %s: 平均湿度 %.0f%%%n", weather.getCityName(), avgHumidity); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void analyzeComfortIndex(List<Weather> weatherList) { |
||||
|
System.out.println("\n【舒适度指数分析】"); |
||||
|
System.out.println("(基于温度和湿度的体感舒适度计算,0-100分制)"); |
||||
|
|
||||
|
for (Weather weather : weatherList) { |
||||
|
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
||||
|
String description = getComfortDescription(comfort); |
||||
|
System.out.printf(" %s: %.1f分 (%s)%n", weather.getCityName(), comfort, description); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private double calculateComfortIndex(double temperature, double humidity) { |
||||
|
double tempDiff = Math.abs(temperature - 22); |
||||
|
double humDiff = Math.abs(humidity - 50); |
||||
|
|
||||
|
double comfort = 100 - (tempDiff * 3 + humDiff * 0.5); |
||||
|
return Math.max(0, Math.min(100, comfort)); |
||||
|
} |
||||
|
|
||||
|
private String getComfortDescription(double comfort) { |
||||
|
if (comfort >= 80) return "非常舒适"; |
||||
|
if (comfort >= 60) return "舒适"; |
||||
|
if (comfort >= 40) return "一般"; |
||||
|
if (comfort >= 20) return "不舒适"; |
||||
|
return "极不舒适"; |
||||
|
} |
||||
|
|
||||
|
private void generateReport(List<Weather> weatherList) { |
||||
|
String fileName = CrawlerConstants.REPORTS_DIR + "/weather_analysis_report.txt"; |
||||
|
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
||||
|
writer.println("========== 天气数据分析报告 =========="); |
||||
|
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
||||
|
writer.println("分析城市数量: " + weatherList.size()); |
||||
|
writer.println("数据来源: Open-Meteo API (CC BY 4.0)"); |
||||
|
writer.println(); |
||||
|
|
||||
|
writer.println("【多城市天气对比】"); |
||||
|
for (Weather weather : weatherList) { |
||||
|
writer.println("\n城市: " + weather.getCityName()); |
||||
|
writer.println(" 当前温度: " + String.format("%.1f°C", weather.getTemperature())); |
||||
|
writer.println(" 当前湿度: " + String.format("%.0f%%", weather.getHumidity())); |
||||
|
writer.println(" 风速: " + String.format("%.1f km/h", weather.getWindSpeed())); |
||||
|
writer.println(" 天气: " + weather.getWeatherDescription()); |
||||
|
|
||||
|
List<Double> temps = weather.getHourlyTemperatures(); |
||||
|
if (!temps.isEmpty()) { |
||||
|
writer.println(" 24小时平均温度: " + String.format("%.1f°C", temps.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.println("\n报告生成完成"); |
||||
|
System.out.println("\n报告已保存: " + fileName); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("生成报告失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,127 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
import com.example.crawler.exception.ParseException; |
||||
|
import com.example.crawler.model.Book; |
||||
|
import com.example.crawler.util.HttpUtil; |
||||
|
|
||||
|
/** |
||||
|
* 书籍爬取策略 |
||||
|
* // 策略模式:书籍信息爬取策略
|
||||
|
*/ |
||||
|
public class BookCrawlStrategy implements CrawlStrategy<Book> { |
||||
|
|
||||
|
private static final String BASE_URL = "https://books.toscrape.com/"; |
||||
|
private static final String PAGE_URL_FORMAT = "https://books.toscrape.com/catalogue/page-%d.html"; |
||||
|
private static final int MAX_PAGES = 30; // 最大爬取页数
|
||||
|
|
||||
|
@Override |
||||
|
public List<Book> crawl() throws CrawlException { |
||||
|
List<Book> books = new ArrayList<>(); |
||||
|
int pageNum = 1; |
||||
|
|
||||
|
try { |
||||
|
while (true) { |
||||
|
// 达到最大页数限制时停止
|
||||
|
if (pageNum > MAX_PAGES) { |
||||
|
System.out.println("已达到最大爬取页数限制(" + MAX_PAGES + "页),停止爬取"); |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
String url = pageNum == 1 ? BASE_URL : String.format(PAGE_URL_FORMAT, pageNum); |
||||
|
|
||||
|
// 设置请求头
|
||||
|
Map<String, String> headers = Map.of( |
||||
|
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
||||
|
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
||||
|
); |
||||
|
|
||||
|
String html = HttpUtil.get(url, headers); |
||||
|
Document doc = Jsoup.parse(html); |
||||
|
|
||||
|
Elements bookElements = doc.select(".product_pod"); |
||||
|
|
||||
|
// 如果没有书籍元素,说明已到达最后一页
|
||||
|
if (bookElements.isEmpty()) { |
||||
|
System.out.println("第 " + pageNum + " 页没有书籍数据,停止爬取"); |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
for (Element bookElement : bookElements) { |
||||
|
Book book = parseBook(bookElement); |
||||
|
books.add(book); |
||||
|
} |
||||
|
|
||||
|
System.out.println("已爬取第 " + pageNum + " 页,共 " + books.size() + " 本书"); |
||||
|
|
||||
|
// 设置请求间隔
|
||||
|
HttpUtil.sleep(1); |
||||
|
|
||||
|
pageNum++; |
||||
|
} |
||||
|
|
||||
|
return books; |
||||
|
} catch (NetworkException e) { |
||||
|
// 如果是404错误且已经爬取了一些数据,返回已获取的数据
|
||||
|
if (e.getMessage().contains("404") && !books.isEmpty()) { |
||||
|
System.out.println("第 " + pageNum + " 页不存在(404),返回已爬取的 " + books.size() + " 本书"); |
||||
|
return books; |
||||
|
} |
||||
|
throw new NetworkException("爬取书籍信息时网络异常: " + e.getMessage(), e); |
||||
|
} catch (ParseException e) { |
||||
|
throw new ParseException("解析书籍信息时异常: " + e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlException("爬取书籍信息时发生未知异常: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 解析书籍元素 |
||||
|
*/ |
||||
|
private Book parseBook(Element bookElement) throws ParseException { |
||||
|
try { |
||||
|
// 获取书名
|
||||
|
Element titleElement = bookElement.selectFirst("h3 a"); |
||||
|
String title = titleElement != null ? titleElement.attr("title") : "未知书名"; |
||||
|
|
||||
|
// 获取价格
|
||||
|
Element priceElement = bookElement.selectFirst(".price_color"); |
||||
|
String price = priceElement != null ? priceElement.text() : "未知价格"; |
||||
|
|
||||
|
// 获取库存状态
|
||||
|
Element availabilityElement = bookElement.selectFirst(".instock.availability"); |
||||
|
String availability = availabilityElement != null ? availabilityElement.text().trim() : "未知库存"; |
||||
|
|
||||
|
// 获取星级评分
|
||||
|
Element ratingElement = bookElement.selectFirst(".star-rating"); |
||||
|
String rating = "未知"; |
||||
|
if (ratingElement != null) { |
||||
|
String classAttr = ratingElement.attr("class"); |
||||
|
if (classAttr.contains("One")) rating = "1星"; |
||||
|
else if (classAttr.contains("Two")) rating = "2星"; |
||||
|
else if (classAttr.contains("Three")) rating = "3星"; |
||||
|
else if (classAttr.contains("Four")) rating = "4星"; |
||||
|
else if (classAttr.contains("Five")) rating = "5星"; |
||||
|
} |
||||
|
|
||||
|
return new Book(title, price, availability, rating); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("解析书籍信息失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDataSourceName() { |
||||
|
return "toscrape.com书籍信息"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
/** |
||||
|
* 爬取策略接口 |
||||
|
* 定义爬取操作的标准方法,实现策略模式 |
||||
|
*/ |
||||
|
public interface CrawlStrategy<T> { |
||||
|
|
||||
|
/** |
||||
|
* 执行爬取操作 |
||||
|
* |
||||
|
* @return 爬取到的数据列表 |
||||
|
* @throws CrawlException 爬虫异常 |
||||
|
*/ |
||||
|
List<T> crawl() throws CrawlException; |
||||
|
|
||||
|
/** |
||||
|
* 获取数据源名称 |
||||
|
* |
||||
|
* @return 数据源名称 |
||||
|
*/ |
||||
|
String getDataSourceName(); |
||||
|
} |
||||
@ -0,0 +1,151 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
import com.example.crawler.exception.ParseException; |
||||
|
import com.example.crawler.model.News; |
||||
|
import com.example.crawler.util.HttpUtil; |
||||
|
|
||||
|
/** |
||||
|
* 新浪新闻爬取策略 |
||||
|
* // 策略模式:新浪新闻爬取策略
|
||||
|
*/ |
||||
|
public class NewsCrawlStrategy implements CrawlStrategy<News> { |
||||
|
|
||||
|
private static final String NEWS_URL = "https://news.sina.com.cn/china/"; |
||||
|
private static final int MAX_NEWS_COUNT = 20; |
||||
|
|
||||
|
@Override |
||||
|
public List<News> crawl() throws CrawlException { |
||||
|
List<News> newsList = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
// 设置请求头
|
||||
|
Map<String, String> headers = Map.of( |
||||
|
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
||||
|
"Referer", "https://news.sina.com.cn/", |
||||
|
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
||||
|
); |
||||
|
|
||||
|
String html = HttpUtil.get(NEWS_URL, headers); |
||||
|
Document doc = Jsoup.parse(html); |
||||
|
|
||||
|
// 新浪新闻页面结构可能变化,使用多种选择器尝试
|
||||
|
Elements newsElements = doc.select(".news-item, .news-list li, .list-item, .feed-card-item"); |
||||
|
|
||||
|
// 如果上述选择器都没找到,尝试更通用的选择器
|
||||
|
if (newsElements.isEmpty()) { |
||||
|
newsElements = doc.select("a[href*=sina.com.cn]"); |
||||
|
} |
||||
|
|
||||
|
int count = 0; |
||||
|
for (Element element : newsElements) { |
||||
|
if (count >= MAX_NEWS_COUNT) { |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
News news = parseNews(element); |
||||
|
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
||||
|
newsList.add(news); |
||||
|
count++; |
||||
|
} |
||||
|
} catch (ParseException e) { |
||||
|
// 跳过解析失败的新闻,继续处理下一个
|
||||
|
continue; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 如果使用通用选择器获取的结果不够,尝试另一种方式
|
||||
|
if (newsList.size() < MAX_NEWS_COUNT) { |
||||
|
Elements titleElements = doc.select("h2 a, h3 a, .title a, .news-title a"); |
||||
|
for (Element element : titleElements) { |
||||
|
if (count >= MAX_NEWS_COUNT) { |
||||
|
break; |
||||
|
} |
||||
|
try { |
||||
|
News news = parseNewsFromTitleElement(element); |
||||
|
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
||||
|
newsList.add(news); |
||||
|
count++; |
||||
|
} |
||||
|
} catch (ParseException e) { |
||||
|
continue; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("已爬取 " + newsList.size() + " 条新浪新闻"); |
||||
|
return newsList; |
||||
|
|
||||
|
} catch (NetworkException e) { |
||||
|
throw new NetworkException("爬取新浪新闻时网络异常: " + e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlException("爬取新浪新闻时发生未知异常: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 解析新闻元素 |
||||
|
*/ |
||||
|
private News parseNews(Element element) throws ParseException { |
||||
|
try { |
||||
|
String title = ""; |
||||
|
String url = ""; |
||||
|
String publishTime = ""; |
||||
|
|
||||
|
// 尝试获取标题和链接
|
||||
|
Element linkElement = element.selectFirst("a"); |
||||
|
if (linkElement != null) { |
||||
|
title = linkElement.text().trim(); |
||||
|
url = linkElement.attr("abs:href"); |
||||
|
} |
||||
|
|
||||
|
// 尝试获取发布时间
|
||||
|
Element timeElement = element.selectFirst(".time, .pubtime, span[class*=time]"); |
||||
|
if (timeElement != null) { |
||||
|
publishTime = timeElement.text().trim(); |
||||
|
} |
||||
|
|
||||
|
if (title.isEmpty() || url.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
return new News(title, publishTime, url); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("解析新闻信息失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 从标题元素解析新闻 |
||||
|
*/ |
||||
|
private News parseNewsFromTitleElement(Element element) throws ParseException { |
||||
|
try { |
||||
|
String title = element.text().trim(); |
||||
|
String url = element.attr("abs:href"); |
||||
|
|
||||
|
if (title.isEmpty() || url.isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
return new News(title, "", url); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("解析新闻标题失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDataSourceName() { |
||||
|
return "新浪国内新闻"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,24 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import com.example.crawler.strategy.BookCrawlStrategy; |
||||
|
import com.example.crawler.strategy.NewsCrawlStrategy; |
||||
|
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
||||
|
import com.example.crawler.strategy.WeatherCrawlStrategy; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
|
||||
|
public static CrawlStrategy<?> getStrategy(int choice) { |
||||
|
switch (choice) { |
||||
|
case 1: |
||||
|
return new BookCrawlStrategy(); |
||||
|
case 2: |
||||
|
return new NewsCrawlStrategy(); |
||||
|
case 3: |
||||
|
return new UniversityRankCrawlStrategy(); |
||||
|
case 4: |
||||
|
return new WeatherCrawlStrategy(); |
||||
|
default: |
||||
|
throw new IllegalArgumentException("Invalid choice: " + choice); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,148 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
import com.example.crawler.exception.ParseException; |
||||
|
import com.example.crawler.model.UniversityRank; |
||||
|
import com.example.crawler.util.HttpUtil; |
||||
|
|
||||
|
/** |
||||
|
* 软科中国大学排名爬取策略 |
||||
|
* // 策略模式:软科中国大学排名爬取策略
|
||||
|
*/ |
||||
|
public class UniversityRankCrawlStrategy implements CrawlStrategy<UniversityRank> { |
||||
|
|
||||
|
private static final String RANKING_URL = "https://www.shanghairanking.cn/rankings/bcur/2025"; |
||||
|
|
||||
|
@Override |
||||
|
public List<UniversityRank> crawl() throws CrawlException { |
||||
|
List<UniversityRank> rankings = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
// 设置请求头
|
||||
|
Map<String, String> headers = Map.of( |
||||
|
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
||||
|
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", |
||||
|
"Referer", "https://www.shanghairanking.cn/" |
||||
|
); |
||||
|
|
||||
|
// 设置请求延迟
|
||||
|
HttpUtil.sleep(3); |
||||
|
|
||||
|
String html = HttpUtil.get(RANKING_URL, headers); |
||||
|
Document doc = Jsoup.parse(html); |
||||
|
|
||||
|
// 提取表格数据
|
||||
|
Elements rows = doc.select("table tbody tr"); |
||||
|
|
||||
|
if (rows.isEmpty()) { |
||||
|
// 如果第一个选择器失败,尝试其他可能的选择器
|
||||
|
rows = doc.select(".rk-table tbody tr"); |
||||
|
} |
||||
|
|
||||
|
if (rows.isEmpty()) { |
||||
|
// 尝试更通用的选择器
|
||||
|
rows = doc.select("tr"); |
||||
|
} |
||||
|
|
||||
|
int count = 0; |
||||
|
for (Element row : rows) { |
||||
|
try { |
||||
|
UniversityRank ranking = parseRow(row); |
||||
|
if (ranking != null && ranking.getRank() != null) { |
||||
|
rankings.add(ranking); |
||||
|
count++; |
||||
|
|
||||
|
// 最多爬取200条数据
|
||||
|
if (count >= 200) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} catch (ParseException e) { |
||||
|
// 跳过解析失败的行
|
||||
|
continue; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("已爬取 " + rankings.size() + " 条大学排名数据"); |
||||
|
return rankings; |
||||
|
|
||||
|
} catch (NetworkException e) { |
||||
|
throw new NetworkException("爬取软科大学排名时网络异常: " + e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlException("爬取软科大学排名时发生未知异常: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 解析表格行数据 |
||||
|
*/ |
||||
|
private UniversityRank parseRow(Element row) throws ParseException { |
||||
|
try { |
||||
|
Elements cells = row.select("td"); |
||||
|
|
||||
|
if (cells.size() < 4) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
// 第1列:排名
|
||||
|
String rankStr = cells.get(0).text().trim(); |
||||
|
Integer rank = null; |
||||
|
try { |
||||
|
rank = Integer.parseInt(rankStr); |
||||
|
} catch (NumberFormatException e) { |
||||
|
// 如果排名不是数字(如"1-3"这样的范围),尝试提取第一个数字
|
||||
|
String numPart = rankStr.replaceAll("[^0-9]", ""); |
||||
|
if (!numPart.isEmpty()) { |
||||
|
rank = Integer.parseInt(numPart); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (rank == null) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
// 第2列:学校名称
|
||||
|
String universityName = cells.get(1).text().trim(); |
||||
|
|
||||
|
// 第4列:总分
|
||||
|
String totalScore = ""; |
||||
|
if (cells.size() > 3) { |
||||
|
totalScore = cells.get(3).text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 尝试提取省份和办学层次(第3列可能包含这些信息)
|
||||
|
String province = ""; |
||||
|
String category = ""; |
||||
|
if (cells.size() > 2) { |
||||
|
String thirdColumn = cells.get(2).text().trim(); |
||||
|
// 尝试解析省份和办学层次
|
||||
|
String[] parts = thirdColumn.split("\\s+"); |
||||
|
if (parts.length >= 1) { |
||||
|
province = parts[0]; |
||||
|
} |
||||
|
if (parts.length >= 2) { |
||||
|
category = parts[1]; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return new UniversityRank(rank, universityName, totalScore, province, category); |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("解析大学排名行数据失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDataSourceName() { |
||||
|
return "软科中国大学排名"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,177 @@ |
|||||
|
package com.example.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
import com.example.crawler.constant.CrawlerConstants; |
||||
|
import com.example.crawler.exception.CrawlException; |
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
import com.example.crawler.exception.ParseException; |
||||
|
import com.example.crawler.model.Weather; |
||||
|
import com.example.crawler.util.HttpUtil; |
||||
|
import com.google.gson.JsonArray; |
||||
|
import com.google.gson.JsonElement; |
||||
|
import com.google.gson.JsonObject; |
||||
|
import com.google.gson.JsonParser; |
||||
|
|
||||
|
public class WeatherCrawlStrategy implements CrawlStrategy<Weather> { |
||||
|
|
||||
|
@Override |
||||
|
public List<Weather> crawl() throws CrawlException { |
||||
|
List<Weather> weatherList = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
for (Map.Entry<String, double[]> entry : CrawlerConstants.CITY_COORDINATES.entrySet()) { |
||||
|
String cityName = entry.getKey(); |
||||
|
double[] coords = entry.getValue(); |
||||
|
double latitude = coords[0]; |
||||
|
double longitude = coords[1]; |
||||
|
|
||||
|
String weatherUrl = buildApiUrl(latitude, longitude); |
||||
|
Map<String, String> headers = Map.of( |
||||
|
"User-Agent", CrawlerConstants.USER_AGENT |
||||
|
); |
||||
|
|
||||
|
String response = HttpUtil.get(weatherUrl, headers); |
||||
|
Weather weather = parseWeatherData(cityName, response); |
||||
|
weatherList.add(weather); |
||||
|
|
||||
|
System.out.println("已获取 " + cityName + " 的天气信息"); |
||||
|
|
||||
|
HttpUtil.sleep(2); |
||||
|
} |
||||
|
|
||||
|
return weatherList; |
||||
|
|
||||
|
} catch (NetworkException e) { |
||||
|
throw new NetworkException("爬取天气数据时网络异常: " + e.getMessage(), e); |
||||
|
} catch (ParseException e) { |
||||
|
throw new ParseException("解析天气数据时异常: " + e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
throw new CrawlException("爬取天气数据时发生未知异常: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private String buildApiUrl(double latitude, double longitude) { |
||||
|
return CrawlerConstants.URL_WEATHER_API + "?latitude=" + latitude + |
||||
|
"&longitude=" + longitude + |
||||
|
"¤t_weather=true" + |
||||
|
"&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" + |
||||
|
"&forecast_days=1" + |
||||
|
"&timezone=Asia/Shanghai"; |
||||
|
} |
||||
|
|
||||
|
private Weather parseWeatherData(String cityName, String jsonData) throws ParseException { |
||||
|
try { |
||||
|
JsonObject obj = JsonParser.parseString(jsonData).getAsJsonObject(); |
||||
|
|
||||
|
Weather weather = new Weather(); |
||||
|
weather.setCityName(cityName); |
||||
|
|
||||
|
JsonObject currentWeather = obj.getAsJsonObject("current_weather"); |
||||
|
if (currentWeather != null) { |
||||
|
weather.setTemperature(cleanTemperature(getJsonDouble(currentWeather, "temperature", 0))); |
||||
|
weather.setWindSpeed(cleanWindSpeed(getJsonDouble(currentWeather, "windspeed", 0))); |
||||
|
weather.setWeatherCode(String.valueOf(getJsonInt(currentWeather, "weathercode", -1))); |
||||
|
} |
||||
|
|
||||
|
JsonObject hourly = obj.getAsJsonObject("hourly"); |
||||
|
if (hourly != null) { |
||||
|
JsonArray times = hourly.getAsJsonArray("time"); |
||||
|
JsonArray temps = hourly.getAsJsonArray("temperature_2m"); |
||||
|
JsonArray humidities = hourly.getAsJsonArray("relative_humidity_2m"); |
||||
|
JsonArray windSpeeds = hourly.getAsJsonArray("wind_speed_10m"); |
||||
|
|
||||
|
if (times != null && temps != null) { |
||||
|
int count = Math.min(times.size(), 24); |
||||
|
for (int i = 0; i < count; i++) { |
||||
|
weather.getHourlyTimes().add(cleanTimeString(getJsonString(times, i, ""))); |
||||
|
weather.getHourlyTemperatures().add(cleanTemperature(getJsonDouble(temps, i, 0))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (humidities != null) { |
||||
|
int count = Math.min(humidities.size(), 24); |
||||
|
for (int i = 0; i < count; i++) { |
||||
|
weather.getHourlyHumidities().add(cleanHumidity(getJsonInt(humidities, i, 50))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (windSpeeds != null) { |
||||
|
int count = Math.min(windSpeeds.size(), 24); |
||||
|
for (int i = 0; i < count; i++) { |
||||
|
weather.getHourlyWindSpeeds().add(cleanWindSpeed(getJsonDouble(windSpeeds, i, 0))); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (!weather.getHourlyHumidities().isEmpty()) { |
||||
|
weather.setHumidity(weather.getHourlyHumidities().get(0)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return weather; |
||||
|
} catch (Exception e) { |
||||
|
throw new ParseException("解析天气JSON数据失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private String getJsonString(JsonArray arr, int index, String defaultValue) { |
||||
|
if (arr == null || index >= arr.size()) return defaultValue; |
||||
|
JsonElement element = arr.get(index); |
||||
|
return element.isJsonNull() ? defaultValue : element.getAsString(); |
||||
|
} |
||||
|
|
||||
|
private double getJsonDouble(JsonObject obj, String key, double defaultValue) { |
||||
|
JsonElement element = obj.get(key); |
||||
|
if (element == null || element.isJsonNull()) return defaultValue; |
||||
|
return element.getAsDouble(); |
||||
|
} |
||||
|
|
||||
|
private int getJsonInt(JsonObject obj, String key, int defaultValue) { |
||||
|
JsonElement element = obj.get(key); |
||||
|
if (element == null || element.isJsonNull()) return defaultValue; |
||||
|
return element.getAsInt(); |
||||
|
} |
||||
|
|
||||
|
private double getJsonDouble(JsonArray arr, int index, double defaultValue) { |
||||
|
if (arr == null || index >= arr.size()) return defaultValue; |
||||
|
JsonElement element = arr.get(index); |
||||
|
if (element == null || element.isJsonNull()) return defaultValue; |
||||
|
return element.getAsDouble(); |
||||
|
} |
||||
|
|
||||
|
private int getJsonInt(JsonArray arr, int index, int defaultValue) { |
||||
|
if (arr == null || index >= arr.size()) return defaultValue; |
||||
|
JsonElement element = arr.get(index); |
||||
|
if (element == null || element.isJsonNull()) return defaultValue; |
||||
|
return element.getAsInt(); |
||||
|
} |
||||
|
|
||||
|
private double cleanTemperature(double temp) { |
||||
|
return Math.round(temp * 10.0) / 10.0; |
||||
|
} |
||||
|
|
||||
|
private double cleanWindSpeed(double speed) { |
||||
|
return Math.round(speed * 10.0) / 10.0; |
||||
|
} |
||||
|
|
||||
|
private int cleanHumidity(int humidity) { |
||||
|
if (humidity < 0) return 50; |
||||
|
if (humidity > 100) return 100; |
||||
|
return humidity; |
||||
|
} |
||||
|
|
||||
|
private String cleanTimeString(String time) { |
||||
|
if (time == null || time.isEmpty()) return ""; |
||||
|
if (time.contains("T")) { |
||||
|
return time.substring(time.indexOf("T") + 1, time.indexOf("T") + 6); |
||||
|
} |
||||
|
return time; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDataSourceName() { |
||||
|
return "Open-Meteo 实时天气"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,122 @@ |
|||||
|
package com.example.crawler.util; |
||||
|
|
||||
|
import java.time.LocalDateTime; |
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/**
 * Data-cleaning utilities.
 *
 * <p>Static helpers that normalise scraped text into numbers, dates and word lists.
 */
public class DataCleaner {

    /** Stop words; the map is used purely as a set (key equals value). */
    private static final Map<String, String> STOP_WORDS = new HashMap<>();

    static {
        String[] words = {
            "的", "了", "是", "在", "和", "与", "对", "为", "有", "我",
            "你", "他", "她", "它", "这", "那", "就", "也", "都", "要",
            "会", "能", "可", "以", "说", "到", "来", "去", "着", "过"
        };
        for (String word : words) {
            STOP_WORDS.put(word, word);
        }
    }

    // Patterns and formatters are immutable and thread-safe: compile once, reuse on every call.
    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern NON_WORD_CHARS = Pattern.compile("[^\u4e00-\u9fa5a-zA-Z0-9]");

    /** Accepted news timestamp layouts, tried in this order. */
    private static final DateTimeFormatter[] NEWS_TIME_FORMATS = {
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"),
        DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH:mm")
    };

    /**
     * Extracts a price from text such as a currency-prefixed amount.
     *
     * @param price raw price text, may be {@code null}
     * @return the numeric value, or {@code 0.0} when the input is null, empty or not a number
     */
    public static double cleanPrice(String price) {
        return parseDecimal(price);
    }

    /**
     * Converts a star-rating class string containing "One".."Five" to its number.
     *
     * @param ratingClass class attribute text, may be {@code null}
     * @return 1..5, or 0 when nothing matches
     */
    public static int cleanRating(String ratingClass) {
        if (ratingClass == null) {
            return 0;
        }
        if (ratingClass.contains("Five")) {
            return 5;
        }
        if (ratingClass.contains("Four")) {
            return 4;
        }
        if (ratingClass.contains("Three")) {
            return 3;
        }
        if (ratingClass.contains("Two")) {
            return 2;
        }
        if (ratingClass.contains("One")) {
            return 1;
        }
        return 0;
    }

    /**
     * Parses a news timestamp. Surrounding whitespace is ignored and each known layout is
     * tried in turn.
     *
     * @param timeStr raw timestamp text, may be {@code null}
     * @return the parsed time, or the current time when the input is null, empty or unparseable
     */
    public static LocalDateTime cleanNewsTime(String timeStr) {
        if (timeStr == null || timeStr.isEmpty()) {
            return LocalDateTime.now();
        }
        String text = timeStr.trim();
        for (DateTimeFormatter format : NEWS_TIME_FORMATS) {
            try {
                return LocalDateTime.parse(text, format);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not this layout; try the next one.
            }
        }
        return LocalDateTime.now();
    }

    /**
     * Trims a title and collapses every run of whitespace to a single space.
     *
     * @param title raw title, may be {@code null}
     * @return the cleaned title, or "" for {@code null}
     */
    public static String cleanTitle(String title) {
        if (title == null) {
            return "";
        }
        return WHITESPACE_RUN.matcher(title.trim()).replaceAll(" ");
    }

    /**
     * Extracts a score from text.
     *
     * @param score raw score text, may be {@code null}
     * @return the numeric value, or {@code 0.0} when the input is null, empty or not a number
     */
    public static double cleanScore(String score) {
        return parseDecimal(score);
    }

    /**
     * Splits text into words: every character outside the CJK range used here, ASCII letters
     * and digits acts as a separator.
     *
     * <p>The cleaned text is trimmed before splitting so that input starting with a separator
     * does not produce a leading empty token.
     *
     * @param text raw text, may be {@code null}
     * @return the words in order; an empty array when there are none
     */
    public static String[] extractWords(String text) {
        if (text == null || text.isEmpty()) {
            return new String[0];
        }
        String cleaned = NON_WORD_CHARS.matcher(text).replaceAll(" ").trim();
        if (cleaned.isEmpty()) {
            return new String[0];
        }
        return WHITESPACE_RUN.split(cleaned);
    }

    /**
     * @param word candidate word, may be {@code null}
     * @return {@code true} when the word is null, shorter than two characters or a stop word
     */
    public static boolean isStopWord(String word) {
        return word == null || word.length() < 2 || STOP_WORDS.containsKey(word);
    }

    /**
     * Counts occurrences of every word that is not a stop word.
     *
     * @param words words to count; {@code null} is treated as empty
     * @return a mutable map from word to count
     */
    public static Map<String, Integer> countWordFrequency(String[] words) {
        Map<String, Integer> frequency = new HashMap<>();
        if (words == null) {
            return frequency;
        }
        for (String word : words) {
            if (!isStopWord(word)) {
                frequency.merge(word, 1, Integer::sum);
            }
        }
        return frequency;
    }

    /**
     * @param dateTime the time to inspect
     * @return the hour of day (0-23)
     */
    public static int extractHour(LocalDateTime dateTime) {
        return dateTime.getHour();
    }

    /** Strips everything except digits and dots, then parses; returns 0.0 on any failure. */
    private static double parseDecimal(String raw) {
        if (raw == null || raw.isEmpty()) {
            return 0.0;
        }
        String cleaned = NON_NUMERIC.matcher(raw).replaceAll("");
        try {
            return Double.parseDouble(cleaned);
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }
}
||||
@ -0,0 +1,126 @@ |
|||||
|
package com.example.crawler.util; |
||||
|
|
||||
|
import com.example.crawler.exception.NetworkException; |
||||
|
|
||||
|
import java.net.URI; |
||||
|
import java.net.http.HttpClient; |
||||
|
import java.net.http.HttpRequest; |
||||
|
import java.net.http.HttpResponse; |
||||
|
import java.time.Duration; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/** |
||||
|
* HTTP工具类 |
||||
|
* 封装HTTP请求操作,使用Java 11内置HttpClient |
||||
|
*/ |
||||
|
public class HttpUtil { |
||||
|
|
||||
|
private static final HttpClient httpClient = HttpClient.newBuilder() |
||||
|
.connectTimeout(Duration.ofSeconds(30)) |
||||
|
.followRedirects(HttpClient.Redirect.NORMAL) |
||||
|
.build(); |
||||
|
|
||||
|
private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; |
||||
|
|
||||
|
/** |
||||
|
* 发送GET请求 |
||||
|
* |
||||
|
* @param url 请求URL |
||||
|
* @return 响应内容 |
||||
|
* @throws NetworkException 网络异常 |
||||
|
*/ |
||||
|
public static String get(String url) throws NetworkException { |
||||
|
return get(url, Map.of()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 发送GET请求(带请求头) |
||||
|
* |
||||
|
* @param url 请求URL |
||||
|
* @param headers 请求头 |
||||
|
* @return 响应内容 |
||||
|
* @throws NetworkException 网络异常 |
||||
|
*/ |
||||
|
public static String get(String url, Map<String, String> headers) throws NetworkException { |
||||
|
try { |
||||
|
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
||||
|
.uri(URI.create(url)) |
||||
|
.timeout(Duration.ofSeconds(30)) |
||||
|
.GET(); |
||||
|
|
||||
|
// 添加默认User-Agent
|
||||
|
if (!headers.containsKey("User-Agent")) { |
||||
|
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
||||
|
} |
||||
|
|
||||
|
// 添加自定义请求头
|
||||
|
headers.forEach(requestBuilder::header); |
||||
|
|
||||
|
HttpRequest request = requestBuilder.build(); |
||||
|
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
||||
|
|
||||
|
if (response.statusCode() != 200) { |
||||
|
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
||||
|
} |
||||
|
|
||||
|
return response.body(); |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 发送POST请求 |
||||
|
* |
||||
|
* @param url 请求URL |
||||
|
* @param body 请求体 |
||||
|
* @param headers 请求头 |
||||
|
* @return 响应内容 |
||||
|
* @throws NetworkException 网络异常 |
||||
|
*/ |
||||
|
public static String post(String url, String body, Map<String, String> headers) throws NetworkException { |
||||
|
try { |
||||
|
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
||||
|
.uri(URI.create(url)) |
||||
|
.timeout(Duration.ofSeconds(30)) |
||||
|
.header("Content-Type", "application/json") |
||||
|
.POST(HttpRequest.BodyPublishers.ofString(body)); |
||||
|
|
||||
|
// 添加默认User-Agent
|
||||
|
if (!headers.containsKey("User-Agent")) { |
||||
|
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
||||
|
} |
||||
|
|
||||
|
// 添加自定义请求头
|
||||
|
headers.forEach(requestBuilder::header); |
||||
|
|
||||
|
HttpRequest request = requestBuilder.build(); |
||||
|
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
||||
|
|
||||
|
if (response.statusCode() != 200) { |
||||
|
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
||||
|
} |
||||
|
|
||||
|
return response.body(); |
||||
|
} catch (NetworkException e) { |
||||
|
throw e; |
||||
|
} catch (Exception e) { |
||||
|
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 设置请求间隔,避免对服务器造成压力 |
||||
|
* |
||||
|
* @param seconds 间隔秒数 |
||||
|
*/ |
||||
|
public static void sleep(int seconds) { |
||||
|
try { |
||||
|
Thread.sleep(seconds * 1000L); |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,95 @@ |
|||||
|
package com.example.crawler.util; |
||||
|
|
||||
|
import com.example.crawler.exception.DataSaveException; |
||||
|
import com.google.gson.Gson; |
||||
|
import com.google.gson.GsonBuilder; |
||||
|
|
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.nio.file.Files; |
||||
|
import java.nio.file.Path; |
||||
|
import java.nio.file.Paths; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/** |
||||
|
* JSON工具类 |
||||
|
* 封装JSON序列化和文件读写操作 |
||||
|
*/ |
||||
|
public class JsonUtil { |
||||
|
|
||||
|
private static final Gson gson = new GsonBuilder() |
||||
|
.setPrettyPrinting() |
||||
|
.disableHtmlEscaping() |
||||
|
.create(); |
||||
|
|
||||
|
/** |
||||
|
* 将对象序列化为JSON字符串 |
||||
|
* |
||||
|
* @param obj 对象 |
||||
|
* @return JSON字符串 |
||||
|
*/ |
||||
|
public static String toJson(Object obj) { |
||||
|
return gson.toJson(obj); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将JSON字符串反序列化为对象 |
||||
|
* |
||||
|
* @param json JSON字符串 |
||||
|
* @param classOfT 目标类 |
||||
|
* @param <T> 泛型类型 |
||||
|
* @return 反序列化后的对象 |
||||
|
*/ |
||||
|
public static <T> T fromJson(String json, Class<T> classOfT) { |
||||
|
return gson.fromJson(json, classOfT); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将对象保存为JSON文件 |
||||
|
* |
||||
|
* @param obj 对象 |
||||
|
* @param filePath 文件路径 |
||||
|
* @throws DataSaveException 数据保存异常 |
||||
|
*/ |
||||
|
public static void saveToJsonFile(Object obj, String filePath) throws DataSaveException { |
||||
|
try { |
||||
|
// 确保目录存在
|
||||
|
Path path = Paths.get(filePath); |
||||
|
Path parentDir = path.getParent(); |
||||
|
if (parentDir != null && !Files.exists(parentDir)) { |
||||
|
Files.createDirectories(parentDir); |
||||
|
} |
||||
|
|
||||
|
try (FileWriter writer = new FileWriter(filePath)) { |
||||
|
gson.toJson(obj, writer); |
||||
|
} |
||||
|
} catch (IOException e) { |
||||
|
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将列表保存为JSON文件 |
||||
|
* |
||||
|
* @param list 列表 |
||||
|
* @param filePath 文件路径 |
||||
|
* @param <T> 泛型类型 |
||||
|
* @throws DataSaveException 数据保存异常 |
||||
|
*/ |
||||
|
public static <T> void saveListToJsonFile(List<T> list, String filePath) throws DataSaveException { |
||||
|
try { |
||||
|
// 确保目录存在
|
||||
|
Path path = Paths.get(filePath); |
||||
|
Path parentDir = path.getParent(); |
||||
|
if (parentDir != null && !Files.exists(parentDir)) { |
||||
|
Files.createDirectories(parentDir); |
||||
|
} |
||||
|
|
||||
|
try (FileWriter writer = new FileWriter(filePath)) { |
||||
|
gson.toJson(list, writer); |
||||
|
} |
||||
|
} catch (IOException e) { |
||||
|
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,72 @@ |
|||||
|
package com.example.crawler.view; |
||||
|
|
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
/**
 * Crawler view.
 *
 * <p>MVC pattern: the View layer, responsible for the CLI output and user interaction.
 */
public class CrawlerView {

    /** Menu lines printed by {@link #showMenu()}, each followed by a line break. */
    private static final String[] MENU_LINES = {
        "\n=== 数据爬取与分析系统 ===",
        "1. 爬取书籍信息(toscrape.com)",
        "2. 爬取新浪国内新闻",
        "3. 爬取软科中国大学排名",
        "4. 爬取Open-Meteo实时天气",
        "5. 爬取全部数据并保存",
        "6. 保存当前数据到文件",
        "7. 生成所有数据源的分析报告与图表",
        "8. 爬取并分析所有数据(一键完成)",
        "9. 退出"
    };

    /**
     * Prints the main menu followed by the input prompt (no trailing line break).
     */
    public void showMenu() {
        for (String line : MENU_LINES) {
            System.out.println(line);
        }
        System.out.print("请选择操作:");
    }

    /**
     * Reads one line and interprets it as a number.
     *
     * @param scanner the input scanner
     * @return the number entered, or -1 when the trimmed line is not an integer
     */
    public int getInput(Scanner scanner) {
        String line = scanner.nextLine();
        try {
            return Integer.parseInt(line.trim());
        } catch (NumberFormatException e) {
            // Signal an invalid choice to the caller.
            return -1;
        }
    }

    /**
     * Prints an error message to standard error.
     *
     * @param message the error text
     */
    public void showError(String message) {
        System.err.println("错误: " + message);
    }

    /**
     * Prints a success message to standard output.
     *
     * @param message the success text
     */
    public void showSuccess(String message) {
        System.out.println("成功: " + message);
    }

    /**
     * Waits for the user to press Enter, then emits the ANSI sequence that moves the cursor
     * home and clears the screen.
     *
     * @param scanner the input scanner
     */
    public void pause(Scanner scanner) {
        System.out.print("\n按回车键继续...");
        scanner.nextLine();
        System.out.print("\033[H\033[2J");
        System.out.flush();
    }
}
||||
@ -1,359 +0,0 @@ |
|||||
package com.university; |
|
||||
|
|
||||
import java.util.HashMap; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
import java.util.Optional; |
|
||||
import java.util.Scanner; |
|
||||
|
|
||||
import com.university.analysis.RankAnalyzer; |
|
||||
import com.university.crawler.UniversityRankCrawler; |
|
||||
import com.university.model.RankChange; |
|
||||
import com.university.model.University; |
|
||||
import com.university.model.UniversityComparison; |
|
||||
import com.university.storage.DataStorage; |
|
||||
import com.university.visualization.ChartGenerator; |
|
||||
import com.university.visualization.ConsoleReporter; |
|
||||
|
|
||||
/** |
|
||||
* 主程序入口 |
|
||||
* 整合所有模块,提供交互式菜单 |
|
||||
*/ |
|
||||
public class Main { |
|
||||
|
|
||||
// 核心组件
|
|
||||
private final UniversityRankCrawler crawler; |
|
||||
private final DataStorage storage; |
|
||||
private final RankAnalyzer analyzer; |
|
||||
private final ChartGenerator chartGenerator; |
|
||||
private final ConsoleReporter reporter; |
|
||||
|
|
||||
// 数据缓存
|
|
||||
private Map<Integer, List<University>> dataCache; |
|
||||
private Scanner scanner; |
|
||||
|
|
||||
public Main() { |
|
||||
this.crawler = new UniversityRankCrawler(); |
|
||||
this.storage = new DataStorage(); |
|
||||
this.analyzer = new RankAnalyzer(); |
|
||||
this.chartGenerator = new ChartGenerator(); |
|
||||
this.reporter = new ConsoleReporter(); |
|
||||
this.dataCache = new HashMap<>(); |
|
||||
this.scanner = new Scanner(System.in); |
|
||||
} |
|
||||
|
|
||||
public static void main(String[] args) { |
|
||||
Main app = new Main(); |
|
||||
app.run(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 运行主程序 |
|
||||
*/ |
|
||||
public void run() { |
|
||||
// 打印欢迎信息
|
|
||||
reporter.printWelcome(); |
|
||||
|
|
||||
// 初始化数据
|
|
||||
initializeData(); |
|
||||
|
|
||||
// 主循环
|
|
||||
boolean running = true; |
|
||||
while (running) { |
|
||||
reporter.printMenu(); |
|
||||
String choice = scanner.nextLine().trim(); |
|
||||
|
|
||||
switch (choice) { |
|
||||
case "1": |
|
||||
showTopN(); |
|
||||
break; |
|
||||
case "2": |
|
||||
showByProvince(); |
|
||||
break; |
|
||||
case "3": |
|
||||
searchUniversity(); |
|
||||
break; |
|
||||
case "4": |
|
||||
showProvinceStatistics(); |
|
||||
break; |
|
||||
case "5": |
|
||||
showScoreStatistics(); |
|
||||
break; |
|
||||
case "6": |
|
||||
showRankChanges(); |
|
||||
break; |
|
||||
case "7": |
|
||||
compareUniversities(); |
|
||||
break; |
|
||||
case "8": |
|
||||
showYearlyTrend(); |
|
||||
break; |
|
||||
case "9": |
|
||||
generateAllCharts(); |
|
||||
break; |
|
||||
case "0": |
|
||||
running = false; |
|
||||
System.out.println("感谢使用,再见!"); |
|
||||
break; |
|
||||
default: |
|
||||
System.out.println("无效选择,请重新输入!"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
scanner.close(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 初始化数据 |
|
||||
*/ |
|
||||
private void initializeData() { |
|
||||
System.out.println("正在初始化数据..."); |
|
||||
|
|
||||
// 爬取2022-2024年的数据
|
|
||||
int[] years = {2022, 2023, 2024}; |
|
||||
|
|
||||
for (int year : years) { |
|
||||
List<University> data; |
|
||||
|
|
||||
// 先尝试从文件读取
|
|
||||
if (storage.dataExists(year)) { |
|
||||
System.out.println("从文件加载 " + year + " 年数据..."); |
|
||||
data = storage.readRawData(year); |
|
||||
} else { |
|
||||
// 文件不存在则爬取
|
|
||||
System.out.println("爬取 " + year + " 年数据..."); |
|
||||
data = crawler.crawlRankings(year); |
|
||||
// 保存到文件
|
|
||||
storage.saveRawData(data, year); |
|
||||
} |
|
||||
|
|
||||
dataCache.put(year, data); |
|
||||
} |
|
||||
|
|
||||
System.out.println("数据初始化完成!\n"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 显示Top N |
|
||||
*/ |
|
||||
private void showTopN() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
System.out.print("请输入要查看的数量: "); |
|
||||
int n = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
List<University> topN = analyzer.getTopN(data, n); |
|
||||
reporter.printUniversityList(topN, year + "年 Top " + n + " 高校"); |
|
||||
|
|
||||
// 生成图表
|
|
||||
chartGenerator.generateTopNBarChart(data, year, n); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 按省份查看 |
|
||||
*/ |
|
||||
private void showByProvince() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
System.out.print("请输入省份名称: "); |
|
||||
String province = scanner.nextLine().trim(); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
List<University> result = analyzer.getByProvince(data, province); |
|
||||
if (result.isEmpty()) { |
|
||||
System.out.println("该省份没有高校数据!"); |
|
||||
} else { |
|
||||
reporter.printUniversityList(result, year + "年 " + province + " 高校"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 搜索高校 |
|
||||
*/ |
|
||||
private void searchUniversity() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
System.out.print("请输入搜索关键词: "); |
|
||||
String keyword = scanner.nextLine().trim(); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
List<University> result = analyzer.searchUniversity(data, keyword); |
|
||||
if (result.isEmpty()) { |
|
||||
System.out.println("未找到匹配的高校!"); |
|
||||
} else { |
|
||||
reporter.printUniversityList(result, "搜索结果"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 显示省份统计 |
|
||||
*/ |
|
||||
private void showProvinceStatistics() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
Map<String, Long> provinceCount = analyzer.countByProvince(data); |
|
||||
reporter.printProvinceStatistics(provinceCount, year + "年 省份分布统计"); |
|
||||
|
|
||||
// 生成图表
|
|
||||
chartGenerator.generateProvincePieChart(provinceCount, year); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 显示分数统计 |
|
||||
*/ |
|
||||
private void showScoreStatistics() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
RankAnalyzer.ScoreStatistics stats = analyzer.getScoreStatistics(data); |
|
||||
reporter.printScoreStatistics(stats, year + "年 分数统计"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 显示排名变化 |
|
||||
*/ |
|
||||
private void showRankChanges() { |
|
||||
List<RankChange> changes = analyzer.calculateRankChanges(dataCache); |
|
||||
|
|
||||
// 显示上升最快
|
|
||||
List<RankChange> rising = analyzer.getFastestRising(changes, 5); |
|
||||
reporter.printRankChanges(rising, "排名上升最快 Top 5"); |
|
||||
|
|
||||
// 显示下降最快
|
|
||||
List<RankChange> falling = analyzer.getFastestFalling(changes, 5); |
|
||||
reporter.printRankChanges(falling, "排名下降最快 Top 5"); |
|
||||
|
|
||||
// 生成图表
|
|
||||
if (!rising.isEmpty()) { |
|
||||
chartGenerator.generateRankChangeChart(rising, "排名上升最快", "rank_rising.png"); |
|
||||
} |
|
||||
if (!falling.isEmpty()) { |
|
||||
chartGenerator.generateRankChangeChart(falling, "排名下降最快", "rank_falling.png"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 对比两所高校 |
|
||||
*/ |
|
||||
private void compareUniversities() { |
|
||||
System.out.print("请输入要查看的年份(2022-2024): "); |
|
||||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|
||||
|
|
||||
System.out.print("请输入第一所高校名称: "); |
|
||||
String name1 = scanner.nextLine().trim(); |
|
||||
|
|
||||
System.out.print("请输入第二所高校名称: "); |
|
||||
String name2 = scanner.nextLine().trim(); |
|
||||
|
|
||||
List<University> data = dataCache.get(year); |
|
||||
if (data == null) { |
|
||||
System.out.println("该年份数据不存在!"); |
|
||||
return; |
|
||||
} |
|
||||
|
|
||||
Optional<University> u1 = data.stream() |
|
||||
.filter(u -> u.getName().equals(name1)) |
|
||||
.findFirst(); |
|
||||
Optional<University> u2 = data.stream() |
|
||||
.filter(u -> u.getName().equals(name2)) |
|
||||
.findFirst(); |
|
||||
|
|
||||
if (u1.isPresent() && u2.isPresent()) { |
|
||||
UniversityComparison comparison = analyzer.compareUniversities(u1.get(), u2.get()); |
|
||||
reporter.printComparison(comparison); |
|
||||
} else { |
|
||||
System.out.println("未找到指定的高校!"); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 显示某高校历年趋势 |
|
||||
*/ |
|
||||
private void showYearlyTrend() { |
|
||||
System.out.print("请输入高校名称: "); |
|
||||
String name = scanner.nextLine().trim(); |
|
||||
|
|
||||
List<University> history = analyzer.getUniversityHistory(dataCache, name); |
|
||||
|
|
||||
if (history.isEmpty()) { |
|
||||
System.out.println("未找到该高校的数据!"); |
|
||||
} else { |
|
||||
reporter.printYearlyTrend(history, name); |
|
||||
chartGenerator.generateRankTrendLineChart(history, name); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成所有图表 |
|
||||
*/ |
|
||||
private void generateAllCharts() { |
|
||||
System.out.println("正在生成所有图表..."); |
|
||||
|
|
||||
for (Map.Entry<Integer, List<University>> entry : dataCache.entrySet()) { |
|
||||
int year = entry.getKey(); |
|
||||
List<University> data = entry.getValue(); |
|
||||
|
|
||||
// Top 10 柱状图
|
|
||||
chartGenerator.generateTopNBarChart(data, year, 10); |
|
||||
|
|
||||
// 省份分布饼图
|
|
||||
Map<String, Long> provinceCount = analyzer.countByProvince(data); |
|
||||
chartGenerator.generateProvincePieChart(provinceCount, year); |
|
||||
} |
|
||||
|
|
||||
// 排名变化图
|
|
||||
List<RankChange> changes = analyzer.calculateRankChanges(dataCache); |
|
||||
List<RankChange> rising = analyzer.getFastestRising(changes, 10); |
|
||||
List<RankChange> falling = analyzer.getFastestFalling(changes, 10); |
|
||||
|
|
||||
if (!rising.isEmpty()) { |
|
||||
chartGenerator.generateRankChangeChart(rising, "排名上升最快", "rank_rising.png"); |
|
||||
} |
|
||||
if (!falling.isEmpty()) { |
|
||||
chartGenerator.generateRankChangeChart(falling, "排名下降最快", "rank_falling.png"); |
|
||||
} |
|
||||
|
|
||||
// 为Top 5高校生成历年趋势折线图
|
|
||||
List<University> topUniversities = analyzer.getTopN(dataCache.get(2024), 5); |
|
||||
for (University u : topUniversities) { |
|
||||
List<University> history = analyzer.getUniversityHistory(dataCache, u.getName()); |
|
||||
if (!history.isEmpty()) { |
|
||||
chartGenerator.generateRankTrendLineChart(history, u.getName()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("所有图表生成完成!\n"); |
|
||||
} |
|
||||
} |
|
||||
@ -1,250 +0,0 @@ |
|||||
package com.university.analysis; |
|
||||
|
|
||||
import com.university.model.RankChange; |
|
||||
import com.university.model.University; |
|
||||
import com.university.model.UniversityComparison; |
|
||||
|
|
||||
import java.util.*; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
/** |
|
||||
* 排名分析类 |
|
||||
* 提供各种数据分析功能 |
|
||||
*/ |
|
||||
public class RankAnalyzer { |
|
||||
|
|
||||
/** |
|
||||
* 获取Top N高校 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param n 数量 |
|
||||
* @return Top N高校列表 |
|
||||
*/ |
|
||||
public List<University> getTopN(List<University> universities, int n) { |
|
||||
return universities.stream() |
|
||||
.sorted(Comparator.comparingInt(University::getRank)) |
|
||||
.limit(n) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 按省份统计高校数量 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @return 省份-数量映射 |
|
||||
*/ |
|
||||
public Map<String, Long> countByProvince(List<University> universities) { |
|
||||
return universities.stream() |
|
||||
.collect(Collectors.groupingBy( |
|
||||
University::getProvince, |
|
||||
Collectors.counting() |
|
||||
)); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 按省份统计平均分 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @return 省份-平均分映射 |
|
||||
*/ |
|
||||
public Map<String, Double> averageScoreByProvince(List<University> universities) { |
|
||||
return universities.stream() |
|
||||
.collect(Collectors.groupingBy( |
|
||||
University::getProvince, |
|
||||
Collectors.averagingDouble(University::getScore) |
|
||||
)); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取指定省份的高校 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param province 省份 |
|
||||
* @return 该省份的高校列表 |
|
||||
*/ |
|
||||
public List<University> getByProvince(List<University> universities, String province) { |
|
||||
return universities.stream() |
|
||||
.filter(u -> u.getProvince().equals(province)) |
|
||||
.sorted(Comparator.comparingInt(University::getRank)) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 搜索高校 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param keyword 关键词 |
|
||||
* @return 匹配的高校列表 |
|
||||
*/ |
|
||||
public List<University> searchUniversity(List<University> universities, String keyword) { |
|
||||
return universities.stream() |
|
||||
.filter(u -> u.getName().contains(keyword)) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取分数统计信息 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @return 统计信息 |
|
||||
*/ |
|
||||
public ScoreStatistics getScoreStatistics(List<University> universities) { |
|
||||
DoubleSummaryStatistics stats = universities.stream() |
|
||||
.mapToDouble(University::getScore) |
|
||||
.summaryStatistics(); |
|
||||
|
|
||||
return new ScoreStatistics( |
|
||||
stats.getCount(), |
|
||||
stats.getSum(), |
|
||||
stats.getAverage(), |
|
||||
stats.getMax(), |
|
||||
stats.getMin() |
|
||||
); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 计算历年排名变化 |
|
||||
* |
|
||||
* @param dataMap 多年数据映射(年份->高校列表) |
|
||||
* @return 排名变化列表 |
|
||||
*/ |
|
||||
public List<RankChange> calculateRankChanges(Map<Integer, List<University>> dataMap) { |
|
||||
List<RankChange> changes = new ArrayList<>(); |
|
||||
|
|
||||
// 获取所有年份并排序
|
|
||||
List<Integer> years = new ArrayList<>(dataMap.keySet()); |
|
||||
Collections.sort(years); |
|
||||
|
|
||||
if (years.size() < 2) { |
|
||||
return changes; |
|
||||
} |
|
||||
|
|
||||
int startYear = years.get(0); |
|
||||
int endYear = years.get(years.size() - 1); |
|
||||
|
|
||||
List<University> startData = dataMap.get(startYear); |
|
||||
List<University> endData = dataMap.get(endYear); |
|
||||
|
|
||||
// 创建名称到高校的映射
|
|
||||
Map<String, University> startMap = startData.stream() |
|
||||
.collect(Collectors.toMap(University::getName, u -> u)); |
|
||||
Map<String, University> endMap = endData.stream() |
|
||||
.collect(Collectors.toMap(University::getName, u -> u)); |
|
||||
|
|
||||
// 计算每所高校的变化
|
|
||||
for (String name : startMap.keySet()) { |
|
||||
if (endMap.containsKey(name)) { |
|
||||
University startUni = startMap.get(name); |
|
||||
University endUni = endMap.get(name); |
|
||||
|
|
||||
RankChange change = new RankChange( |
|
||||
name, |
|
||||
startYear, |
|
||||
endYear, |
|
||||
startUni.getRank(), |
|
||||
endUni.getRank(), |
|
||||
startUni.getScore(), |
|
||||
endUni.getScore() |
|
||||
); |
|
||||
changes.add(change); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
return changes; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取排名上升最快的高校 |
|
||||
* |
|
||||
* @param changes 排名变化列表 |
|
||||
* @param n 数量 |
|
||||
* @return 上升最快的高校列表 |
|
||||
*/ |
|
||||
public List<RankChange> getFastestRising(List<RankChange> changes, int n) { |
|
||||
return changes.stream() |
|
||||
.filter(c -> c.getRankChange() > 0) // 只取排名上升的
|
|
||||
.sorted(Comparator.comparingInt(RankChange::getRankChange).reversed()) |
|
||||
.limit(n) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取排名下降最快的高校 |
|
||||
* |
|
||||
* @param changes 排名变化列表 |
|
||||
* @param n 数量 |
|
||||
* @return 下降最快的高校列表 |
|
||||
*/ |
|
||||
public List<RankChange> getFastestFalling(List<RankChange> changes, int n) { |
|
||||
return changes.stream() |
|
||||
.filter(c -> c.getRankChange() < 0) // 只取排名下降的
|
|
||||
.sorted(Comparator.comparingInt(RankChange::getRankChange)) |
|
||||
.limit(n) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 对比两所高校 |
|
||||
* |
|
||||
* @param u1 高校1 |
|
||||
* @param u2 高校2 |
|
||||
* @return 对比结果 |
|
||||
*/ |
|
||||
public UniversityComparison compareUniversities(University u1, University u2) { |
|
||||
return new UniversityComparison(u1, u2); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取某高校在多年数据中的信息 |
|
||||
* |
|
||||
* @param dataMap 多年数据映射 |
|
||||
* @param universityName 高校名称 |
|
||||
* @return 该高校历年的信息列表 |
|
||||
*/ |
|
||||
public List<University> getUniversityHistory(Map<Integer, List<University>> dataMap, |
|
||||
String universityName) { |
|
||||
List<University> history = new ArrayList<>(); |
|
||||
|
|
||||
for (List<University> yearData : dataMap.values()) { |
|
||||
yearData.stream() |
|
||||
.filter(u -> u.getName().equals(universityName)) |
|
||||
.findFirst() |
|
||||
.ifPresent(history::add); |
|
||||
} |
|
||||
|
|
||||
// 按年份排序
|
|
||||
history.sort(Comparator.comparingInt(University::getYear)); |
|
||||
return history; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 分数统计信息内部类 |
|
||||
*/ |
|
||||
public static class ScoreStatistics { |
|
||||
private final long count; |
|
||||
private final double sum; |
|
||||
private final double average; |
|
||||
private final double max; |
|
||||
private final double min; |
|
||||
|
|
||||
public ScoreStatistics(long count, double sum, double average, double max, double min) { |
|
||||
this.count = count; |
|
||||
this.sum = sum; |
|
||||
this.average = average; |
|
||||
this.max = max; |
|
||||
this.min = min; |
|
||||
} |
|
||||
|
|
||||
public long getCount() { return count; } |
|
||||
public double getSum() { return sum; } |
|
||||
public double getAverage() { return average; } |
|
||||
public double getMax() { return max; } |
|
||||
public double getMin() { return min; } |
|
||||
|
|
||||
@Override |
|
||||
public String toString() { |
|
||||
return String.format("统计信息: 数量=%d, 平均分=%.2f, 最高分=%.2f, 最低分=%.2f", |
|
||||
count, average, max, min); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,153 +0,0 @@ |
|||||
package com.university.crawler; |
|
||||
|
|
||||
import java.io.IOException; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
import org.jsoup.Jsoup; |
|
||||
import org.jsoup.nodes.Document; |
|
||||
import org.jsoup.nodes.Element; |
|
||||
import org.jsoup.select.Elements; |
|
||||
|
|
||||
import com.university.model.University; |
|
||||
|
|
||||
/** |
|
||||
* 高校排名爬虫类 |
|
||||
* 负责从网页抓取高校排名数据 |
|
||||
*/ |
|
||||
public class UniversityRankCrawler { |
|
||||
|
|
||||
// 请求间隔时间(毫秒),防止请求过快被封
|
|
||||
private static final int REQUEST_DELAY = 1000; |
|
||||
|
|
||||
/** |
|
||||
* 爬取软科中国大学排名数据 |
|
||||
* 分析软科官网HTML结构,提取真实排名数据 |
|
||||
* |
|
||||
* @param year 年份 |
|
||||
* @return 高校列表 |
|
||||
*/ |
|
||||
public List<University> crawlRankings(int year) { |
|
||||
List<University> universities = new ArrayList<>(); |
|
||||
|
|
||||
try { |
|
||||
// 软科排名URL
|
|
||||
String url = "https://www.shanghairanking.cn/rankings/bcur/" + year; |
|
||||
|
|
||||
System.out.println("正在爬取 " + year + " 年高校排名数据..."); |
|
||||
|
|
||||
// 发送HTTP请求获取网页内容
|
|
||||
Document doc = Jsoup.connect(url) |
|
||||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") |
|
||||
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") |
|
||||
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
|
||||
.timeout(15000) |
|
||||
.get(); |
|
||||
|
|
||||
// 分析HTML结构,提取排名数据
|
|
||||
// 找到排名表格
|
|
||||
Elements rows = doc.select("table.rk-table tbody tr"); |
|
||||
|
|
||||
for (Element row : rows) { |
|
||||
Elements cells = row.select("td"); |
|
||||
if (cells.size() >= 5) { |
|
||||
try { |
|
||||
// 提取排名
|
|
||||
String rankText = cells.get(0).text().trim(); |
|
||||
rankText = rankText.replaceAll("[^0-9]", ""); |
|
||||
if (rankText.isEmpty()) continue; |
|
||||
int rank = Integer.parseInt(rankText); |
|
||||
|
|
||||
// 提取学校名称
|
|
||||
String name = cells.get(1).text().trim(); |
|
||||
|
|
||||
// 提取省份
|
|
||||
String province = cells.get(2).text().trim(); |
|
||||
|
|
||||
// 提取总分
|
|
||||
String scoreText = cells.get(4).text().trim(); |
|
||||
scoreText = scoreText.replaceAll("[^0-9.]", ""); |
|
||||
if (scoreText.isEmpty()) continue; |
|
||||
double score = Double.parseDouble(scoreText); |
|
||||
|
|
||||
// 创建高校对象
|
|
||||
University university = new University(rank, name, province, score, year); |
|
||||
universities.add(university); |
|
||||
|
|
||||
// 限制爬取数量,避免请求过多
|
|
||||
if (universities.size() >= 100) break; |
|
||||
} catch (NumberFormatException e) { |
|
||||
// 跳过解析失败的行
|
|
||||
continue; |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
// 请求间隔,避免被封
|
|
||||
Thread.sleep(REQUEST_DELAY); |
|
||||
|
|
||||
} catch (IOException e) { |
|
||||
System.err.println("爬取数据失败: " + e.getMessage()); |
|
||||
System.out.println("将使用模拟数据..."); |
|
||||
// 如果爬取失败,使用模拟数据
|
|
||||
universities = generateMockData(year); |
|
||||
} catch (InterruptedException e) { |
|
||||
Thread.currentThread().interrupt(); |
|
||||
} |
|
||||
|
|
||||
System.out.println("成功获取 " + universities.size() + " 条数据"); |
|
||||
return universities; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 爬取多年数据 |
|
||||
* |
|
||||
* @param startYear 开始年份 |
|
||||
* @param endYear 结束年份 |
|
||||
* @return 多年数据集合 |
|
||||
*/ |
|
||||
public List<List<University>> crawlMultipleYears(int startYear, int endYear) { |
|
||||
List<List<University>> allData = new ArrayList<>(); |
|
||||
|
|
||||
for (int year = startYear; year <= endYear; year++) { |
|
||||
List<University> yearData = crawlRankings(year); |
|
||||
allData.add(yearData); |
|
||||
} |
|
||||
|
|
||||
return allData; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成模拟数据(用于演示) |
|
||||
* 当真实网站无法访问时使用 |
|
||||
*/ |
|
||||
private List<University> generateMockData(int year) { |
|
||||
List<University> mockData = new ArrayList<>(); |
|
||||
|
|
||||
// 基础数据,每年的分数略有变化
|
|
||||
double variation = (year - 2022) * 0.5; |
|
||||
|
|
||||
mockData.add(new University(1, "清华大学", "北京", 852.5 + variation, year)); |
|
||||
mockData.add(new University(2, "北京大学", "北京", 848.2 + variation, year)); |
|
||||
mockData.add(new University(3, "浙江大学", "浙江", 822.5 + variation, year)); |
|
||||
mockData.add(new University(4, "上海交通大学", "上海", 815.3 + variation, year)); |
|
||||
mockData.add(new University(5, "复旦大学", "上海", 805.1 + variation, year)); |
|
||||
mockData.add(new University(6, "南京大学", "江苏", 785.6 + variation, year)); |
|
||||
mockData.add(new University(7, "中国科学技术大学", "安徽", 782.4 + variation, year)); |
|
||||
mockData.add(new University(8, "华中科技大学", "湖北", 765.8 + variation, year)); |
|
||||
mockData.add(new University(9, "武汉大学", "湖北", 758.2 + variation, year)); |
|
||||
mockData.add(new University(10, "西安交通大学", "陕西", 752.6 + variation, year)); |
|
||||
mockData.add(new University(11, "中山大学", "广东", 745.3 + variation, year)); |
|
||||
mockData.add(new University(12, "四川大学", "四川", 738.9 + variation, year)); |
|
||||
mockData.add(new University(13, "哈尔滨工业大学", "黑龙江", 732.5 + variation, year)); |
|
||||
mockData.add(new University(14, "北京航空航天大学", "北京", 725.8 + variation, year)); |
|
||||
mockData.add(new University(15, "东南大学", "江苏", 718.4 + variation, year)); |
|
||||
mockData.add(new University(16, "北京理工大学", "北京", 712.6 + variation, year)); |
|
||||
mockData.add(new University(17, "同济大学", "上海", 705.3 + variation, year)); |
|
||||
mockData.add(new University(18, "中国人民大学", "北京", 698.5 + variation, year)); |
|
||||
mockData.add(new University(19, "北京师范大学", "北京", 692.1 + variation, year)); |
|
||||
mockData.add(new University(20, "南开大学", "天津", 685.7 + variation, year)); |
|
||||
|
|
||||
return mockData; |
|
||||
} |
|
||||
} |
|
||||
@ -1,145 +0,0 @@ |
|||||
package com.university.model; |
|
||||
|
|
||||
/**
 * Rank change record.
 * Holds the rank and score of one university in a start year and an end year, together
 * with the derived rank and score changes.
 *
 * <p>The derived values are recomputed whenever a start/end rank or score is set, so an
 * instance populated through the no-arg constructor and setters stays consistent.
 * {@link #setRankChange(int)} and {@link #setScoreChange(double)} remain available and
 * override the derived value until the next rank or score update.
 */
public class RankChange {

    // University name
    private String universityName;

    // Start and end year of the comparison
    private int startYear;
    private int endYear;

    // Rank in the start and end year
    private int startRank;
    private int endRank;

    // Rank change: positive means the university moved up, negative means it moved down
    private int rankChange;

    // Score in the start and end year
    private double startScore;
    private double endScore;

    // Score change: end score minus start score
    private double scoreChange;

    public RankChange() {
    }

    public RankChange(String universityName, int startYear, int endYear,
                      int startRank, int endRank, double startScore, double endScore) {
        this.universityName = universityName;
        this.startYear = startYear;
        this.endYear = endYear;
        this.startRank = startRank;
        this.endRank = endRank;
        this.startScore = startScore;
        this.endScore = endScore;
        recomputeRankChange();
        recomputeScoreChange();
    }

    /** A smaller rank number is better, so start minus end is positive for a rise. */
    private void recomputeRankChange() {
        this.rankChange = this.startRank - this.endRank;
    }

    private void recomputeScoreChange() {
        this.scoreChange = this.endScore - this.startScore;
    }

    // Getters and setters

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public int getStartYear() {
        return startYear;
    }

    public void setStartYear(int startYear) {
        this.startYear = startYear;
    }

    public int getEndYear() {
        return endYear;
    }

    public void setEndYear(int endYear) {
        this.endYear = endYear;
    }

    public int getStartRank() {
        return startRank;
    }

    public void setStartRank(int startRank) {
        this.startRank = startRank;
        recomputeRankChange();
    }

    public int getEndRank() {
        return endRank;
    }

    public void setEndRank(int endRank) {
        this.endRank = endRank;
        recomputeRankChange();
    }

    public int getRankChange() {
        return rankChange;
    }

    public void setRankChange(int rankChange) {
        this.rankChange = rankChange;
    }

    public double getStartScore() {
        return startScore;
    }

    public void setStartScore(double startScore) {
        this.startScore = startScore;
        recomputeScoreChange();
    }

    public double getEndScore() {
        return endScore;
    }

    public void setEndScore(double endScore) {
        this.endScore = endScore;
        recomputeScoreChange();
    }

    public double getScoreChange() {
        return scoreChange;
    }

    public void setScoreChange(double scoreChange) {
        this.scoreChange = scoreChange;
    }

    /**
     * Describes the trend in words.
     *
     * @return a text for "up N places", "down N places" or "unchanged"
     */
    public String getTrendDescription() {
        if (rankChange > 0) {
            return String.format("上升%d位", rankChange);
        } else if (rankChange < 0) {
            return String.format("下降%d位", Math.abs(rankChange));
        } else {
            return "排名不变";
        }
    }

    @Override
    public String toString() {
        return String.format("%s: %d年(第%d名) -> %d年(第%d名), %s",
                universityName, startYear, startRank, endYear, endRank, getTrendDescription());
    }
}
|
||||
@ -1,120 +0,0 @@ |
|||||
package com.university.model; |
|
||||
|
|
||||
import java.util.Objects; |
|
||||
|
|
||||
/**
 * University entity (Java Bean) that wraps one university ranking record.
 */
public class University {

    /** Ranking position. */
    private int rank;

    /** University name. */
    private String name;

    /** Province the university belongs to. */
    private String province;

    /** Total score. */
    private double score;

    /** Year of the ranking. */
    private int year;

    /**
     * Creates an empty instance; kept so the bean can be instantiated
     * reflectively and populated through its setters.
     */
    public University() {
    }

    /**
     * Creates a fully populated instance.
     *
     * @param rank     ranking position
     * @param name     university name
     * @param province province
     * @param score    total score
     * @param year     ranking year
     */
    public University(int rank, String name, String province, double score, int year) {
        this.year = year;
        this.score = score;
        this.province = province;
        this.name = name;
        this.rank = rank;
    }

    /** @return the ranking position */
    public int getRank() {
        return this.rank;
    }

    /** @param rank the ranking position */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return the university name */
    public String getName() {
        return this.name;
    }

    /** @param name the university name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the province */
    public String getProvince() {
        return this.province;
    }

    /** @param province the province */
    public void setProvince(String province) {
        this.province = province;
    }

    /** @return the total score */
    public double getScore() {
        return this.score;
    }

    /** @param score the total score */
    public void setScore(double score) {
        this.score = score;
    }

    /** @return the ranking year */
    public int getYear() {
        return this.year;
    }

    /** @param year the ranking year */
    public void setYear(int year) {
        this.year = year;
    }

    /**
     * Computes the change in rank relative to an earlier rank.
     *
     * @param previousRank rank in the earlier year
     * @return {@code previousRank - rank}; positive means the university moved up,
     *         negative means it moved down
     */
    public int calculateRankChange(int previousRank) {
        int delta = previousRank - rank;
        return delta;
    }

    /**
     * Computes the change in score relative to an earlier score.
     *
     * @param previousScore score in the earlier year
     * @return {@code score - previousScore}
     */
    public double calculateScoreChange(double previousScore) {
        double delta = score - previousScore;
        return delta;
    }

    @Override
    public String toString() {
        final String template =
                "University{rank=%d, name='%s', province='%s', score=%.2f, year=%d}";
        return String.format(template, rank, name, province, score, year);
    }

    /**
     * Two universities are equal when they have the same runtime class and all
     * five fields match.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null) {
            return false;
        }
        if (o.getClass() != getClass()) {
            return false;
        }
        University other = (University) o;
        if (rank != other.rank || year != other.year) {
            return false;
        }
        if (Double.compare(score, other.score) != 0) {
            return false;
        }
        return Objects.equals(name, other.name)
                && Objects.equals(province, other.province);
    }

    @Override
    public int hashCode() {
        // Field order is part of the hash value; keep it stable.
        return Objects.hash(rank, name, province, score, year);
    }
}
|
||||
@ -1,171 +0,0 @@ |
|||||
package com.university.model; |
|
||||
|
|
||||
/** |
|
||||
* 高校对比实体类 |
|
||||
* 用于存储两所高校的对比信息 |
|
||||
*/ |
|
||||
public class UniversityComparison { |
|
||||
|
|
||||
// 第一所高校
|
|
||||
private String universityName1; |
|
||||
|
|
||||
// 第二所高校
|
|
||||
private String universityName2; |
|
||||
|
|
||||
// 年份
|
|
||||
private int year; |
|
||||
|
|
||||
// 高校1排名
|
|
||||
private int rank1; |
|
||||
|
|
||||
// 高校2排名
|
|
||||
private int rank2; |
|
||||
|
|
||||
// 高校1分数
|
|
||||
private double score1; |
|
||||
|
|
||||
// 高校2分数
|
|
||||
private double score2; |
|
||||
|
|
||||
// 高校1省份
|
|
||||
private String province1; |
|
||||
|
|
||||
// 高校2省份
|
|
||||
private String province2; |
|
||||
|
|
||||
// 排名差距
|
|
||||
private int rankGap; |
|
||||
|
|
||||
// 分数差距
|
|
||||
private double scoreGap; |
|
||||
|
|
||||
public UniversityComparison() { |
|
||||
} |
|
||||
|
|
||||
public UniversityComparison(University u1, University u2) { |
|
||||
this.universityName1 = u1.getName(); |
|
||||
this.universityName2 = u2.getName(); |
|
||||
this.year = u1.getYear(); |
|
||||
this.rank1 = u1.getRank(); |
|
||||
this.rank2 = u2.getRank(); |
|
||||
this.score1 = u1.getScore(); |
|
||||
this.score2 = u2.getScore(); |
|
||||
this.province1 = u1.getProvince(); |
|
||||
this.province2 = u2.getProvince(); |
|
||||
|
|
||||
this.rankGap = Math.abs(rank1 - rank2); |
|
||||
this.scoreGap = Math.abs(score1 - score2); |
|
||||
} |
|
||||
|
|
||||
// Getters and Setters
|
|
||||
public String getUniversityName1() { |
|
||||
return universityName1; |
|
||||
} |
|
||||
|
|
||||
public void setUniversityName1(String universityName1) { |
|
||||
this.universityName1 = universityName1; |
|
||||
} |
|
||||
|
|
||||
public String getUniversityName2() { |
|
||||
return universityName2; |
|
||||
} |
|
||||
|
|
||||
public void setUniversityName2(String universityName2) { |
|
||||
this.universityName2 = universityName2; |
|
||||
} |
|
||||
|
|
||||
public int getYear() { |
|
||||
return year; |
|
||||
} |
|
||||
|
|
||||
public void setYear(int year) { |
|
||||
this.year = year; |
|
||||
} |
|
||||
|
|
||||
public int getRank1() { |
|
||||
return rank1; |
|
||||
} |
|
||||
|
|
||||
public void setRank1(int rank1) { |
|
||||
this.rank1 = rank1; |
|
||||
} |
|
||||
|
|
||||
public int getRank2() { |
|
||||
return rank2; |
|
||||
} |
|
||||
|
|
||||
public void setRank2(int rank2) { |
|
||||
this.rank2 = rank2; |
|
||||
} |
|
||||
|
|
||||
public double getScore1() { |
|
||||
return score1; |
|
||||
} |
|
||||
|
|
||||
public void setScore1(double score1) { |
|
||||
this.score1 = score1; |
|
||||
} |
|
||||
|
|
||||
public double getScore2() { |
|
||||
return score2; |
|
||||
} |
|
||||
|
|
||||
public void setScore2(double score2) { |
|
||||
this.score2 = score2; |
|
||||
} |
|
||||
|
|
||||
public String getProvince1() { |
|
||||
return province1; |
|
||||
} |
|
||||
|
|
||||
public void setProvince1(String province1) { |
|
||||
this.province1 = province1; |
|
||||
} |
|
||||
|
|
||||
public String getProvince2() { |
|
||||
return province2; |
|
||||
} |
|
||||
|
|
||||
public void setProvince2(String province2) { |
|
||||
this.province2 = province2; |
|
||||
} |
|
||||
|
|
||||
public int getRankGap() { |
|
||||
return rankGap; |
|
||||
} |
|
||||
|
|
||||
public void setRankGap(int rankGap) { |
|
||||
this.rankGap = rankGap; |
|
||||
} |
|
||||
|
|
||||
public double getScoreGap() { |
|
||||
return scoreGap; |
|
||||
} |
|
||||
|
|
||||
public void setScoreGap(double scoreGap) { |
|
||||
this.scoreGap = scoreGap; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取排名较高的高校名称 |
|
||||
*/ |
|
||||
public String getHigherRankedUniversity() { |
|
||||
return rank1 < rank2 ? universityName1 : universityName2; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 获取对比结果描述 |
|
||||
*/ |
|
||||
public String getComparisonResult() { |
|
||||
String higherUni = getHigherRankedUniversity(); |
|
||||
return String.format("%d年: %s 排名高于 %s %d位,分数相差 %.2f分", |
|
||||
year, higherUni, |
|
||||
higherUni.equals(universityName1) ? universityName2 : universityName1, |
|
||||
rankGap, scoreGap); |
|
||||
} |
|
||||
|
|
||||
@Override |
|
||||
public String toString() { |
|
||||
return getComparisonResult(); |
|
||||
} |
|
||||
} |
|
||||
@ -1,202 +0,0 @@ |
|||||
package com.university.storage; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileInputStream; |
|
||||
import java.io.FileOutputStream; |
|
||||
import java.io.IOException; |
|
||||
import java.io.InputStreamReader; |
|
||||
import java.io.OutputStreamWriter; |
|
||||
import java.io.Reader; |
|
||||
import java.io.Writer; |
|
||||
import java.nio.charset.StandardCharsets; |
|
||||
import java.util.ArrayList; |
|
||||
import java.util.List; |
|
||||
|
|
||||
import com.opencsv.CSVReader; |
|
||||
import com.opencsv.CSVWriter; |
|
||||
import com.opencsv.bean.CsvToBean; |
|
||||
import com.opencsv.bean.CsvToBeanBuilder; |
|
||||
import com.opencsv.bean.StatefulBeanToCsv; |
|
||||
import com.opencsv.bean.StatefulBeanToCsvBuilder; |
|
||||
import com.opencsv.exceptions.CsvDataTypeMismatchException; |
|
||||
import com.opencsv.exceptions.CsvRequiredFieldEmptyException; |
|
||||
import com.opencsv.exceptions.CsvValidationException; |
|
||||
import com.university.model.University; |
|
||||
|
|
||||
/** |
|
||||
* 数据存储类 |
|
||||
* 负责数据的持久化存储(CSV格式) |
|
||||
*/ |
|
||||
public class DataStorage { |
|
||||
|
|
||||
// 数据存储目录
|
|
||||
private static final String DATA_DIR = "data"; |
|
||||
|
|
||||
/** |
|
||||
* 构造方法,确保数据目录存在 |
|
||||
*/ |
|
||||
public DataStorage() { |
|
||||
File dir = new File(DATA_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 保存高校列表到CSV文件 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param year 年份 |
|
||||
*/ |
|
||||
public void saveToCsv(List<University> universities, int year) { |
|
||||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|
||||
|
|
||||
try (Writer writer = new OutputStreamWriter( |
|
||||
new FileOutputStream(filename), StandardCharsets.UTF_8)) { |
|
||||
|
|
||||
// 添加BOM,解决Excel中文乱码
|
|
||||
writer.write('\ufeff'); |
|
||||
|
|
||||
// 创建CSV写入器
|
|
||||
StatefulBeanToCsv<University> beanToCsv = new StatefulBeanToCsvBuilder<University>(writer) |
|
||||
.withQuotechar('"') |
|
||||
.withSeparator(',') |
|
||||
.withOrderedResults(true) |
|
||||
.build(); |
|
||||
|
|
||||
// 写入数据
|
|
||||
beanToCsv.write(universities); |
|
||||
System.out.println("数据已保存到: " + filename); |
|
||||
|
|
||||
} catch (IOException | CsvDataTypeMismatchException | CsvRequiredFieldEmptyException e) { |
|
||||
System.err.println("保存CSV文件失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 从CSV文件读取高校列表 |
|
||||
* |
|
||||
* @param year 年份 |
|
||||
* @return 高校列表 |
|
||||
*/ |
|
||||
public List<University> readFromCsv(int year) { |
|
||||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|
||||
List<University> universities = new ArrayList<>(); |
|
||||
|
|
||||
try (Reader reader = new InputStreamReader( |
|
||||
new FileInputStream(filename), StandardCharsets.UTF_8)) { |
|
||||
|
|
||||
// 创建CSV读取器
|
|
||||
CsvToBean<University> csvToBean = new CsvToBeanBuilder<University>(reader) |
|
||||
.withType(University.class) |
|
||||
.withIgnoreLeadingWhiteSpace(true) |
|
||||
.build(); |
|
||||
|
|
||||
// 读取数据
|
|
||||
universities = csvToBean.parse(); |
|
||||
System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据"); |
|
||||
|
|
||||
} catch (IOException e) { |
|
||||
System.err.println("读取CSV文件失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
return universities; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 保存原始数据(手动控制格式) |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param year 年份 |
|
||||
*/ |
|
||||
public void saveRawData(List<University> universities, int year) { |
|
||||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|
||||
|
|
||||
try (CSVWriter writer = new CSVWriter(new OutputStreamWriter( |
|
||||
new FileOutputStream(filename), StandardCharsets.UTF_8))) { |
|
||||
|
|
||||
// 写入表头
|
|
||||
String[] header = {"排名", "学校名称", "省份", "总分", "年份"}; |
|
||||
writer.writeNext(header); |
|
||||
|
|
||||
// 写入数据
|
|
||||
for (University u : universities) { |
|
||||
String[] row = { |
|
||||
String.valueOf(u.getRank()), |
|
||||
u.getName(), |
|
||||
u.getProvince(), |
|
||||
String.valueOf(u.getScore()), |
|
||||
String.valueOf(u.getYear()) |
|
||||
}; |
|
||||
writer.writeNext(row); |
|
||||
} |
|
||||
|
|
||||
System.out.println("原始数据已保存到: " + filename); |
|
||||
|
|
||||
} catch (IOException e) { |
|
||||
System.err.println("保存原始数据失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 读取原始数据 |
|
||||
* |
|
||||
* @param year 年份 |
|
||||
* @return 高校列表 |
|
||||
*/ |
|
||||
public List<University> readRawData(int year) { |
|
||||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|
||||
List<University> universities = new ArrayList<>(); |
|
||||
|
|
||||
try (CSVReader reader = new CSVReader(new InputStreamReader( |
|
||||
new FileInputStream(filename), StandardCharsets.UTF_8))) { |
|
||||
|
|
||||
// 跳过表头
|
|
||||
reader.readNext(); |
|
||||
|
|
||||
// 读取数据行
|
|
||||
String[] row; |
|
||||
while ((row = reader.readNext()) != null) { |
|
||||
if (row.length >= 5) { |
|
||||
University u = new University(); |
|
||||
u.setRank(Integer.parseInt(row[0].trim())); |
|
||||
u.setName(row[1].trim()); |
|
||||
u.setProvince(row[2].trim()); |
|
||||
u.setScore(Double.parseDouble(row[3].trim())); |
|
||||
u.setYear(Integer.parseInt(row[4].trim())); |
|
||||
universities.add(u); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据"); |
|
||||
|
|
||||
} catch (IOException | CsvValidationException e) { |
|
||||
System.err.println("读取原始数据失败: " + e.getMessage()); |
|
||||
} |
|
||||
|
|
||||
return universities; |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 检查某年份的数据是否存在 |
|
||||
* |
|
||||
* @param year 年份 |
|
||||
* @return 是否存在 |
|
||||
*/ |
|
||||
public boolean dataExists(int year) { |
|
||||
File file = new File(DATA_DIR + "/university_rank_" + year + ".csv"); |
|
||||
return file.exists(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 删除某年份的数据文件 |
|
||||
* |
|
||||
* @param year 年份 |
|
||||
*/ |
|
||||
public void deleteData(int year) { |
|
||||
File file = new File(DATA_DIR + "/university_rank_" + year + ".csv"); |
|
||||
if (file.exists() && file.delete()) { |
|
||||
System.out.println("已删除 " + year + " 年的数据文件"); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,299 +0,0 @@ |
|||||
package com.university.visualization; |
|
||||
|
|
||||
import com.university.model.RankChange; |
|
||||
import com.university.model.University; |
|
||||
import org.jfree.chart.ChartFactory; |
|
||||
import org.jfree.chart.ChartUtils; |
|
||||
import org.jfree.chart.JFreeChart; |
|
||||
import org.jfree.chart.axis.CategoryAxis; |
|
||||
import org.jfree.chart.axis.NumberAxis; |
|
||||
import org.jfree.chart.plot.CategoryPlot; |
|
||||
import org.jfree.chart.plot.PlotOrientation; |
|
||||
import org.jfree.chart.renderer.category.BarRenderer; |
|
||||
import org.jfree.chart.renderer.category.LineAndShapeRenderer; |
|
||||
import org.jfree.data.category.DefaultCategoryDataset; |
|
||||
|
|
||||
import java.awt.*; |
|
||||
import java.io.File; |
|
||||
import java.io.IOException; |
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
/** |
|
||||
* 图表生成类 |
|
||||
* 使用JFreeChart生成各种统计图表 |
|
||||
*/ |
|
||||
public class ChartGenerator { |
|
||||
|
|
||||
// 图表输出目录
|
|
||||
private static final String CHART_DIR = "charts"; |
|
||||
|
|
||||
/** |
|
||||
* 构造方法,确保图表目录存在 |
|
||||
*/ |
|
||||
public ChartGenerator() { |
|
||||
File dir = new File(CHART_DIR); |
|
||||
if (!dir.exists()) { |
|
||||
dir.mkdirs(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成Top N高校柱状图 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param year 年份 |
|
||||
* @param n 数量 |
|
||||
*/ |
|
||||
public void generateTopNBarChart(List<University> universities, int year, int n) { |
|
||||
// 创建数据集
|
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
|
|
||||
// 取前N名
|
|
||||
int count = Math.min(n, universities.size()); |
|
||||
for (int i = 0; i < count; i++) { |
|
||||
University u = universities.get(i); |
|
||||
dataset.addValue(u.getScore(), "总分", u.getName()); |
|
||||
} |
|
||||
|
|
||||
// 创建图表
|
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
year + "年高校排名Top" + n, // 标题
|
|
||||
"学校", // X轴标签
|
|
||||
"总分", // Y轴标签
|
|
||||
dataset, // 数据集
|
|
||||
PlotOrientation.VERTICAL, // 方向
|
|
||||
true, // 显示图例
|
|
||||
true, // 显示工具提示
|
|
||||
false // 不生成URL
|
|
||||
); |
|
||||
|
|
||||
// 美化图表
|
|
||||
customizeBarChart(chart); |
|
||||
|
|
||||
// 保存图表
|
|
||||
saveChart(chart, "top" + n + "_" + year + ".png"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成省份分布饼图 |
|
||||
* |
|
||||
* @param provinceCount 省份统计 |
|
||||
* @param year 年份 |
|
||||
*/ |
|
||||
public void generateProvincePieChart(Map<String, Long> provinceCount, int year) { |
|
||||
// 创建饼图数据集
|
|
||||
org.jfree.data.general.DefaultPieDataset<String> dataset = |
|
||||
new org.jfree.data.general.DefaultPieDataset<>(); |
|
||||
|
|
||||
// 添加数据
|
|
||||
provinceCount.forEach(dataset::setValue); |
|
||||
|
|
||||
// 创建饼图
|
|
||||
JFreeChart chart = ChartFactory.createPieChart( |
|
||||
year + "年高校省份分布", // 标题
|
|
||||
dataset, // 数据集
|
|
||||
true, // 显示图例
|
|
||||
true, // 显示工具提示
|
|
||||
false // 不生成URL
|
|
||||
); |
|
||||
|
|
||||
// 获取饼图plot并设置标签
|
|
||||
org.jfree.chart.plot.PiePlot plot = (org.jfree.chart.plot.PiePlot) chart.getPlot(); |
|
||||
|
|
||||
// 设置标签格式:省份名称 + 数量 + 百分比
|
|
||||
plot.setLabelGenerator(new org.jfree.chart.labels.StandardPieSectionLabelGenerator( |
|
||||
"{0}: {1}所 ({2})", |
|
||||
java.text.NumberFormat.getIntegerInstance(), |
|
||||
java.text.NumberFormat.getPercentInstance() |
|
||||
)); |
|
||||
|
|
||||
// 设置标签字体
|
|
||||
plot.setLabelFont(new Font("微软雅黑", Font.PLAIN, 12)); |
|
||||
|
|
||||
// 设置标签颜色
|
|
||||
plot.setLabelPaint(Color.BLACK); |
|
||||
|
|
||||
// 设置标签背景
|
|
||||
plot.setLabelBackgroundPaint(new Color(255, 255, 255, 200)); |
|
||||
|
|
||||
// 设置标题字体
|
|
||||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|
||||
|
|
||||
// 保存图表
|
|
||||
saveChart(chart, "province_distribution_" + year + ".png"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成历年排名变化折线图 |
|
||||
* |
|
||||
* @param universityHistory 某高校历年数据 |
|
||||
* @param universityName 高校名称 |
|
||||
*/ |
|
||||
public void generateRankTrendLineChart(List<University> universityHistory, |
|
||||
String universityName) { |
|
||||
// 创建数据集
|
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
|
|
||||
// 添加数据(注意:排名越小越好,所以取负值让折线图向上表示进步)
|
|
||||
for (University u : universityHistory) { |
|
||||
dataset.addValue(u.getRank(), "排名", String.valueOf(u.getYear())); |
|
||||
} |
|
||||
|
|
||||
// 创建图表
|
|
||||
JFreeChart chart = ChartFactory.createLineChart( |
|
||||
universityName + " 历年排名变化", // 标题
|
|
||||
"年份", // X轴标签
|
|
||||
"排名", // Y轴标签
|
|
||||
dataset, // 数据集
|
|
||||
PlotOrientation.VERTICAL, // 方向
|
|
||||
true, // 显示图例
|
|
||||
true, // 显示工具提示
|
|
||||
false // 不生成URL
|
|
||||
); |
|
||||
|
|
||||
// 美化折线图
|
|
||||
customizeLineChart(chart); |
|
||||
|
|
||||
// 保存图表
|
|
||||
saveChart(chart, "rank_trend_" + universityName + ".png"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成排名变化对比图 |
|
||||
* |
|
||||
* @param changes 排名变化列表 |
|
||||
* @param title 图表标题 |
|
||||
* @param filename 文件名 |
|
||||
*/ |
|
||||
public void generateRankChangeChart(List<RankChange> changes, String title, String filename) { |
|
||||
// 创建数据集
|
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
|
|
||||
// 添加数据
|
|
||||
for (RankChange change : changes) { |
|
||||
dataset.addValue(change.getRankChange(), "排名变化", change.getUniversityName()); |
|
||||
} |
|
||||
|
|
||||
// 创建图表
|
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
title, |
|
||||
"学校", |
|
||||
"排名变化(位)", |
|
||||
dataset, |
|
||||
PlotOrientation.HORIZONTAL, |
|
||||
true, |
|
||||
true, |
|
||||
false |
|
||||
); |
|
||||
|
|
||||
// 美化
|
|
||||
customizeBarChart(chart); |
|
||||
|
|
||||
// 保存
|
|
||||
saveChart(chart, filename); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 生成多高校对比图 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param year 年份 |
|
||||
*/ |
|
||||
public void generateComparisonChart(List<University> universities, int year) { |
|
||||
// 创建数据集
|
|
||||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|
||||
|
|
||||
// 添加分数数据
|
|
||||
for (University u : universities) { |
|
||||
dataset.addValue(u.getScore(), "总分", u.getName()); |
|
||||
} |
|
||||
|
|
||||
// 创建图表
|
|
||||
JFreeChart chart = ChartFactory.createBarChart( |
|
||||
year + "年高校分数对比", |
|
||||
"学校", |
|
||||
"总分", |
|
||||
dataset, |
|
||||
PlotOrientation.VERTICAL, |
|
||||
true, |
|
||||
true, |
|
||||
false |
|
||||
); |
|
||||
|
|
||||
customizeBarChart(chart); |
|
||||
saveChart(chart, "comparison_" + year + ".png"); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 美化柱状图 |
|
||||
*/ |
|
||||
private void customizeBarChart(JFreeChart chart) { |
|
||||
CategoryPlot plot = chart.getCategoryPlot(); |
|
||||
|
|
||||
// 设置背景色
|
|
||||
plot.setBackgroundPaint(Color.WHITE); |
|
||||
plot.setRangeGridlinePaint(Color.LIGHT_GRAY); |
|
||||
|
|
||||
// 设置柱状图颜色
|
|
||||
BarRenderer renderer = (BarRenderer) plot.getRenderer(); |
|
||||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|
||||
|
|
||||
// 设置字体
|
|
||||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|
||||
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|
||||
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|
||||
|
|
||||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|
||||
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|
||||
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|
||||
|
|
||||
// 设置标题字体
|
|
||||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 美化折线图 |
|
||||
*/ |
|
||||
private void customizeLineChart(JFreeChart chart) { |
|
||||
CategoryPlot plot = chart.getCategoryPlot(); |
|
||||
|
|
||||
// 设置背景色
|
|
||||
plot.setBackgroundPaint(Color.WHITE); |
|
||||
plot.setRangeGridlinePaint(Color.LIGHT_GRAY); |
|
||||
|
|
||||
// 设置折线样式
|
|
||||
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); |
|
||||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|
||||
renderer.setSeriesStroke(0, new BasicStroke(2.0f)); |
|
||||
renderer.setSeriesShapesVisible(0, true); |
|
||||
|
|
||||
// 设置字体
|
|
||||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|
||||
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|
||||
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|
||||
|
|
||||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|
||||
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|
||||
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|
||||
|
|
||||
// 设置标题字体
|
|
||||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 保存图表到文件 |
|
||||
* |
|
||||
* @param chart 图表对象 |
|
||||
* @param filename 文件名 |
|
||||
*/ |
|
||||
private void saveChart(JFreeChart chart, String filename) { |
|
||||
try { |
|
||||
File file = new File(CHART_DIR + "/" + filename); |
|
||||
ChartUtils.saveChartAsPNG(file, chart, 800, 600); |
|
||||
System.out.println("图表已保存: " + file.getAbsolutePath()); |
|
||||
} catch (IOException e) { |
|
||||
System.err.println("保存图表失败: " + e.getMessage()); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -1,241 +0,0 @@ |
|||||
package com.university.visualization; |
|
||||
|
|
||||
import com.university.analysis.RankAnalyzer; |
|
||||
import com.university.model.RankChange; |
|
||||
import com.university.model.University; |
|
||||
import com.university.model.UniversityComparison; |
|
||||
|
|
||||
import java.util.List; |
|
||||
import java.util.Map; |
|
||||
|
|
||||
/** |
|
||||
* 控制台报表类 |
|
||||
* 格式化输出各种统计结果到控制台 |
|
||||
*/ |
|
||||
public class ConsoleReporter { |
|
||||
|
|
||||
/** |
|
||||
* 打印分隔线 |
|
||||
*/ |
|
||||
private void printSeparator() { |
|
||||
System.out.println("=".repeat(80)); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印高校列表 |
|
||||
* |
|
||||
* @param universities 高校列表 |
|
||||
* @param title 标题 |
|
||||
*/ |
|
||||
public void printUniversityList(List<University> universities, String title) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【" + title + "】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
// 表头
|
|
||||
System.out.printf("%-6s %-20s %-10s %-10s %-6s%n", |
|
||||
"排名", "学校名称", "省份", "总分", "年份"); |
|
||||
System.out.println("-".repeat(80)); |
|
||||
|
|
||||
// 数据行
|
|
||||
for (University u : universities) { |
|
||||
System.out.printf("%-6d %-20s %-10s %-10.2f %-6d%n", |
|
||||
u.getRank(), |
|
||||
truncate(u.getName(), 20), |
|
||||
u.getProvince(), |
|
||||
u.getScore(), |
|
||||
u.getYear()); |
|
||||
} |
|
||||
|
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印省份统计 |
|
||||
* |
|
||||
* @param provinceCount 省份统计 |
|
||||
* @param title 标题 |
|
||||
*/ |
|
||||
public void printProvinceStatistics(Map<String, Long> provinceCount, String title) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【" + title + "】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
System.out.printf("%-15s %-10s%n", "省份", "高校数量"); |
|
||||
System.out.println("-".repeat(30)); |
|
||||
|
|
||||
// 按数量降序排序
|
|
||||
provinceCount.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
|
||||
.forEach(entry -> System.out.printf("%-15s %-10d%n", |
|
||||
entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印分数统计 |
|
||||
* |
|
||||
* @param statistics 统计信息 |
|
||||
* @param title 标题 |
|
||||
*/ |
|
||||
public void printScoreStatistics(RankAnalyzer.ScoreStatistics statistics, String title) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【" + title + "】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
System.out.printf("高校数量: %d%n", statistics.getCount()); |
|
||||
System.out.printf("平均分数: %.2f%n", statistics.getAverage()); |
|
||||
System.out.printf("最高分数: %.2f%n", statistics.getMax()); |
|
||||
System.out.printf("最低分数: %.2f%n", statistics.getMin()); |
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印排名变化 |
|
||||
* |
|
||||
* @param changes 排名变化列表 |
|
||||
* @param title 标题 |
|
||||
*/ |
|
||||
public void printRankChanges(List<RankChange> changes, String title) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【" + title + "】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
System.out.printf("%-20s %-8s %-8s %-12s %-12s%n", |
|
||||
"学校名称", "起始年", "结束年", "排名变化", "分数变化"); |
|
||||
System.out.println("-".repeat(80)); |
|
||||
|
|
||||
for (RankChange change : changes) { |
|
||||
String rankChangeStr = change.getRankChange() > 0 ? |
|
||||
"↑" + change.getRankChange() : |
|
||||
(change.getRankChange() < 0 ? |
|
||||
"↓" + Math.abs(change.getRankChange()) : |
|
||||
"-"); |
|
||||
|
|
||||
System.out.printf("%-20s %-8d %-8d %-12s %+.2f%n", |
|
||||
truncate(change.getUniversityName(), 20), |
|
||||
change.getStartYear(), |
|
||||
change.getEndYear(), |
|
||||
rankChangeStr, |
|
||||
change.getScoreChange()); |
|
||||
} |
|
||||
|
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印高校对比结果 |
|
||||
* |
|
||||
* @param comparison 对比结果 |
|
||||
*/ |
|
||||
public void printComparison(UniversityComparison comparison) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【高校对比分析】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
System.out.printf("对比年份: %d年%n%n", comparison.getYear()); |
|
||||
|
|
||||
System.out.println("学校信息:"); |
|
||||
System.out.println("-".repeat(50)); |
|
||||
System.out.printf("%-20s %-10s %-10s%n", "学校", "排名", "分数"); |
|
||||
System.out.printf("%-20s %-10d %-10.2f%n", |
|
||||
comparison.getUniversityName1(), |
|
||||
comparison.getRank1(), |
|
||||
comparison.getScore1()); |
|
||||
System.out.printf("%-20s %-10d %-10.2f%n", |
|
||||
comparison.getUniversityName2(), |
|
||||
comparison.getRank2(), |
|
||||
comparison.getScore2()); |
|
||||
|
|
||||
System.out.println(); |
|
||||
System.out.println("对比结果:"); |
|
||||
System.out.println("-".repeat(50)); |
|
||||
System.out.printf("排名领先: %s (领先%d位)%n", |
|
||||
comparison.getHigherRankedUniversity(), |
|
||||
comparison.getRankGap()); |
|
||||
System.out.printf("分数差距: %.2f分%n", comparison.getScoreGap()); |
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印历年趋势 |
|
||||
* |
|
||||
* @param history 历年数据 |
|
||||
* @param name 学校名称 |
|
||||
*/ |
|
||||
public void printYearlyTrend(List<University> history, String name) { |
|
||||
printSeparator(); |
|
||||
System.out.println("【" + name + " 历年排名趋势】"); |
|
||||
printSeparator(); |
|
||||
|
|
||||
System.out.printf("%-8s %-8s %-10s%n", "年份", "排名", "分数"); |
|
||||
System.out.println("-".repeat(30)); |
|
||||
|
|
||||
University previous = null; |
|
||||
for (University u : history) { |
|
||||
String trend = ""; |
|
||||
if (previous != null) { |
|
||||
int change = previous.getRank() - u.getRank(); |
|
||||
if (change > 0) { |
|
||||
trend = "↑" + change; |
|
||||
} else if (change < 0) { |
|
||||
trend = "↓" + Math.abs(change); |
|
||||
} else { |
|
||||
trend = "-"; |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
System.out.printf("%-8d %-8d %-10.2f %s%n", |
|
||||
u.getYear(), u.getRank(), u.getScore(), trend); |
|
||||
previous = u; |
|
||||
} |
|
||||
|
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印菜单 |
|
||||
*/ |
|
||||
public void printMenu() { |
|
||||
printSeparator(); |
|
||||
System.out.println("【高校排名分析系统】"); |
|
||||
printSeparator(); |
|
||||
System.out.println("1. 查看Top N高校排名"); |
|
||||
System.out.println("2. 按省份查看高校"); |
|
||||
System.out.println("3. 搜索高校"); |
|
||||
System.out.println("4. 查看省份分布统计"); |
|
||||
System.out.println("5. 查看分数统计"); |
|
||||
System.out.println("6. 查看历年排名变化"); |
|
||||
System.out.println("7. 对比两所高校"); |
|
||||
System.out.println("8. 查看某高校历年趋势"); |
|
||||
System.out.println("9. 生成所有图表"); |
|
||||
System.out.println("0. 退出系统"); |
|
||||
printSeparator(); |
|
||||
System.out.print("请选择功能(0-9): "); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 打印欢迎信息 |
|
||||
*/ |
|
||||
public void printWelcome() { |
|
||||
printSeparator(); |
|
||||
System.out.println(" 欢迎使用高校排名分析系统"); |
|
||||
System.out.println(" 本系统提供高校排名数据爬取、分析和可视化功能"); |
|
||||
printSeparator(); |
|
||||
System.out.println(); |
|
||||
} |
|
||||
|
|
||||
/** |
|
||||
* 截断字符串 |
|
||||
* |
|
||||
* @param str 原字符串 |
|
||||
* @param length 最大长度 |
|
||||
* @return 截断后的字符串 |
|
||||
*/ |
|
||||
private String truncate(String str, int length) { |
|
||||
if (str == null) return ""; |
|
||||
if (str.length() <= length) return str; |
|
||||
return str.substring(0, length - 3) + "..."; |
|
||||
} |
|
||||
} |
|
||||