@ -0,0 +1,4 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
/**
 * Placeholder for a history-related command.
 *
 * <p>The class currently declares no fields or methods.
 */
public class HistoryCommand {
}
|||
@ -0,0 +1,10 @@ |
|||
# 默认忽略的文件 |
|||
/shelf/ |
|||
/workspace.xml |
|||
# 已忽略包含查询文件的默认文件夹 |
|||
/queries/ |
|||
# Datasource local storage ignored files |
|||
/dataSources/ |
|||
/dataSources.local.xml |
|||
# 基于编辑器的 HTTP 客户端请求 |
|||
/httpRequests/ |
|||
@ -0,0 +1,13 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="CompilerConfiguration"> |
|||
<annotationProcessing> |
|||
<profile name="Maven default annotation processors profile" enabled="true"> |
|||
<sourceOutputDir name="target/generated-sources/annotations" /> |
|||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
|||
<outputRelativeToContentRoot value="true" /> |
|||
<module name="crawler-project" /> |
|||
</profile> |
|||
</annotationProcessing> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="Encoding"> |
|||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
|||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,20 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="RemoteRepositoriesConfiguration"> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Central Repository" /> |
|||
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Maven Central repository" /> |
|||
<option name="url" value="https://repo1.maven.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="jboss.community" /> |
|||
<option name="name" value="JBoss Community repository" /> |
|||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
|||
</remote-repository> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,12 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
|||
<component name="MavenProjectsManager"> |
|||
<option name="originalFiles"> |
|||
<list> |
|||
<option value="$PROJECT_DIR$/pom.xml" /> |
|||
</list> |
|||
</option> |
|||
</component> |
|||
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK" /> |
|||
</project> |
|||
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="VcsDirectoryMappings"> |
|||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> |
|||
</component> |
|||
</project> |
|||
|
After Width: | Height: | Size: 18 KiB |
|
After Width: | Height: | Size: 35 KiB |
|
After Width: | Height: | Size: 26 KiB |
|
After Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 54 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 55 KiB |
|
Before Width: | Height: | Size: 16 KiB |
|
Before Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 17 KiB |
|
After Width: | Height: | Size: 24 KiB |
|
After Width: | Height: | Size: 44 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
After Width: | Height: | Size: 32 KiB |
|
After Width: | Height: | Size: 30 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
Before Width: | Height: | Size: 31 KiB |
|
|
|
@ -0,0 +1,82 @@ |
|||
[ |
|||
{ |
|||
"title": "专栏", |
|||
"publishTime": "", |
|||
"url": "http://zhuanlan.sina.com.cn/" |
|||
}, |
|||
{ |
|||
"title": "导航", |
|||
"publishTime": "", |
|||
"url": "http://news.sina.com.cn/guide/" |
|||
}, |
|||
{ |
|||
"title": "新浪财经", |
|||
"publishTime": "", |
|||
"url": "https://finance.sina.com.cn/mobile/comfinanceweb.shtml" |
|||
}, |
|||
{ |
|||
"title": "新浪博客", |
|||
"publishTime": "", |
|||
"url": "https://blog.sina.com.cn/lm/z/app/" |
|||
}, |
|||
{ |
|||
"title": "我的收藏", |
|||
"publishTime": "", |
|||
"url": "http://my.sina.com.cn/#location=fav" |
|||
}, |
|||
{ |
|||
"title": "注册", |
|||
"publishTime": "", |
|||
"url": "https://login.sina.com.cn/signup/signup?entry=news" |
|||
}, |
|||
{ |
|||
"title": "新闻中心", |
|||
"publishTime": "", |
|||
"url": "http://news.sina.com.cn/" |
|||
}, |
|||
{ |
|||
"title": "新闻排行", |
|||
"publishTime": "", |
|||
"url": "http://news.sina.com.cn/hotnews/" |
|||
}, |
|||
{ |
|||
"title": "联系我们", |
|||
"publishTime": "", |
|||
"url": "http://www.sina.com.cn/contactus.html" |
|||
}, |
|||
{ |
|||
"title": "广告服务", |
|||
"publishTime": "", |
|||
"url": "http://emarketing.sina.com.cn/" |
|||
}, |
|||
{ |
|||
"title": "通行证注册", |
|||
"publishTime": "", |
|||
"url": "http://login.sina.com.cn/signup/signup" |
|||
}, |
|||
{ |
|||
"title": "产品答疑", |
|||
"publishTime": "", |
|||
"url": "http://help.sina.com.cn/" |
|||
}, |
|||
{ |
|||
"title": "招聘信息", |
|||
"publishTime": "", |
|||
"url": "http://career.sina.com.cn/" |
|||
}, |
|||
{ |
|||
"title": "网站律师", |
|||
"publishTime": "", |
|||
"url": "http://corp.sina.com.cn/lawfirm/sina.htm" |
|||
}, |
|||
{ |
|||
"title": "版权所有", |
|||
"publishTime": "", |
|||
"url": "https://corp.sina.com.cn/chn/copyright.html" |
|||
}, |
|||
{ |
|||
"title": "意见反馈", |
|||
"publishTime": "", |
|||
"url": "http://news.sina.com.cn/feedback/post.html" |
|||
} |
|||
] |
|||
@ -0,0 +1,212 @@ |
|||
[ |
|||
{ |
|||
"rank": 1, |
|||
"universityName": "清华大学 Tsinghua University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 2, |
|||
"universityName": "北京大学 Peking University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 3, |
|||
"universityName": "浙江大学 Zhejiang University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "浙江", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 4, |
|||
"universityName": "上海交通大学 Shanghai Jiao Tong University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "上海", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 5, |
|||
"universityName": "复旦大学 Fudan University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "上海", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 6, |
|||
"universityName": "南京大学 Nanjing University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "江苏", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 7, |
|||
"universityName": "中国科学技术大学 University of Science and Technology of China 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "安徽", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 8, |
|||
"universityName": "武汉大学 Wuhan University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "湖北", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 9, |
|||
"universityName": "华中科技大学 Huazhong University of Science and Technology 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "湖北", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 10, |
|||
"universityName": "西安交通大学 Xi'an Jiaotong University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "陕西", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 11, |
|||
"universityName": "北京航空航天大学 Beihang University 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 12, |
|||
"universityName": "中山大学 Sun Yat-sen University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "广东", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 13, |
|||
"universityName": "北京理工大学 Beijing Institute of Technology 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 14, |
|||
"universityName": "哈尔滨工业大学 Harbin Institute of Technology 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "黑龙江", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 15, |
|||
"universityName": "四川大学 Sichuan University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "四川", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 16, |
|||
"universityName": "东南大学 Southeast University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "江苏", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 17, |
|||
"universityName": "中国人民大学 Renmin University of China 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 18, |
|||
"universityName": "同济大学 Tongji University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "上海", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 19, |
|||
"universityName": "北京师范大学 Beijing Normal University 双一流/985/211", |
|||
"totalScore": "师范", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 20, |
|||
"universityName": "天津大学 Tianjin University 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "天津", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 21, |
|||
"universityName": "西北工业大学 Northwestern Polytechnical University 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "陕西", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 22, |
|||
"universityName": "山东大学 Shandong University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "山东", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 23, |
|||
"universityName": "南开大学 Nankai University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "天津", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 24, |
|||
"universityName": "厦门大学 Xiamen University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "福建", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 25, |
|||
"universityName": "中国农业大学 China Agricultural University 双一流/985/211", |
|||
"totalScore": "农业", |
|||
"province": "北京", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 26, |
|||
"universityName": "吉林大学 Jilin University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "吉林", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 27, |
|||
"universityName": "中南大学 Central South University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "湖南", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 28, |
|||
"universityName": "大连理工大学 Dalian University of Technology 双一流/985/211", |
|||
"totalScore": "理工", |
|||
"province": "辽宁", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 29, |
|||
"universityName": "湖南大学 Hunan University 双一流/985/211", |
|||
"totalScore": "综合", |
|||
"province": "湖南", |
|||
"category": "" |
|||
}, |
|||
{ |
|||
"rank": 30, |
|||
"universityName": "华东师范大学 East China Normal University 双一流/985/211", |
|||
"totalScore": "师范", |
|||
"province": "上海", |
|||
"category": "" |
|||
} |
|||
] |
|||
@ -0,0 +1,335 @@ |
|||
[ |
|||
{ |
|||
"cityName": "上海", |
|||
"temperature": 22.7, |
|||
"humidity": 83.0, |
|||
"windSpeed": 7.8, |
|||
"weatherCode": "3", |
|||
"hourlyTimes": [ |
|||
"00:00", |
|||
"01:00", |
|||
"02:00", |
|||
"03:00", |
|||
"04:00", |
|||
"05:00", |
|||
"06:00", |
|||
"07:00", |
|||
"08:00", |
|||
"09:00", |
|||
"10:00", |
|||
"11:00", |
|||
"12:00", |
|||
"13:00", |
|||
"14:00", |
|||
"15:00", |
|||
"16:00", |
|||
"17:00", |
|||
"18:00", |
|||
"19:00", |
|||
"20:00", |
|||
"21:00", |
|||
"22:00", |
|||
"23:00" |
|||
], |
|||
"hourlyTemperatures": [ |
|||
19.2, |
|||
19.0, |
|||
18.9, |
|||
18.3, |
|||
18.1, |
|||
17.8, |
|||
18.7, |
|||
20.9, |
|||
23.5, |
|||
24.9, |
|||
26.2, |
|||
27.0, |
|||
27.5, |
|||
28.1, |
|||
28.2, |
|||
27.4, |
|||
26.7, |
|||
25.0, |
|||
23.8, |
|||
22.7, |
|||
22.0, |
|||
20.6, |
|||
19.9, |
|||
19.4 |
|||
], |
|||
"hourlyHumidities": [ |
|||
83, |
|||
84, |
|||
85, |
|||
87, |
|||
89, |
|||
92, |
|||
90, |
|||
79, |
|||
55, |
|||
43, |
|||
38, |
|||
34, |
|||
33, |
|||
31, |
|||
30, |
|||
32, |
|||
35, |
|||
45, |
|||
54, |
|||
63, |
|||
67, |
|||
73, |
|||
76, |
|||
78 |
|||
], |
|||
"hourlyWindSpeeds": [ |
|||
3.8, |
|||
3.3, |
|||
2.6, |
|||
1.9, |
|||
1.0, |
|||
0.6, |
|||
2.3, |
|||
0.6, |
|||
1.8, |
|||
2.7, |
|||
3.0, |
|||
3.5, |
|||
5.4, |
|||
5.4, |
|||
6.0, |
|||
7.8, |
|||
9.2, |
|||
9.0, |
|||
8.1, |
|||
7.8, |
|||
7.2, |
|||
7.1, |
|||
7.1, |
|||
7.1 |
|||
] |
|||
}, |
|||
{ |
|||
"cityName": "广州", |
|||
"temperature": 25.9, |
|||
"humidity": 85.0, |
|||
"windSpeed": 5.3, |
|||
"weatherCode": "81", |
|||
"hourlyTimes": [ |
|||
"00:00", |
|||
"01:00", |
|||
"02:00", |
|||
"03:00", |
|||
"04:00", |
|||
"05:00", |
|||
"06:00", |
|||
"07:00", |
|||
"08:00", |
|||
"09:00", |
|||
"10:00", |
|||
"11:00", |
|||
"12:00", |
|||
"13:00", |
|||
"14:00", |
|||
"15:00", |
|||
"16:00", |
|||
"17:00", |
|||
"18:00", |
|||
"19:00", |
|||
"20:00", |
|||
"21:00", |
|||
"22:00", |
|||
"23:00" |
|||
], |
|||
"hourlyTemperatures": [ |
|||
27.7, |
|||
27.2, |
|||
26.0, |
|||
25.5, |
|||
25.4, |
|||
25.0, |
|||
25.0, |
|||
26.0, |
|||
28.1, |
|||
29.3, |
|||
30.6, |
|||
31.9, |
|||
33.0, |
|||
33.8, |
|||
33.9, |
|||
33.6, |
|||
34.2, |
|||
30.5, |
|||
29.4, |
|||
25.9, |
|||
26.4, |
|||
26.5, |
|||
26.3, |
|||
26.2 |
|||
], |
|||
"hourlyHumidities": [ |
|||
85, |
|||
87, |
|||
82, |
|||
84, |
|||
85, |
|||
90, |
|||
92, |
|||
87, |
|||
76, |
|||
70, |
|||
63, |
|||
57, |
|||
54, |
|||
53, |
|||
53, |
|||
54, |
|||
51, |
|||
69, |
|||
72, |
|||
95, |
|||
97, |
|||
96, |
|||
98, |
|||
98 |
|||
], |
|||
"hourlyWindSpeeds": [ |
|||
5.8, |
|||
4.9, |
|||
4.4, |
|||
3.3, |
|||
3.4, |
|||
3.8, |
|||
4.1, |
|||
5.6, |
|||
4.0, |
|||
3.8, |
|||
4.0, |
|||
2.8, |
|||
1.3, |
|||
3.3, |
|||
5.1, |
|||
5.2, |
|||
5.1, |
|||
12.3, |
|||
3.1, |
|||
5.3, |
|||
3.6, |
|||
1.7, |
|||
2.0, |
|||
1.4 |
|||
] |
|||
}, |
|||
{ |
|||
"cityName": "北京", |
|||
"temperature": 32.3, |
|||
"humidity": 56.0, |
|||
"windSpeed": 17.1, |
|||
"weatherCode": "0", |
|||
"hourlyTimes": [ |
|||
"00:00", |
|||
"01:00", |
|||
"02:00", |
|||
"03:00", |
|||
"04:00", |
|||
"05:00", |
|||
"06:00", |
|||
"07:00", |
|||
"08:00", |
|||
"09:00", |
|||
"10:00", |
|||
"11:00", |
|||
"12:00", |
|||
"13:00", |
|||
"14:00", |
|||
"15:00", |
|||
"16:00", |
|||
"17:00", |
|||
"18:00", |
|||
"19:00", |
|||
"20:00", |
|||
"21:00", |
|||
"22:00", |
|||
"23:00" |
|||
], |
|||
"hourlyTemperatures": [ |
|||
22.8, |
|||
21.9, |
|||
21.2, |
|||
20.1, |
|||
19.6, |
|||
18.8, |
|||
19.2, |
|||
20.7, |
|||
23.7, |
|||
27.0, |
|||
29.9, |
|||
32.5, |
|||
34.5, |
|||
35.8, |
|||
36.3, |
|||
36.6, |
|||
36.2, |
|||
35.7, |
|||
34.2, |
|||
32.3, |
|||
30.9, |
|||
29.9, |
|||
29.1, |
|||
28.6 |
|||
], |
|||
"hourlyHumidities": [ |
|||
56, |
|||
60, |
|||
63, |
|||
69, |
|||
71, |
|||
75, |
|||
74, |
|||
67, |
|||
57, |
|||
45, |
|||
37, |
|||
28, |
|||
21, |
|||
18, |
|||
20, |
|||
21, |
|||
26, |
|||
26, |
|||
30, |
|||
33, |
|||
35, |
|||
36, |
|||
35, |
|||
34 |
|||
], |
|||
"hourlyWindSpeeds": [ |
|||
11.6, |
|||
10.6, |
|||
7.6, |
|||
4.5, |
|||
3.9, |
|||
2.3, |
|||
2.3, |
|||
0.6, |
|||
0.8, |
|||
2.2, |
|||
2.4, |
|||
4.9, |
|||
7.6, |
|||
10.4, |
|||
12.2, |
|||
13.4, |
|||
14.7, |
|||
15.1, |
|||
14.5, |
|||
17.1, |
|||
16.9, |
|||
18.1, |
|||
19.7, |
|||
20.1 |
|||
] |
|||
} |
|||
] |
|||
@ -0,0 +1,14 @@ |
|||
========== 书籍数据分析报告 ========== |
|||
生成时间: 2026-05-30T17:47:42.026682900 |
|||
分析书籍总数: 600 |
|||
|
|||
【价格统计】 |
|||
最高价: £59.92 |
|||
最低价: £10.01 |
|||
平均价: £35.29 |
|||
|
|||
【库存统计】 |
|||
有库存: 600 本 |
|||
缺货: 0 本 |
|||
|
|||
报告生成完成 |
|||
@ -0,0 +1,31 @@ |
|||
========== 新闻数据分析报告 ========== |
|||
生成时间: 2026-05-30T17:47:42.145591 |
|||
分析新闻总数: 16 |
|||
|
|||
【发布时间分布】 |
|||
00:00 - 01:00: 0 条 |
|||
01:00 - 02:00: 0 条 |
|||
02:00 - 03:00: 0 条 |
|||
03:00 - 04:00: 0 条 |
|||
04:00 - 05:00: 0 条 |
|||
05:00 - 06:00: 0 条 |
|||
06:00 - 07:00: 0 条 |
|||
07:00 - 08:00: 0 条 |
|||
08:00 - 09:00: 0 条 |
|||
09:00 - 10:00: 0 条 |
|||
10:00 - 11:00: 0 条 |
|||
11:00 - 12:00: 0 条 |
|||
12:00 - 13:00: 0 条 |
|||
13:00 - 14:00: 0 条 |
|||
14:00 - 15:00: 0 条 |
|||
15:00 - 16:00: 0 条 |
|||
16:00 - 17:00: 0 条 |
|||
17:00 - 18:00: 16 条 |
|||
18:00 - 19:00: 0 条 |
|||
19:00 - 20:00: 0 条 |
|||
20:00 - 21:00: 0 条 |
|||
21:00 - 22:00: 0 条 |
|||
22:00 - 23:00: 0 条 |
|||
23:00 - 00:00: 0 条 |
|||
|
|||
报告生成完成 |
|||
@ -0,0 +1,17 @@ |
|||
========== 大学排名数据分析报告 ========== |
|||
生成时间: 2026-05-30T17:47:42.272388 |
|||
分析大学总数: 30 |
|||
|
|||
【省份排行榜 TOP 10】 |
|||
北京: 7 所大学 |
|||
上海: 4 所大学 |
|||
湖北: 2 所大学 |
|||
湖南: 2 所大学 |
|||
天津: 2 所大学 |
|||
陕西: 2 所大学 |
|||
江苏: 2 所大学 |
|||
山东: 1 所大学 |
|||
福建: 1 所大学 |
|||
吉林: 1 所大学 |
|||
|
|||
报告生成完成 |
|||
@ -0,0 +1,29 @@ |
|||
========== 天气数据分析报告 ========== |
|||
生成时间: 2026-05-30T17:47:42.585539200 |
|||
分析城市数量: 3 |
|||
数据来源: Open-Meteo API (CC BY 4.0) |
|||
|
|||
【多城市天气对比】 |
|||
|
|||
城市: 上海 |
|||
当前温度: 24.0°C |
|||
当前湿度: 83% |
|||
风速: 8.3 km/h |
|||
天气: 多云 |
|||
24小时平均温度: 22.7°C |
|||
|
|||
城市: 广州 |
|||
当前温度: 29.8°C |
|||
当前湿度: 85% |
|||
风速: 2.4 km/h |
|||
天气: 小毛毛雨 |
|||
24小时平均温度: 28.6°C |
|||
|
|||
城市: 北京 |
|||
当前温度: 34.6°C |
|||
当前湿度: 56% |
|||
风速: 14.4 km/h |
|||
天气: 晴 |
|||
24小时平均温度: 28.2°C |
|||
|
|||
报告生成完成 |
|||
@ -0,0 +1,15 @@ |
|||
package com.example.crawler; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
|
|||
/** |
|||
* 爬虫项目主入口类 |
|||
*/ |
|||
public class Main { |
|||
|
|||
public static void main(String[] args) { |
|||
// 创建控制器并启动CLI界面
|
|||
CrawlerController controller = new CrawlerController(); |
|||
controller.start(); |
|||
} |
|||
} |
|||
@ -0,0 +1,229 @@ |
|||
package com.example.crawler.chart; |
|||
|
|||
import java.awt.Color; |
|||
import java.awt.Font; |
|||
import java.io.File; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
|
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import org.jfree.chart.ChartFactory; |
|||
import org.jfree.chart.ChartUtils; |
|||
import org.jfree.chart.JFreeChart; |
|||
import org.jfree.chart.axis.CategoryAxis; |
|||
import org.jfree.chart.axis.NumberAxis; |
|||
import org.jfree.chart.plot.CategoryPlot; |
|||
import org.jfree.chart.plot.PiePlot; |
|||
import org.jfree.chart.plot.XYPlot; |
|||
import org.jfree.chart.renderer.category.BarRenderer; |
|||
import org.jfree.chart.renderer.category.LineAndShapeRenderer; |
|||
import org.jfree.data.category.DefaultCategoryDataset; |
|||
import org.jfree.data.general.DefaultPieDataset; |
|||
import org.jfree.data.xy.XYDataset; |
|||
import org.jfree.data.xy.XYSeries; |
|||
import org.jfree.data.xy.XYSeriesCollection; |
|||
|
|||
public class ChartGenerator { |
|||
|
|||
static { |
|||
File dir = new File(CrawlerConstants.CHARTS_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
public static void generatePriceHistogram(Map<String, Integer> priceDistribution, String fileName) { |
|||
DefaultCategoryDataset dataset = createCategoryDataset(priceDistribution); |
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
"书籍价格分布", |
|||
"价格区间(£)", |
|||
"书籍数量", |
|||
dataset |
|||
); |
|||
customizeBarChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateRatingPieChart(Map<String, Integer> ratingDistribution, String fileName) { |
|||
DefaultPieDataset<String> dataset = new DefaultPieDataset<>(); |
|||
for (Map.Entry<String, Integer> entry : ratingDistribution.entrySet()) { |
|||
dataset.setValue(entry.getKey(), entry.getValue()); |
|||
} |
|||
JFreeChart chart = ChartFactory.createPieChart( |
|||
"书籍评分分布", |
|||
dataset, |
|||
true, |
|||
true, |
|||
false |
|||
); |
|||
customizePieChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateNewsTimeTrend(Map<Integer, Integer> hourDistribution, String fileName) { |
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
for (int i = 0; i < 24; i++) { |
|||
int count = hourDistribution.getOrDefault(i, 0); |
|||
dataset.addValue(count, "新闻数量", String.format("%02d:00", i)); |
|||
} |
|||
JFreeChart chart = ChartFactory.createLineChart( |
|||
"新闻发布时间分布", |
|||
"小时", |
|||
"新闻数量", |
|||
dataset |
|||
); |
|||
customizeLineChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateWordFrequencyBarChart(Map<String, Integer> wordFrequency, String fileName) { |
|||
Map<String, Integer> top10 = wordFrequency.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|||
|
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
for (Map.Entry<String, Integer> entry : top10.entrySet()) { |
|||
dataset.addValue(entry.getValue(), "词频", entry.getKey()); |
|||
} |
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
"新闻高频词 TOP 10", |
|||
"关键词", |
|||
"出现次数", |
|||
dataset |
|||
); |
|||
customizeBarChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateProvinceBarChart(Map<String, Integer> provinceDistribution, String fileName) { |
|||
Map<String, Integer> top10 = provinceDistribution.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|||
|
|||
DefaultCategoryDataset dataset = createCategoryDataset(top10); |
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
"各省上榜大学数量 TOP 10", |
|||
"省份", |
|||
"大学数量", |
|||
dataset |
|||
); |
|||
customizeBarChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateScoreHistogram(Map<String, Integer> scoreDistribution, String fileName) { |
|||
DefaultCategoryDataset dataset = createCategoryDataset(scoreDistribution); |
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
"大学总分分布", |
|||
"分数区间", |
|||
"大学数量", |
|||
dataset |
|||
); |
|||
customizeBarChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateTemperatureTrend(List<String> times, List<Double> temperatures, String cityName, String fileName) { |
|||
XYSeries series = new XYSeries(cityName); |
|||
for (int i = 0; i < Math.min(times.size(), temperatures.size()); i++) { |
|||
series.add(i, temperatures.get(i)); |
|||
} |
|||
XYDataset dataset = new XYSeriesCollection(series); |
|||
JFreeChart chart = ChartFactory.createXYLineChart( |
|||
cityName + " 未来24小时温度变化", |
|||
"小时", |
|||
"温度(°C)", |
|||
dataset |
|||
); |
|||
customizeXYLineChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
public static void generateMultiCityTemperatureComparison(Map<String, List<Double>> cityTemperatures, String fileName) { |
|||
XYSeriesCollection dataset = new XYSeriesCollection(); |
|||
for (Map.Entry<String, List<Double>> entry : cityTemperatures.entrySet()) { |
|||
XYSeries series = new XYSeries(entry.getKey()); |
|||
List<Double> temps = entry.getValue(); |
|||
for (int i = 0; i < Math.min(temps.size(), 24); i++) { |
|||
series.add(i, temps.get(i)); |
|||
} |
|||
dataset.addSeries(series); |
|||
} |
|||
JFreeChart chart = ChartFactory.createXYLineChart( |
|||
"多城市未来24小时温度对比", |
|||
"小时", |
|||
"温度(°C)", |
|||
dataset |
|||
); |
|||
customizeXYLineChart(chart); |
|||
saveChart(chart, fileName); |
|||
} |
|||
|
|||
private static DefaultCategoryDataset createCategoryDataset(Map<String, Integer> data) { |
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
for (Map.Entry<String, Integer> entry : data.entrySet()) { |
|||
dataset.addValue(entry.getValue(), "数值", entry.getKey()); |
|||
} |
|||
return dataset; |
|||
} |
|||
|
|||
private static void customizeBarChart(JFreeChart chart) { |
|||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
CategoryPlot plot = chart.getCategoryPlot(); |
|||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|||
domainAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
domainAxis.setTickLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 10)); |
|||
|
|||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|||
rangeAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
BarRenderer renderer = (BarRenderer) plot.getRenderer(); |
|||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|||
} |
|||
|
|||
private static void customizePieChart(JFreeChart chart) { |
|||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
PiePlot plot = (PiePlot) chart.getPlot(); |
|||
plot.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
} |
|||
|
|||
private static void customizeLineChart(JFreeChart chart) { |
|||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
CategoryPlot plot = chart.getCategoryPlot(); |
|||
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); |
|||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|||
} |
|||
|
|||
private static void customizeXYLineChart(JFreeChart chart) { |
|||
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16)); |
|||
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
XYPlot plot = chart.getXYPlot(); |
|||
|
|||
NumberAxis xAxis = (NumberAxis) plot.getDomainAxis(); |
|||
xAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
|
|||
NumberAxis yAxis = (NumberAxis) plot.getRangeAxis(); |
|||
yAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12)); |
|||
} |
|||
|
|||
private static void saveChart(JFreeChart chart, String fileName) { |
|||
try { |
|||
File file = new File(CrawlerConstants.CHARTS_DIR, fileName); |
|||
ChartUtils.saveChartAsPNG(file, chart, 800, 500); |
|||
System.out.println("图表已保存: " + file.getAbsolutePath()); |
|||
} catch (IOException e) { |
|||
System.err.println("保存图表失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,60 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.exception.CrawlException; |
|||
import com.example.crawler.exception.NetworkException; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.strategy.CrawlStrategy; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public abstract class BaseCrawlCommand implements Command { |
|||
|
|||
protected static final Logger logger = LoggerFactory.getLogger(BaseCrawlCommand.class); |
|||
|
|||
protected DataRepository repository; |
|||
protected int maxRetries; |
|||
protected long retryDelayMs; |
|||
|
|||
public BaseCrawlCommand(DataRepository repository) { |
|||
this.repository = repository; |
|||
this.maxRetries = CrawlerConstants.MAX_RETRIES; |
|||
this.retryDelayMs = 2000; |
|||
} |
|||
|
|||
protected abstract CrawlStrategy<?> getStrategy(); |
|||
|
|||
protected abstract void saveToRepository(Object data); |
|||
|
|||
@Override |
|||
public void execute() { |
|||
try { |
|||
Object data = crawlWithRetry(); |
|||
saveToRepository(data); |
|||
logger.info("Crawling completed and saved to repository"); |
|||
} catch (Exception e) { |
|||
logger.error("Crawling failed", e); |
|||
System.err.println("爬取失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
protected Object crawlWithRetry() throws Exception { |
|||
int attempts = 0; |
|||
while (attempts < maxRetries) { |
|||
try { |
|||
CrawlStrategy<?> strategy = getStrategy(); |
|||
return strategy.crawl(); |
|||
} catch (NetworkException e) { |
|||
attempts++; |
|||
if (attempts < maxRetries) { |
|||
logger.warn("Network error, retrying in {}ms (attempt {}/{})", retryDelayMs, attempts, maxRetries); |
|||
Thread.sleep(retryDelayMs); |
|||
} else { |
|||
logger.error("Max retries reached, giving up"); |
|||
throw e; |
|||
} |
|||
} |
|||
} |
|||
throw new CrawlException("Max retries exceeded"); |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.strategy.BookCrawlStrategy; |
|||
import com.example.crawler.strategy.CrawlStrategy; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class BookCommand extends BaseCrawlCommand { |
|||
|
|||
public BookCommand(DataRepository repository) { |
|||
super(repository); |
|||
} |
|||
|
|||
@Override |
|||
protected CrawlStrategy<?> getStrategy() { |
|||
return new BookCrawlStrategy(); |
|||
} |
|||
|
|||
@Override |
|||
@SuppressWarnings("unchecked") |
|||
protected void saveToRepository(Object data) { |
|||
repository.saveBooks((List<Book>) data); |
|||
System.out.println("成功爬取 " + ((List<Book>) data).size() + " 本书籍信息"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取书籍信息"; |
|||
} |
|||
} |
|||
@ -0,0 +1,20 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
/**
 * Command interface.
 *
 * <p>Defines the standard operations of an executable command (Command pattern).
 */
public interface Command {

    /** Executes the command. */
    void execute();

    /**
     * Returns the name of the command.
     *
     * @return the command name
     */
    String getName();
}
|||
@ -0,0 +1,45 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import com.example.crawler.repository.DataRepository; |
|||
|
|||
public class CrawlAllCommand implements Command { |
|||
|
|||
private final DataRepository repository; |
|||
private final CrawlerController controller; |
|||
|
|||
public CrawlAllCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
this.repository = controller.getRepository(); |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("\n=== 开始爬取全部数据源 ==="); |
|||
|
|||
Command[] commands = { |
|||
new BookCommand(repository), |
|||
new NewsCommand(repository), |
|||
new CrawlRankingCommand(repository), |
|||
new WeatherCommand(repository) |
|||
}; |
|||
|
|||
for (Command command : commands) { |
|||
command.execute(); |
|||
try { |
|||
Thread.sleep(2000); |
|||
} catch (InterruptedException e) { |
|||
Thread.currentThread().interrupt(); |
|||
} |
|||
} |
|||
|
|||
new SaveCommand(controller).execute(); |
|||
|
|||
System.out.println("\n=== 全部数据爬取完成 ==="); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取全部数据并保存"; |
|||
} |
|||
} |
|||
@ -0,0 +1,104 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.service.BookAnalysisService; |
|||
import com.example.crawler.service.NewsAnalysisService; |
|||
import com.example.crawler.service.RankingAnalysisService; |
|||
import com.example.crawler.service.WeatherAnalysisService; |
|||
|
|||
public class CrawlAndAnalyzeAllCommand implements Command { |
|||
|
|||
private final DataRepository repository; |
|||
private final CrawlerController controller; |
|||
|
|||
public CrawlAndAnalyzeAllCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
this.repository = controller.getRepository(); |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("\n========== 爬取全部数据并生成分析 ==========\n"); |
|||
|
|||
System.out.println("第1步:爬取书籍信息..."); |
|||
try { |
|||
BookCommand bookCommand = new BookCommand(repository); |
|||
bookCommand.execute(); |
|||
} catch (Exception e) { |
|||
System.err.println("书籍爬取失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n第2步:爬取新闻信息..."); |
|||
try { |
|||
NewsCommand newsCommand = new NewsCommand(repository); |
|||
newsCommand.execute(); |
|||
} catch (Exception e) { |
|||
System.err.println("新闻爬取失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n第3步:爬取大学排名..."); |
|||
try { |
|||
CrawlRankingCommand rankingCommand = new CrawlRankingCommand(repository); |
|||
rankingCommand.execute(); |
|||
} catch (Exception e) { |
|||
System.err.println("大学排名爬取失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n第4步:爬取天气数据..."); |
|||
try { |
|||
WeatherCommand weatherCommand = new WeatherCommand(repository); |
|||
weatherCommand.execute(); |
|||
} catch (Exception e) { |
|||
System.err.println("天气数据爬取失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n========== 数据爬取完成,开始分析 ==========\n"); |
|||
|
|||
try { |
|||
BookAnalysisService bookService = new BookAnalysisService(); |
|||
if (!repository.getBooks().isEmpty()) { |
|||
bookService.analyze(repository.getBooks()); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("书籍分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
NewsAnalysisService newsService = new NewsAnalysisService(); |
|||
if (!repository.getNewsList().isEmpty()) { |
|||
newsService.analyze(repository.getNewsList()); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("新闻分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
RankingAnalysisService rankingService = new RankingAnalysisService(); |
|||
if (!repository.getRankings().isEmpty()) { |
|||
rankingService.analyze(repository.getRankings()); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("大学排名分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
|||
if (!repository.getWeatherList().isEmpty()) { |
|||
weatherService.analyze(repository.getWeatherList()); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("天气分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n========== 全部完成 =========="); |
|||
System.out.println("原始数据已保存到 output/ 目录"); |
|||
System.out.println("分析报告已保存到 reports/ 目录"); |
|||
System.out.println("图表已保存到 charts/ 目录"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取并分析全部数据"; |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.strategy.CrawlStrategy; |
|||
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class CrawlRankingCommand extends BaseCrawlCommand { |
|||
|
|||
public CrawlRankingCommand(DataRepository repository) { |
|||
super(repository); |
|||
} |
|||
|
|||
@Override |
|||
protected CrawlStrategy<?> getStrategy() { |
|||
return new UniversityRankCrawlStrategy(); |
|||
} |
|||
|
|||
@Override |
|||
@SuppressWarnings("unchecked") |
|||
protected void saveToRepository(Object data) { |
|||
repository.saveRankings((List<UniversityRank>) data); |
|||
System.out.println("成功爬取 " + ((List<UniversityRank>) data).size() + " 条大学排名数据"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取软科中国大学排名"; |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
/** |
|||
* 退出命令 |
|||
* // Command模式:退出命令
|
|||
*/ |
|||
public class ExitCommand implements Command { |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("\n=== 感谢使用数据爬取系统 ==="); |
|||
System.exit(0); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "退出"; |
|||
} |
|||
} |
|||
@ -0,0 +1,77 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.controller.CrawlerController; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.service.BookAnalysisService; |
|||
import com.example.crawler.service.NewsAnalysisService; |
|||
import com.example.crawler.service.RankingAnalysisService; |
|||
import com.example.crawler.service.WeatherAnalysisService; |
|||
|
|||
public class GenerateAllAnalysisCommand implements Command { |
|||
|
|||
private final DataRepository repository; |
|||
private final CrawlerController controller; |
|||
|
|||
public GenerateAllAnalysisCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
this.repository = controller.getRepository(); |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("\n========== 生成所有数据源分析报告 ==========\n"); |
|||
|
|||
try { |
|||
BookAnalysisService bookService = new BookAnalysisService(); |
|||
if (!repository.getBooks().isEmpty()) { |
|||
bookService.analyze(repository.getBooks()); |
|||
} else { |
|||
System.out.println("没有书籍数据,跳过书籍分析"); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("书籍分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
NewsAnalysisService newsService = new NewsAnalysisService(); |
|||
if (!repository.getNewsList().isEmpty()) { |
|||
newsService.analyze(repository.getNewsList()); |
|||
} else { |
|||
System.out.println("没有新闻数据,跳过新闻分析"); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("新闻分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
RankingAnalysisService rankingService = new RankingAnalysisService(); |
|||
if (!repository.getRankings().isEmpty()) { |
|||
rankingService.analyze(repository.getRankings()); |
|||
} else { |
|||
System.out.println("没有大学排名数据,跳过排名分析"); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("大学排名分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
try { |
|||
WeatherAnalysisService weatherService = new WeatherAnalysisService(); |
|||
if (!repository.getWeatherList().isEmpty()) { |
|||
weatherService.analyze(repository.getWeatherList()); |
|||
} else { |
|||
System.out.println("没有天气数据,跳过天气分析"); |
|||
} |
|||
} catch (Exception e) { |
|||
System.err.println("天气分析失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n========== 分析完成 =========="); |
|||
System.out.println("报告已保存到 reports/ 目录"); |
|||
System.out.println("图表已保存到 charts/ 目录"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "生成所有分析报告"; |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.strategy.CrawlStrategy; |
|||
import com.example.crawler.strategy.NewsCrawlStrategy; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class NewsCommand extends BaseCrawlCommand { |
|||
|
|||
public NewsCommand(DataRepository repository) { |
|||
super(repository); |
|||
} |
|||
|
|||
@Override |
|||
protected CrawlStrategy<?> getStrategy() { |
|||
return new NewsCrawlStrategy(); |
|||
} |
|||
|
|||
@Override |
|||
@SuppressWarnings("unchecked") |
|||
protected void saveToRepository(Object data) { |
|||
repository.saveNewsList((List<News>) data); |
|||
System.out.println("成功爬取 " + ((List<News>) data).size() + " 条新闻"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取新浪国内新闻"; |
|||
} |
|||
} |
|||
@ -0,0 +1,74 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.List; |
|||
|
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.controller.CrawlerController; |
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.model.Weather; |
|||
import com.example.crawler.util.JsonUtil; |
|||
|
|||
public class SaveCommand implements Command { |
|||
|
|||
private final CrawlerController controller; |
|||
|
|||
public SaveCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
System.out.println("\n=== 开始保存数据 ==="); |
|||
|
|||
try { |
|||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); |
|||
|
|||
// 保存书籍数据
|
|||
List<Book> books = controller.getBooks(); |
|||
if (books != null && !books.isEmpty()) { |
|||
String bookFileName = CrawlerConstants.OUTPUT_DIR + "/books_" + timestamp + ".json"; |
|||
JsonUtil.saveListToJsonFile(books, bookFileName); |
|||
System.out.println("书籍数据已保存到: " + bookFileName); |
|||
} |
|||
|
|||
// 保存新闻数据
|
|||
List<News> newsList = controller.getNewsList(); |
|||
if (newsList != null && !newsList.isEmpty()) { |
|||
String newsFileName = CrawlerConstants.OUTPUT_DIR + "/news_" + timestamp + ".json"; |
|||
JsonUtil.saveListToJsonFile(newsList, newsFileName); |
|||
System.out.println("新闻数据已保存到: " + newsFileName); |
|||
} |
|||
|
|||
// 保存大学排名数据
|
|||
List<UniversityRank> universityRankList = controller.getUniversityRankList(); |
|||
if (universityRankList != null && !universityRankList.isEmpty()) { |
|||
String rankingFileName = CrawlerConstants.OUTPUT_DIR + "/university_ranking_" + timestamp + ".json"; |
|||
JsonUtil.saveListToJsonFile(universityRankList, rankingFileName); |
|||
System.out.println("大学排名数据已保存到: " + rankingFileName); |
|||
} |
|||
|
|||
// 保存天气数据
|
|||
List<Weather> weatherList = controller.getWeatherList(); |
|||
if (weatherList != null && !weatherList.isEmpty()) { |
|||
String weatherFileName = CrawlerConstants.OUTPUT_DIR + "/weather_" + timestamp + ".json"; |
|||
JsonUtil.saveListToJsonFile(weatherList, weatherFileName); |
|||
System.out.println("天气数据已保存到: " + weatherFileName); |
|||
} |
|||
|
|||
System.out.println("\n=== 数据保存完成 ==="); |
|||
|
|||
} catch (Exception e) { |
|||
System.err.println("保存数据失败: " + e.getMessage()); |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "保存当前数据到文件"; |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.crawler.command; |
|||
|
|||
import com.example.crawler.model.Weather; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.strategy.CrawlStrategy; |
|||
import com.example.crawler.strategy.WeatherCrawlStrategy; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class WeatherCommand extends BaseCrawlCommand { |
|||
|
|||
public WeatherCommand(DataRepository repository) { |
|||
super(repository); |
|||
} |
|||
|
|||
@Override |
|||
protected CrawlStrategy<?> getStrategy() { |
|||
return new WeatherCrawlStrategy(); |
|||
} |
|||
|
|||
@Override |
|||
@SuppressWarnings("unchecked") |
|||
protected void saveToRepository(Object data) { |
|||
repository.saveWeatherList((List<Weather>) data); |
|||
System.out.println("成功爬取 " + ((List<Weather>) data).size() + " 个城市的天气信息"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "爬取天气数据"; |
|||
} |
|||
} |
|||
@ -0,0 +1,31 @@ |
|||
package com.example.crawler.constant; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
/**
 * Shared constants for the crawler: HTTP request settings, source URLs,
 * output directory names and the coordinates of the cities whose weather is
 * fetched.
 */
public class CrawlerConstants {

    public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
    public static final String REFERER = "https://www.baidu.com";

    public static final int TIMEOUT_MS = 10000;
    public static final int MAX_RETRIES = 3;
    public static final long DELAY_MS = 3000;

    public static final String URL_BOOKS = "https://books.toscrape.com/";
    public static final String URL_NEWS = "https://news.sina.com.cn/china/";
    public static final String URL_RANKING = "https://www.shanghairanking.cn/rankings/bcur/202310";
    public static final String URL_WEATHER_API = "https://api.open-meteo.com/v1/forecast";

    public static final String OUTPUT_DIR = "output";
    public static final String REPORTS_DIR = "reports";
    public static final String CHARTS_DIR = "charts";

    /**
     * City name mapped to a two-element {@code double[]} coordinate pair.
     * The map itself is read-only: entries cannot be added or removed.
     * The arrays inside remain ordinary mutable arrays, so callers must not
     * write to them.
     */
    public static final Map<String, double[]> CITY_COORDINATES;

    static {
        // Same HashMap as before so iteration order is unchanged; only the
        // published view is made unmodifiable.
        Map<String, double[]> coordinates = new HashMap<>();
        coordinates.put("北京", new double[]{39.9042, 116.4074});
        coordinates.put("上海", new double[]{31.2304, 121.4737});
        coordinates.put("广州", new double[]{23.1291, 113.2644});
        CITY_COORDINATES = java.util.Collections.unmodifiableMap(coordinates);
    }

    /** Constants holder; not meant to be instantiated. */
    private CrawlerConstants() {
    }
}
|||
@ -0,0 +1,90 @@ |
|||
package com.example.crawler.controller; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.Scanner; |
|||
|
|||
import com.example.crawler.command.BookCommand; |
|||
import com.example.crawler.command.Command; |
|||
import com.example.crawler.command.CrawlAllCommand; |
|||
import com.example.crawler.command.CrawlAndAnalyzeAllCommand; |
|||
import com.example.crawler.command.CrawlRankingCommand; |
|||
import com.example.crawler.command.ExitCommand; |
|||
import com.example.crawler.command.GenerateAllAnalysisCommand; |
|||
import com.example.crawler.command.NewsCommand; |
|||
import com.example.crawler.command.SaveCommand; |
|||
import com.example.crawler.command.WeatherCommand; |
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.model.Weather; |
|||
import com.example.crawler.repository.DataRepository; |
|||
import com.example.crawler.view.CrawlerView; |
|||
|
|||
public class CrawlerController { |
|||
|
|||
private final CrawlerView view; |
|||
private final Map<Integer, Command> commandMap; |
|||
private final DataRepository repository; |
|||
|
|||
public CrawlerController() { |
|||
this.view = new CrawlerView(); |
|||
this.repository = DataRepository.getInstance(); |
|||
this.commandMap = new HashMap<>(); |
|||
initCommands(); |
|||
} |
|||
|
|||
private void initCommands() { |
|||
commandMap.put(1, new BookCommand(repository)); |
|||
commandMap.put(2, new NewsCommand(repository)); |
|||
commandMap.put(3, new CrawlRankingCommand(repository)); |
|||
commandMap.put(4, new WeatherCommand(repository)); |
|||
commandMap.put(5, new CrawlAllCommand(this)); |
|||
commandMap.put(6, new SaveCommand(this)); |
|||
commandMap.put(7, new GenerateAllAnalysisCommand(this)); |
|||
commandMap.put(8, new CrawlAndAnalyzeAllCommand(this)); |
|||
commandMap.put(9, new ExitCommand()); |
|||
} |
|||
|
|||
public void start() { |
|||
Scanner scanner = new Scanner(System.in); |
|||
|
|||
while (true) { |
|||
view.showMenu(); |
|||
|
|||
int choice = view.getInput(scanner); |
|||
|
|||
Command command = commandMap.get(choice); |
|||
if (command != null) { |
|||
command.execute(); |
|||
} else { |
|||
view.showError("无效的选择,请输入1-9之间的数字"); |
|||
} |
|||
|
|||
if (choice != 9) { |
|||
view.pause(scanner); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public List<Book> getBooks() { |
|||
return repository.getBooks(); |
|||
} |
|||
|
|||
public List<News> getNewsList() { |
|||
return repository.getNewsList(); |
|||
} |
|||
|
|||
public List<UniversityRank> getUniversityRankList() { |
|||
return repository.getRankings(); |
|||
} |
|||
|
|||
public List<Weather> getWeatherList() { |
|||
return repository.getWeatherList(); |
|||
} |
|||
|
|||
public DataRepository getRepository() { |
|||
return repository; |
|||
} |
|||
} |
|||
@ -0,0 +1,16 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
/**
 * Base class for crawler exceptions; every crawler-specific exception
 * extends this checked exception.
 */
public class CrawlException extends Exception {

    /**
     * @param message description of the failure
     */
    public CrawlException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception that triggered this one
     */
    public CrawlException(String message, Throwable cause) {
        super(message, cause);
    }
}
|||
@ -0,0 +1,16 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
/** |
|||
* 数据保存异常 |
|||
* 用于处理文件写入失败、JSON序列化失败等数据保存相关错误 |
|||
*/ |
|||
public class DataSaveException extends CrawlException { |
|||
|
|||
public DataSaveException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public DataSaveException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,16 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
/** |
|||
* 网络异常 |
|||
* 用于处理HTTP请求失败、连接超时等网络相关错误 |
|||
*/ |
|||
public class NetworkException extends CrawlException { |
|||
|
|||
public NetworkException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public NetworkException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,16 @@ |
|||
package com.example.crawler.exception; |
|||
|
|||
/** |
|||
* 解析异常 |
|||
* 用于处理HTML解析失败、JSON解析失败等数据解析相关错误 |
|||
*/ |
|||
public class ParseException extends CrawlException { |
|||
|
|||
public ParseException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,65 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * Book data model holding the information scraped from toscrape.com.
 * All four attributes are kept as the raw strings that were scraped.
 */
public class Book {

    private String title;
    private String price;
    private String availability;
    private String rating;

    /** Creates an empty book; every field starts as {@code null}. */
    public Book() {
    }

    /** Creates a book with all four attributes set. */
    public Book(String title, String price, String availability, String rating) {
        this.title = title;
        this.price = price;
        this.availability = availability;
        this.rating = rating;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getAvailability() {
        return availability;
    }

    public void setAvailability(String availability) {
        this.availability = availability;
    }

    public String getRating() {
        return rating;
    }

    public void setRating(String rating) {
        this.rating = rating;
    }

    /** @return {@code Book{title='..', price='..', availability='..', rating='..'}} */
    @Override
    public String toString() {
        return String.format(
                "Book{title='%s', price='%s', availability='%s', rating='%s'}",
                title, price, availability, rating);
    }
}
|||
@ -0,0 +1,54 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * News data model holding one domestic news item from Sina News:
 * its title, publish time and URL, all as strings.
 */
public class News {

    private String title;
    private String publishTime;
    private String url;

    /** Creates an empty news item; every field starts as {@code null}. */
    public News() {
    }

    /** Creates a news item with all three attributes set. */
    public News(String title, String publishTime, String url) {
        this.title = title;
        this.publishTime = publishTime;
        this.url = url;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPublishTime() {
        return publishTime;
    }

    public void setPublishTime(String publishTime) {
        this.publishTime = publishTime;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /** @return {@code News{title='..', publishTime='..', url='..'}} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("News{");
        text.append("title='").append(title).append('\'');
        text.append(", publishTime='").append(publishTime).append('\'');
        text.append(", url='").append(url).append('\'');
        return text.append('}').toString();
    }
}
|||
@ -0,0 +1,76 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
/**
 * University ranking data model holding one entry of the ShanghaiRanking
 * Chinese university ranking.
 */
public class UniversityRank {

    private Integer rank;
    private String universityName;
    private String totalScore;
    private String province;
    private String category;

    /** Creates an empty entry; every field starts as {@code null}. */
    public UniversityRank() {
    }

    /** Creates an entry with all five attributes set. */
    public UniversityRank(Integer rank, String universityName, String totalScore, String province, String category) {
        this.rank = rank;
        this.universityName = universityName;
        this.totalScore = totalScore;
        this.province = province;
        this.category = category;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public String getTotalScore() {
        return totalScore;
    }

    public void setTotalScore(String totalScore) {
        this.totalScore = totalScore;
    }

    public String getProvince() {
        return province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    public String getCategory() {
        return category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    /** @return {@code UniversityRank{rank=.., universityName='..', ...}} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("UniversityRank{");
        text.append("rank=").append(rank);
        text.append(", universityName='").append(universityName).append('\'');
        text.append(", totalScore='").append(totalScore).append('\'');
        text.append(", province='").append(province).append('\'');
        text.append(", category='").append(category).append('\'');
        return text.append('}').toString();
    }
}
|||
@ -0,0 +1,140 @@ |
|||
package com.example.crawler.model; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
/**
 * Weather data model holding one city's weather as returned by the
 * Open-Meteo API: current values plus parallel hourly series.
 * Data source: Open-Meteo (CC BY 4.0).
 */
public class Weather {

    private String cityName;
    private double temperature;
    private double humidity;
    private double windSpeed;
    /** WMO weather interpretation code, kept as its decimal string (e.g. "61"). */
    private String weatherCode;
    private List<String> hourlyTimes;
    private List<Double> hourlyTemperatures;
    private List<Integer> hourlyHumidities;
    private List<Double> hourlyWindSpeeds;

    /** Creates an empty record whose hourly series are empty, mutable lists. */
    public Weather() {
        this.hourlyTimes = new ArrayList<>();
        this.hourlyTemperatures = new ArrayList<>();
        this.hourlyHumidities = new ArrayList<>();
        this.hourlyWindSpeeds = new ArrayList<>();
    }

    /**
     * Creates a record with the current values set and empty hourly series.
     *
     * @param cityName    city the record belongs to
     * @param temperature current temperature
     * @param humidity    current humidity
     * @param windSpeed   current wind speed
     * @param weatherCode WMO weather code as a string, may be {@code null}
     */
    public Weather(String cityName, double temperature, double humidity, double windSpeed, String weatherCode) {
        this(); // initialise the hourly lists in one place
        this.cityName = cityName;
        this.temperature = temperature;
        this.humidity = humidity;
        this.windSpeed = windSpeed;
        this.weatherCode = weatherCode;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    public double getTemperature() {
        return temperature;
    }

    public void setTemperature(double temperature) {
        this.temperature = temperature;
    }

    public double getHumidity() {
        return humidity;
    }

    public void setHumidity(double humidity) {
        this.humidity = humidity;
    }

    public double getWindSpeed() {
        return windSpeed;
    }

    public void setWindSpeed(double windSpeed) {
        this.windSpeed = windSpeed;
    }

    public String getWeatherCode() {
        return weatherCode;
    }

    public void setWeatherCode(String weatherCode) {
        this.weatherCode = weatherCode;
    }

    public List<String> getHourlyTimes() {
        return hourlyTimes;
    }

    public void setHourlyTimes(List<String> hourlyTimes) {
        this.hourlyTimes = hourlyTimes;
    }

    public List<Double> getHourlyTemperatures() {
        return hourlyTemperatures;
    }

    public void setHourlyTemperatures(List<Double> hourlyTemperatures) {
        this.hourlyTemperatures = hourlyTemperatures;
    }

    public List<Integer> getHourlyHumidities() {
        return hourlyHumidities;
    }

    public void setHourlyHumidities(List<Integer> hourlyHumidities) {
        this.hourlyHumidities = hourlyHumidities;
    }

    public List<Double> getHourlyWindSpeeds() {
        return hourlyWindSpeeds;
    }

    public void setHourlyWindSpeeds(List<Double> hourlyWindSpeeds) {
        this.hourlyWindSpeeds = hourlyWindSpeeds;
    }

    /**
     * Translates the WMO weather code into a short Chinese description.
     *
     * <p>Labels that already existed are kept verbatim so that anything
     * keyed on them keeps working. The freezing-precipitation and snow codes
     * used to fall through to "未知" and now have their own labels.
     *
     * @return the description, or "未知" for a {@code null} or unrecognised code
     */
    public String getWeatherDescription() {
        if (weatherCode == null) {
            return "未知";
        }
        switch (weatherCode) {
            case "0":
                return "晴";
            case "1": case "2": case "3":
                return "多云";
            case "45": case "48":
                return "雾";
            case "51": case "53": case "55":
                return "小毛毛雨";
            case "56": case "57":
                return "冻毛毛雨";
            case "61": case "63": case "65":
                return "小雨";
            case "66": case "67":
                return "冻雨";
            case "71": case "73": case "75":
                return "降雪";
            case "77":
                return "米雪";
            case "80": case "81": case "82":
                return "阵雨";
            case "85": case "86":
                return "阵雪";
            case "95":
                return "雷暴";
            case "96": case "99":
                return "雷暴加冰雹";
            default:
                return "未知";
        }
    }

    @Override
    public String toString() {
        return "Weather{" +
                "cityName='" + cityName + '\'' +
                ", temperature=" + temperature +
                ", humidity=" + humidity +
                ", windSpeed=" + windSpeed +
                ", weatherCode='" + weatherCode + '\'' +
                ", weather='" + getWeatherDescription() + '\'' +
                '}';
    }
}
|||
@ -0,0 +1,75 @@ |
|||
package com.example.crawler.repository; |
|||
|
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.model.Weather; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class DataRepository { |
|||
|
|||
private static DataRepository instance; |
|||
|
|||
private List<Book> books; |
|||
private List<News> newsList; |
|||
private List<UniversityRank> rankings; |
|||
private List<Weather> weatherList; |
|||
|
|||
private DataRepository() { |
|||
this.books = new ArrayList<>(); |
|||
this.newsList = new ArrayList<>(); |
|||
this.rankings = new ArrayList<>(); |
|||
this.weatherList = new ArrayList<>(); |
|||
} |
|||
|
|||
public static synchronized DataRepository getInstance() { |
|||
if (instance == null) { |
|||
instance = new DataRepository(); |
|||
} |
|||
return instance; |
|||
} |
|||
|
|||
public List<Book> getBooks() { |
|||
return new ArrayList<>(books); |
|||
} |
|||
|
|||
public void saveBooks(List<Book> books) { |
|||
this.books.clear(); |
|||
this.books.addAll(books); |
|||
} |
|||
|
|||
public List<News> getNewsList() { |
|||
return new ArrayList<>(newsList); |
|||
} |
|||
|
|||
public void saveNewsList(List<News> newsList) { |
|||
this.newsList.clear(); |
|||
this.newsList.addAll(newsList); |
|||
} |
|||
|
|||
public List<UniversityRank> getRankings() { |
|||
return new ArrayList<>(rankings); |
|||
} |
|||
|
|||
public void saveRankings(List<UniversityRank> rankings) { |
|||
this.rankings.clear(); |
|||
this.rankings.addAll(rankings); |
|||
} |
|||
|
|||
public List<Weather> getWeatherList() { |
|||
return new ArrayList<>(weatherList); |
|||
} |
|||
|
|||
public void saveWeatherList(List<Weather> weatherList) { |
|||
this.weatherList.clear(); |
|||
this.weatherList.addAll(weatherList); |
|||
} |
|||
|
|||
public void clearAll() { |
|||
books.clear(); |
|||
newsList.clear(); |
|||
rankings.clear(); |
|||
weatherList.clear(); |
|||
} |
|||
} |
|||
@ -0,0 +1,171 @@ |
|||
package com.example.crawler.service; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.io.PrintWriter; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
|
|||
import com.example.crawler.chart.ChartGenerator; |
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.util.DataCleaner; |
|||
|
|||
public class BookAnalysisService { |
|||
|
|||
static { |
|||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
public void analyze(List<Book> books) { |
|||
if (books == null || books.isEmpty()) { |
|||
System.out.println("没有书籍数据可分析"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n========== 书籍数据分析 =========="); |
|||
System.out.println("共分析 " + books.size() + " 本书\n"); |
|||
|
|||
analyzePriceDistribution(books); |
|||
analyzeRatingDistribution(books); |
|||
analyzeStockStatus(books); |
|||
|
|||
generateReport(books); |
|||
} |
|||
|
|||
private void analyzePriceDistribution(List<Book> books) { |
|||
System.out.println("【价格分析】"); |
|||
List<Double> prices = new ArrayList<>(); |
|||
for (Book book : books) { |
|||
double price = DataCleaner.cleanPrice(book.getPrice()); |
|||
if (price > 0) { |
|||
prices.add(price); |
|||
} |
|||
} |
|||
|
|||
if (prices.isEmpty()) { |
|||
System.out.println("无法获取有效价格数据"); |
|||
return; |
|||
} |
|||
|
|||
double maxPrice = prices.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|||
double minPrice = prices.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|||
double avgPrice = prices.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|||
|
|||
System.out.println("最高价: £" + String.format("%.2f", maxPrice)); |
|||
System.out.println("最低价: £" + String.format("%.2f", minPrice)); |
|||
System.out.println("平均价: £" + String.format("%.2f", avgPrice)); |
|||
|
|||
Map<String, Integer> priceRanges = new HashMap<>(); |
|||
String[] ranges = {"0-10", "10-20", "20-30", "30-40", "40-50", "50+"}; |
|||
for (String range : ranges) { |
|||
priceRanges.put(range, 0); |
|||
} |
|||
|
|||
for (Double price : prices) { |
|||
if (price < 10) priceRanges.put("0-10", priceRanges.get("0-10") + 1); |
|||
else if (price < 20) priceRanges.put("10-20", priceRanges.get("10-20") + 1); |
|||
else if (price < 30) priceRanges.put("20-30", priceRanges.get("20-30") + 1); |
|||
else if (price < 40) priceRanges.put("30-40", priceRanges.get("30-40") + 1); |
|||
else if (price < 50) priceRanges.put("40-50", priceRanges.get("40-50") + 1); |
|||
else priceRanges.put("50+", priceRanges.get("50+") + 1); |
|||
} |
|||
|
|||
System.out.println("\n价格区间分布:"); |
|||
for (Map.Entry<String, Integer> entry : priceRanges.entrySet()) { |
|||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本"); |
|||
} |
|||
|
|||
ChartGenerator.generatePriceHistogram(priceRanges, "price_histogram.png"); |
|||
} |
|||
|
|||
private void analyzeRatingDistribution(List<Book> books) { |
|||
System.out.println("\n【评分分析】"); |
|||
Map<String, Integer> ratingCounts = new HashMap<>(); |
|||
ratingCounts.put("5星", 0); |
|||
ratingCounts.put("4星", 0); |
|||
ratingCounts.put("3星", 0); |
|||
ratingCounts.put("2星", 0); |
|||
ratingCounts.put("1星", 0); |
|||
ratingCounts.put("未知", 0); |
|||
|
|||
for (Book book : books) { |
|||
int rating = DataCleaner.cleanRating(book.getRating()); |
|||
switch (rating) { |
|||
case 5: ratingCounts.put("5星", ratingCounts.get("5星") + 1); break; |
|||
case 4: ratingCounts.put("4星", ratingCounts.get("4星") + 1); break; |
|||
case 3: ratingCounts.put("3星", ratingCounts.get("3星") + 1); break; |
|||
case 2: ratingCounts.put("2星", ratingCounts.get("2星") + 1); break; |
|||
case 1: ratingCounts.put("1星", ratingCounts.get("1星") + 1); break; |
|||
default: ratingCounts.put("未知", ratingCounts.get("未知") + 1); |
|||
} |
|||
} |
|||
|
|||
int total = books.size(); |
|||
System.out.println("评分分布:"); |
|||
for (Map.Entry<String, Integer> entry : ratingCounts.entrySet()) { |
|||
double percentage = (entry.getValue() * 100.0) / total; |
|||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本 (" + String.format("%.1f", percentage) + "%)"); |
|||
} |
|||
|
|||
ChartGenerator.generateRatingPieChart(ratingCounts, "rating_pie.png"); |
|||
} |
|||
|
|||
private void analyzeStockStatus(List<Book> books) { |
|||
System.out.println("\n【库存分析】"); |
|||
int inStock = 0; |
|||
int outOfStock = 0; |
|||
|
|||
for (Book book : books) { |
|||
String availability = book.getAvailability(); |
|||
if (availability != null && availability.toLowerCase().contains("in stock")) { |
|||
inStock++; |
|||
} else { |
|||
outOfStock++; |
|||
} |
|||
} |
|||
|
|||
System.out.println("有库存: " + inStock + " 本"); |
|||
System.out.println("缺货: " + outOfStock + " 本"); |
|||
} |
|||
|
|||
private void generateReport(List<Book> books) { |
|||
String fileName = CrawlerConstants.REPORTS_DIR + "/book_analysis_report.txt"; |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|||
writer.println("========== 书籍数据分析报告 =========="); |
|||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|||
writer.println("分析书籍总数: " + books.size()); |
|||
writer.println(); |
|||
|
|||
List<Double> prices = books.stream() |
|||
.map(b -> DataCleaner.cleanPrice(b.getPrice())) |
|||
.filter(p -> p > 0) |
|||
.collect(Collectors.toList()); |
|||
|
|||
if (!prices.isEmpty()) { |
|||
writer.println("【价格统计】"); |
|||
writer.println("最高价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
|||
writer.println("最低价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
|||
writer.println("平均价: £" + String.format("%.2f", prices.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|||
writer.println(); |
|||
} |
|||
|
|||
writer.println("【库存统计】"); |
|||
long inStock = books.stream().filter(b -> b.getAvailability() != null && b.getAvailability().toLowerCase().contains("in stock")).count(); |
|||
writer.println("有库存: " + inStock + " 本"); |
|||
writer.println("缺货: " + (books.size() - inStock) + " 本"); |
|||
|
|||
writer.println("\n报告生成完成"); |
|||
System.out.println("\n报告已保存: " + fileName); |
|||
} catch (IOException e) { |
|||
System.err.println("生成报告失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,138 @@ |
|||
package com.example.crawler.service; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.io.PrintWriter; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
|
|||
import com.example.crawler.chart.ChartGenerator; |
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.util.DataCleaner; |
|||
|
|||
public class NewsAnalysisService { |
|||
|
|||
static { |
|||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
public void analyze(List<News> newsList) { |
|||
if (newsList == null || newsList.isEmpty()) { |
|||
System.out.println("没有新闻数据可分析"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n========== 新闻数据分析 =========="); |
|||
System.out.println("共分析 " + newsList.size() + " 条新闻\n"); |
|||
|
|||
analyzeTimeDistribution(newsList); |
|||
analyzeKeywords(newsList); |
|||
|
|||
generateReport(newsList); |
|||
} |
|||
|
|||
private void analyzeTimeDistribution(List<News> newsList) { |
|||
System.out.println("【发布时间分布】"); |
|||
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
|||
for (int i = 0; i < 24; i++) { |
|||
hourDistribution.put(i, 0); |
|||
} |
|||
|
|||
for (News news : newsList) { |
|||
try { |
|||
java.time.LocalDateTime dateTime = DataCleaner.cleanNewsTime(news.getPublishTime()); |
|||
int hour = DataCleaner.extractHour(dateTime); |
|||
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
|||
} catch (Exception e) { |
|||
// 忽略解析失败的数据
|
|||
} |
|||
} |
|||
|
|||
System.out.println("\n按小时统计:"); |
|||
for (int i = 0; i < 24; i++) { |
|||
int count = hourDistribution.get(i); |
|||
String bar = "*".repeat(Math.max(1, count)); |
|||
System.out.printf(" %02d:00 - %02d:00: %3d %s%n", i, (i + 1) % 24, count, bar); |
|||
} |
|||
|
|||
int peakHour = 0; |
|||
int peakCount = 0; |
|||
for (Map.Entry<Integer, Integer> entry : hourDistribution.entrySet()) { |
|||
if (entry.getValue() > peakCount) { |
|||
peakCount = entry.getValue(); |
|||
peakHour = entry.getKey(); |
|||
} |
|||
} |
|||
System.out.println("\n高峰时段: " + String.format("%02d:00", peakHour) + " (发布 " + peakCount + " 条新闻)"); |
|||
|
|||
ChartGenerator.generateNewsTimeTrend(hourDistribution, "news_time_trend.png"); |
|||
} |
|||
|
|||
private void analyzeKeywords(List<News> newsList) { |
|||
System.out.println("\n【关键词分析】"); |
|||
Map<String, Integer> allWords = new HashMap<>(); |
|||
|
|||
for (News news : newsList) { |
|||
String title = DataCleaner.cleanTitle(news.getTitle()); |
|||
String[] words = DataCleaner.extractWords(title); |
|||
Map<String, Integer> wordFreq = DataCleaner.countWordFrequency(words); |
|||
for (Map.Entry<String, Integer> entry : wordFreq.entrySet()) { |
|||
allWords.put(entry.getKey(), allWords.getOrDefault(entry.getKey(), 0) + entry.getValue()); |
|||
} |
|||
} |
|||
|
|||
List<Map.Entry<String, Integer>> sortedWords = allWords.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(20) |
|||
.collect(Collectors.toList()); |
|||
|
|||
System.out.println("\n高频词 TOP 10:"); |
|||
for (int i = 0; i < Math.min(10, sortedWords.size()); i++) { |
|||
Map.Entry<String, Integer> entry = sortedWords.get(i); |
|||
System.out.printf(" %2d. %s: %d%n", i + 1, entry.getKey(), entry.getValue()); |
|||
} |
|||
|
|||
Map<String, Integer> top10 = sortedWords.stream() |
|||
.limit(10) |
|||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|||
|
|||
ChartGenerator.generateWordFrequencyBarChart(top10, "news_top_words.png"); |
|||
} |
|||
|
|||
private void generateReport(List<News> newsList) { |
|||
String fileName = CrawlerConstants.REPORTS_DIR + "/news_analysis_report.txt"; |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|||
writer.println("========== 新闻数据分析报告 =========="); |
|||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|||
writer.println("分析新闻总数: " + newsList.size()); |
|||
writer.println(); |
|||
|
|||
Map<Integer, Integer> hourDistribution = new HashMap<>(); |
|||
for (int i = 0; i < 24; i++) hourDistribution.put(i, 0); |
|||
for (News news : newsList) { |
|||
try { |
|||
int hour = DataCleaner.extractHour(DataCleaner.cleanNewsTime(news.getPublishTime())); |
|||
hourDistribution.put(hour, hourDistribution.get(hour) + 1); |
|||
} catch (Exception e) {} |
|||
} |
|||
|
|||
writer.println("【发布时间分布】"); |
|||
for (int i = 0; i < 24; i++) { |
|||
writer.println(String.format(" %02d:00 - %02d:00: %d 条", i, (i + 1) % 24, hourDistribution.get(i))); |
|||
} |
|||
|
|||
writer.println("\n报告生成完成"); |
|||
System.out.println("\n报告已保存: " + fileName); |
|||
} catch (IOException e) { |
|||
System.err.println("生成报告失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,189 @@ |
|||
package com.example.crawler.service; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.io.PrintWriter; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
|
|||
import com.example.crawler.chart.ChartGenerator; |
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.util.DataCleaner; |
|||
|
|||
public class RankingAnalysisService { |
|||
|
|||
static { |
|||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
public void analyze(List<UniversityRank> ranks) { |
|||
if (ranks == null || ranks.isEmpty()) { |
|||
System.out.println("没有大学排名数据可分析"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n========== 大学排名数据分析 =========="); |
|||
System.out.println("共分析 " + ranks.size() + " 所大学\n"); |
|||
|
|||
analyzeProvinceDistribution(ranks); |
|||
analyzeScoreDistribution(ranks); |
|||
analyzeCategoryDistribution(ranks); |
|||
|
|||
generateReport(ranks); |
|||
} |
|||
|
|||
private void analyzeProvinceDistribution(List<UniversityRank> ranks) { |
|||
System.out.println("【各省份上榜大学数量】"); |
|||
Map<String, Integer> provinceCounts = new HashMap<>(); |
|||
|
|||
for (UniversityRank rank : ranks) { |
|||
String province = rank.getProvince(); |
|||
if (province != null && !province.isEmpty()) { |
|||
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
|||
} |
|||
} |
|||
|
|||
List<Map.Entry<String, Integer>> sorted = provinceCounts.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.collect(Collectors.toList()); |
|||
|
|||
System.out.println("\n省份排行榜 TOP 10:"); |
|||
int rankNum = 1; |
|||
for (Map.Entry<String, Integer> entry : sorted) { |
|||
if (rankNum > 10) break; |
|||
System.out.printf(" %2d. %s: %d 所大学%n", rankNum++, entry.getKey(), entry.getValue()); |
|||
} |
|||
|
|||
Map<String, Integer> top10 = sorted.stream() |
|||
.limit(10) |
|||
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); |
|||
|
|||
ChartGenerator.generateProvinceBarChart(top10, "province_bar.png"); |
|||
} |
|||
|
|||
private void analyzeScoreDistribution(List<UniversityRank> ranks) { |
|||
System.out.println("\n【总分分析】"); |
|||
List<Double> scores = new ArrayList<>(); |
|||
|
|||
for (UniversityRank rank : ranks) { |
|||
double score = DataCleaner.cleanScore(rank.getTotalScore()); |
|||
if (score > 0) { |
|||
scores.add(score); |
|||
} |
|||
} |
|||
|
|||
if (scores.isEmpty()) { |
|||
System.out.println("无法获取有效分数数据"); |
|||
return; |
|||
} |
|||
|
|||
double maxScore = scores.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|||
double minScore = scores.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|||
double avgScore = scores.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|||
|
|||
List<Double> sortedScores = scores.stream().sorted().collect(Collectors.toList()); |
|||
double median = sortedScores.get(sortedScores.size() / 2); |
|||
|
|||
System.out.println("最高分: " + String.format("%.2f", maxScore)); |
|||
System.out.println("最低分: " + String.format("%.2f", minScore)); |
|||
System.out.println("平均分: " + String.format("%.2f", avgScore)); |
|||
System.out.println("中位数: " + String.format("%.2f", median)); |
|||
|
|||
Map<String, Integer> scoreRanges = new HashMap<>(); |
|||
String[] ranges = {"0-20", "20-40", "40-60", "60-80", "80-100"}; |
|||
for (String range : ranges) { |
|||
scoreRanges.put(range, 0); |
|||
} |
|||
|
|||
for (Double score : scores) { |
|||
if (score < 20) scoreRanges.put("0-20", scoreRanges.get("0-20") + 1); |
|||
else if (score < 40) scoreRanges.put("20-40", scoreRanges.get("20-40") + 1); |
|||
else if (score < 60) scoreRanges.put("40-60", scoreRanges.get("40-60") + 1); |
|||
else if (score < 80) scoreRanges.put("60-80", scoreRanges.get("60-80") + 1); |
|||
else scoreRanges.put("80-100", scoreRanges.get("80-100") + 1); |
|||
} |
|||
|
|||
System.out.println("\n分数区间分布:"); |
|||
for (Map.Entry<String, Integer> entry : scoreRanges.entrySet()) { |
|||
System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 所"); |
|||
} |
|||
|
|||
ChartGenerator.generateScoreHistogram(scoreRanges, "score_boxplot.png"); |
|||
} |
|||
|
|||
private void analyzeCategoryDistribution(List<UniversityRank> ranks) { |
|||
System.out.println("\n【办学层次统计】"); |
|||
Map<String, Integer> categoryCounts = new HashMap<>(); |
|||
|
|||
for (UniversityRank rank : ranks) { |
|||
String category = rank.getCategory(); |
|||
if (category != null && !category.isEmpty()) { |
|||
categoryCounts.put(category, categoryCounts.getOrDefault(category, 0) + 1); |
|||
} |
|||
} |
|||
|
|||
if (categoryCounts.isEmpty()) { |
|||
System.out.println("没有办学层次数据"); |
|||
return; |
|||
} |
|||
|
|||
List<Map.Entry<String, Integer>> sorted = categoryCounts.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.collect(Collectors.toList()); |
|||
|
|||
System.out.println("\n办学层次分布:"); |
|||
for (Map.Entry<String, Integer> entry : sorted) { |
|||
System.out.printf(" %s: %d 所%n", entry.getKey(), entry.getValue()); |
|||
} |
|||
} |
|||
|
|||
private void generateReport(List<UniversityRank> ranks) { |
|||
String fileName = CrawlerConstants.REPORTS_DIR + "/ranking_analysis_report.txt"; |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|||
writer.println("========== 大学排名数据分析报告 =========="); |
|||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|||
writer.println("分析大学总数: " + ranks.size()); |
|||
writer.println(); |
|||
|
|||
Map<String, Integer> provinceCounts = new HashMap<>(); |
|||
for (UniversityRank rank : ranks) { |
|||
String province = rank.getProvince(); |
|||
if (province != null && !province.isEmpty()) { |
|||
provinceCounts.put(province, provinceCounts.getOrDefault(province, 0) + 1); |
|||
} |
|||
} |
|||
|
|||
writer.println("【省份排行榜 TOP 10】"); |
|||
provinceCounts.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.forEach(e -> writer.println(" " + e.getKey() + ": " + e.getValue() + " 所大学")); |
|||
|
|||
List<Double> scores = ranks.stream() |
|||
.map(r -> DataCleaner.cleanScore(r.getTotalScore())) |
|||
.filter(s -> s > 0) |
|||
.collect(Collectors.toList()); |
|||
|
|||
if (!scores.isEmpty()) { |
|||
writer.println(); |
|||
writer.println("【分数统计】"); |
|||
writer.println("最高分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).max().orElse(0))); |
|||
writer.println("最低分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).min().orElse(0))); |
|||
writer.println("平均分: " + String.format("%.2f", scores.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|||
} |
|||
|
|||
writer.println("\n报告生成完成"); |
|||
System.out.println("\n报告已保存: " + fileName); |
|||
} catch (IOException e) { |
|||
System.err.println("生成报告失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,163 @@ |
|||
package com.example.crawler.service; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.io.PrintWriter; |
|||
import java.util.ArrayList; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
import com.example.crawler.chart.ChartGenerator; |
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.model.Weather; |
|||
|
|||
public class WeatherAnalysisService { |
|||
|
|||
static { |
|||
File dir = new File(CrawlerConstants.REPORTS_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
public void analyze(List<Weather> weatherList) { |
|||
if (weatherList == null || weatherList.isEmpty()) { |
|||
System.out.println("没有天气数据可分析"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("\n========== 天气数据分析 =========="); |
|||
System.out.println("共分析 " + weatherList.size() + " 个城市\n"); |
|||
|
|||
analyzeCurrentWeather(weatherList); |
|||
analyzeTemperatureTrend(weatherList); |
|||
analyzeHumidityTrend(weatherList); |
|||
analyzeComfortIndex(weatherList); |
|||
|
|||
generateReport(weatherList); |
|||
} |
|||
|
|||
private void analyzeCurrentWeather(List<Weather> weatherList) { |
|||
System.out.println("【当前天气对比】"); |
|||
System.out.println("┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐"); |
|||
System.out.println("│ 城市名称 │ 温度(°C)│ 湿度(%) │ 风速(km/h)│ 天气状况 │ 舒适度 │"); |
|||
System.out.println("├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤"); |
|||
|
|||
for (Weather weather : weatherList) { |
|||
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
|||
String comfortDesc = getComfortDescription(comfort); |
|||
System.out.printf("│ %-8s │ %8.1f │ %8.0f │ %8.1f │ %-8s │ %-8s │%n", |
|||
weather.getCityName(), |
|||
weather.getTemperature(), |
|||
weather.getHumidity(), |
|||
weather.getWindSpeed(), |
|||
weather.getWeatherDescription(), |
|||
comfortDesc); |
|||
} |
|||
System.out.println("└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘"); |
|||
} |
|||
|
|||
private void analyzeTemperatureTrend(List<Weather> weatherList) { |
|||
System.out.println("\n【未来24小时温度分析】"); |
|||
|
|||
Map<String, List<Double>> cityTemperatures = new HashMap<>(); |
|||
for (Weather weather : weatherList) { |
|||
cityTemperatures.put(weather.getCityName(), weather.getHourlyTemperatures()); |
|||
|
|||
List<Double> temps = weather.getHourlyTemperatures(); |
|||
if (!temps.isEmpty()) { |
|||
double maxTemp = temps.stream().mapToDouble(Double::doubleValue).max().orElse(0); |
|||
double minTemp = temps.stream().mapToDouble(Double::doubleValue).min().orElse(0); |
|||
double avgTemp = temps.stream().mapToDouble(Double::doubleValue).average().orElse(0); |
|||
|
|||
int maxIndex = temps.indexOf(maxTemp); |
|||
int minIndex = temps.indexOf(minTemp); |
|||
|
|||
String maxTime = maxIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(maxIndex) : ""; |
|||
String minTime = minIndex < weather.getHourlyTimes().size() ? weather.getHourlyTimes().get(minIndex) : ""; |
|||
|
|||
System.out.printf(" %s: 最高 %.1f°C(%s) 最低 %.1f°C(%s) 平均 %.1f°C%n", |
|||
weather.getCityName(), maxTemp, maxTime, minTemp, minTime, avgTemp); |
|||
} |
|||
|
|||
ChartGenerator.generateTemperatureTrend( |
|||
weather.getHourlyTimes(), |
|||
weather.getHourlyTemperatures(), |
|||
weather.getCityName(), |
|||
"temperature_" + weather.getCityName() + ".png" |
|||
); |
|||
} |
|||
|
|||
ChartGenerator.generateMultiCityTemperatureComparison(cityTemperatures, "temperature_comparison.png"); |
|||
} |
|||
|
|||
private void analyzeHumidityTrend(List<Weather> weatherList) { |
|||
System.out.println("\n【未来24小时湿度分析】"); |
|||
for (Weather weather : weatherList) { |
|||
List<Integer> humidities = weather.getHourlyHumidities(); |
|||
if (!humidities.isEmpty()) { |
|||
double avgHumidity = humidities.stream().mapToInt(Integer::intValue).average().orElse(0); |
|||
System.out.printf(" %s: 平均湿度 %.0f%%%n", weather.getCityName(), avgHumidity); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void analyzeComfortIndex(List<Weather> weatherList) { |
|||
System.out.println("\n【舒适度指数分析】"); |
|||
System.out.println("(基于温度和湿度的体感舒适度计算,0-100分制)"); |
|||
|
|||
for (Weather weather : weatherList) { |
|||
double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity()); |
|||
String description = getComfortDescription(comfort); |
|||
System.out.printf(" %s: %.1f分 (%s)%n", weather.getCityName(), comfort, description); |
|||
} |
|||
} |
|||
|
|||
private double calculateComfortIndex(double temperature, double humidity) { |
|||
double tempDiff = Math.abs(temperature - 22); |
|||
double humDiff = Math.abs(humidity - 50); |
|||
|
|||
double comfort = 100 - (tempDiff * 3 + humDiff * 0.5); |
|||
return Math.max(0, Math.min(100, comfort)); |
|||
} |
|||
|
|||
private String getComfortDescription(double comfort) { |
|||
if (comfort >= 80) return "非常舒适"; |
|||
if (comfort >= 60) return "舒适"; |
|||
if (comfort >= 40) return "一般"; |
|||
if (comfort >= 20) return "不舒适"; |
|||
return "极不舒适"; |
|||
} |
|||
|
|||
private void generateReport(List<Weather> weatherList) { |
|||
String fileName = CrawlerConstants.REPORTS_DIR + "/weather_analysis_report.txt"; |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(fileName))) { |
|||
writer.println("========== 天气数据分析报告 =========="); |
|||
writer.println("生成时间: " + java.time.LocalDateTime.now()); |
|||
writer.println("分析城市数量: " + weatherList.size()); |
|||
writer.println("数据来源: Open-Meteo API (CC BY 4.0)"); |
|||
writer.println(); |
|||
|
|||
writer.println("【多城市天气对比】"); |
|||
for (Weather weather : weatherList) { |
|||
writer.println("\n城市: " + weather.getCityName()); |
|||
writer.println(" 当前温度: " + String.format("%.1f°C", weather.getTemperature())); |
|||
writer.println(" 当前湿度: " + String.format("%.0f%%", weather.getHumidity())); |
|||
writer.println(" 风速: " + String.format("%.1f km/h", weather.getWindSpeed())); |
|||
writer.println(" 天气: " + weather.getWeatherDescription()); |
|||
|
|||
List<Double> temps = weather.getHourlyTemperatures(); |
|||
if (!temps.isEmpty()) { |
|||
writer.println(" 24小时平均温度: " + String.format("%.1f°C", temps.stream().mapToDouble(Double::doubleValue).average().orElse(0))); |
|||
} |
|||
} |
|||
|
|||
writer.println("\n报告生成完成"); |
|||
System.out.println("\n报告已保存: " + fileName); |
|||
} catch (IOException e) { |
|||
System.err.println("生成报告失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,127 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import com.example.crawler.exception.CrawlException; |
|||
import com.example.crawler.exception.NetworkException; |
|||
import com.example.crawler.exception.ParseException; |
|||
import com.example.crawler.model.Book; |
|||
import com.example.crawler.util.HttpUtil; |
|||
|
|||
/** |
|||
* 书籍爬取策略 |
|||
* // 策略模式:书籍信息爬取策略
|
|||
*/ |
|||
public class BookCrawlStrategy implements CrawlStrategy<Book> { |
|||
|
|||
private static final String BASE_URL = "https://books.toscrape.com/"; |
|||
private static final String PAGE_URL_FORMAT = "https://books.toscrape.com/catalogue/page-%d.html"; |
|||
private static final int MAX_PAGES = 30; // 最大爬取页数
|
|||
|
|||
@Override |
|||
public List<Book> crawl() throws CrawlException { |
|||
List<Book> books = new ArrayList<>(); |
|||
int pageNum = 1; |
|||
|
|||
try { |
|||
while (true) { |
|||
// 达到最大页数限制时停止
|
|||
if (pageNum > MAX_PAGES) { |
|||
System.out.println("已达到最大爬取页数限制(" + MAX_PAGES + "页),停止爬取"); |
|||
break; |
|||
} |
|||
|
|||
String url = pageNum == 1 ? BASE_URL : String.format(PAGE_URL_FORMAT, pageNum); |
|||
|
|||
// 设置请求头
|
|||
Map<String, String> headers = Map.of( |
|||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
|||
); |
|||
|
|||
String html = HttpUtil.get(url, headers); |
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
Elements bookElements = doc.select(".product_pod"); |
|||
|
|||
// 如果没有书籍元素,说明已到达最后一页
|
|||
if (bookElements.isEmpty()) { |
|||
System.out.println("第 " + pageNum + " 页没有书籍数据,停止爬取"); |
|||
break; |
|||
} |
|||
|
|||
for (Element bookElement : bookElements) { |
|||
Book book = parseBook(bookElement); |
|||
books.add(book); |
|||
} |
|||
|
|||
System.out.println("已爬取第 " + pageNum + " 页,共 " + books.size() + " 本书"); |
|||
|
|||
// 设置请求间隔
|
|||
HttpUtil.sleep(1); |
|||
|
|||
pageNum++; |
|||
} |
|||
|
|||
return books; |
|||
} catch (NetworkException e) { |
|||
// 如果是404错误且已经爬取了一些数据,返回已获取的数据
|
|||
if (e.getMessage().contains("404") && !books.isEmpty()) { |
|||
System.out.println("第 " + pageNum + " 页不存在(404),返回已爬取的 " + books.size() + " 本书"); |
|||
return books; |
|||
} |
|||
throw new NetworkException("爬取书籍信息时网络异常: " + e.getMessage(), e); |
|||
} catch (ParseException e) { |
|||
throw new ParseException("解析书籍信息时异常: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new CrawlException("爬取书籍信息时发生未知异常: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 解析书籍元素 |
|||
*/ |
|||
private Book parseBook(Element bookElement) throws ParseException { |
|||
try { |
|||
// 获取书名
|
|||
Element titleElement = bookElement.selectFirst("h3 a"); |
|||
String title = titleElement != null ? titleElement.attr("title") : "未知书名"; |
|||
|
|||
// 获取价格
|
|||
Element priceElement = bookElement.selectFirst(".price_color"); |
|||
String price = priceElement != null ? priceElement.text() : "未知价格"; |
|||
|
|||
// 获取库存状态
|
|||
Element availabilityElement = bookElement.selectFirst(".instock.availability"); |
|||
String availability = availabilityElement != null ? availabilityElement.text().trim() : "未知库存"; |
|||
|
|||
// 获取星级评分
|
|||
Element ratingElement = bookElement.selectFirst(".star-rating"); |
|||
String rating = "未知"; |
|||
if (ratingElement != null) { |
|||
String classAttr = ratingElement.attr("class"); |
|||
if (classAttr.contains("One")) rating = "1星"; |
|||
else if (classAttr.contains("Two")) rating = "2星"; |
|||
else if (classAttr.contains("Three")) rating = "3星"; |
|||
else if (classAttr.contains("Four")) rating = "4星"; |
|||
else if (classAttr.contains("Five")) rating = "5星"; |
|||
} |
|||
|
|||
return new Book(title, price, availability, rating); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析书籍信息失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSourceName() { |
|||
return "toscrape.com书籍信息"; |
|||
} |
|||
} |
|||
@ -0,0 +1,27 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.exception.CrawlException; |
|||
|
|||
import java.util.List; |
|||
|
|||
/** |
|||
* 爬取策略接口 |
|||
* 定义爬取操作的标准方法,实现策略模式 |
|||
*/ |
|||
public interface CrawlStrategy<T> { |
|||
|
|||
/** |
|||
* 执行爬取操作 |
|||
* |
|||
* @return 爬取到的数据列表 |
|||
* @throws CrawlException 爬虫异常 |
|||
*/ |
|||
List<T> crawl() throws CrawlException; |
|||
|
|||
/** |
|||
* 获取数据源名称 |
|||
* |
|||
* @return 数据源名称 |
|||
*/ |
|||
String getDataSourceName(); |
|||
} |
|||
@ -0,0 +1,151 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import com.example.crawler.exception.CrawlException; |
|||
import com.example.crawler.exception.NetworkException; |
|||
import com.example.crawler.exception.ParseException; |
|||
import com.example.crawler.model.News; |
|||
import com.example.crawler.util.HttpUtil; |
|||
|
|||
/** |
|||
* 新浪新闻爬取策略 |
|||
* // 策略模式:新浪新闻爬取策略
|
|||
*/ |
|||
public class NewsCrawlStrategy implements CrawlStrategy<News> { |
|||
|
|||
private static final String NEWS_URL = "https://news.sina.com.cn/china/"; |
|||
private static final int MAX_NEWS_COUNT = 20; |
|||
|
|||
@Override |
|||
public List<News> crawl() throws CrawlException { |
|||
List<News> newsList = new ArrayList<>(); |
|||
|
|||
try { |
|||
// 设置请求头
|
|||
Map<String, String> headers = Map.of( |
|||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|||
"Referer", "https://news.sina.com.cn/", |
|||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8" |
|||
); |
|||
|
|||
String html = HttpUtil.get(NEWS_URL, headers); |
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
// 新浪新闻页面结构可能变化,使用多种选择器尝试
|
|||
Elements newsElements = doc.select(".news-item, .news-list li, .list-item, .feed-card-item"); |
|||
|
|||
// 如果上述选择器都没找到,尝试更通用的选择器
|
|||
if (newsElements.isEmpty()) { |
|||
newsElements = doc.select("a[href*=sina.com.cn]"); |
|||
} |
|||
|
|||
int count = 0; |
|||
for (Element element : newsElements) { |
|||
if (count >= MAX_NEWS_COUNT) { |
|||
break; |
|||
} |
|||
|
|||
try { |
|||
News news = parseNews(element); |
|||
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
|||
newsList.add(news); |
|||
count++; |
|||
} |
|||
} catch (ParseException e) { |
|||
// 跳过解析失败的新闻,继续处理下一个
|
|||
continue; |
|||
} |
|||
} |
|||
|
|||
// 如果使用通用选择器获取的结果不够,尝试另一种方式
|
|||
if (newsList.size() < MAX_NEWS_COUNT) { |
|||
Elements titleElements = doc.select("h2 a, h3 a, .title a, .news-title a"); |
|||
for (Element element : titleElements) { |
|||
if (count >= MAX_NEWS_COUNT) { |
|||
break; |
|||
} |
|||
try { |
|||
News news = parseNewsFromTitleElement(element); |
|||
if (news != null && news.getTitle() != null && !news.getTitle().isEmpty()) { |
|||
newsList.add(news); |
|||
count++; |
|||
} |
|||
} catch (ParseException e) { |
|||
continue; |
|||
} |
|||
} |
|||
} |
|||
|
|||
System.out.println("已爬取 " + newsList.size() + " 条新浪新闻"); |
|||
return newsList; |
|||
|
|||
} catch (NetworkException e) { |
|||
throw new NetworkException("爬取新浪新闻时网络异常: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new CrawlException("爬取新浪新闻时发生未知异常: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 解析新闻元素 |
|||
*/ |
|||
private News parseNews(Element element) throws ParseException { |
|||
try { |
|||
String title = ""; |
|||
String url = ""; |
|||
String publishTime = ""; |
|||
|
|||
// 尝试获取标题和链接
|
|||
Element linkElement = element.selectFirst("a"); |
|||
if (linkElement != null) { |
|||
title = linkElement.text().trim(); |
|||
url = linkElement.attr("abs:href"); |
|||
} |
|||
|
|||
// 尝试获取发布时间
|
|||
Element timeElement = element.selectFirst(".time, .pubtime, span[class*=time]"); |
|||
if (timeElement != null) { |
|||
publishTime = timeElement.text().trim(); |
|||
} |
|||
|
|||
if (title.isEmpty() || url.isEmpty()) { |
|||
return null; |
|||
} |
|||
|
|||
return new News(title, publishTime, url); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析新闻信息失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 从标题元素解析新闻 |
|||
*/ |
|||
private News parseNewsFromTitleElement(Element element) throws ParseException { |
|||
try { |
|||
String title = element.text().trim(); |
|||
String url = element.attr("abs:href"); |
|||
|
|||
if (title.isEmpty() || url.isEmpty()) { |
|||
return null; |
|||
} |
|||
|
|||
return new News(title, "", url); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析新闻标题失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSourceName() { |
|||
return "新浪国内新闻"; |
|||
} |
|||
} |
|||
@ -0,0 +1,24 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import com.example.crawler.strategy.BookCrawlStrategy; |
|||
import com.example.crawler.strategy.NewsCrawlStrategy; |
|||
import com.example.crawler.strategy.UniversityRankCrawlStrategy; |
|||
import com.example.crawler.strategy.WeatherCrawlStrategy; |
|||
|
|||
public class StrategyFactory { |
|||
|
|||
public static CrawlStrategy<?> getStrategy(int choice) { |
|||
switch (choice) { |
|||
case 1: |
|||
return new BookCrawlStrategy(); |
|||
case 2: |
|||
return new NewsCrawlStrategy(); |
|||
case 3: |
|||
return new UniversityRankCrawlStrategy(); |
|||
case 4: |
|||
return new WeatherCrawlStrategy(); |
|||
default: |
|||
throw new IllegalArgumentException("Invalid choice: " + choice); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,148 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import com.example.crawler.exception.CrawlException; |
|||
import com.example.crawler.exception.NetworkException; |
|||
import com.example.crawler.exception.ParseException; |
|||
import com.example.crawler.model.UniversityRank; |
|||
import com.example.crawler.util.HttpUtil; |
|||
|
|||
/** |
|||
* 软科中国大学排名爬取策略 |
|||
* // 策略模式:软科中国大学排名爬取策略
|
|||
*/ |
|||
public class UniversityRankCrawlStrategy implements CrawlStrategy<UniversityRank> { |
|||
|
|||
private static final String RANKING_URL = "https://www.shanghairanking.cn/rankings/bcur/2025"; |
|||
|
|||
@Override |
|||
public List<UniversityRank> crawl() throws CrawlException { |
|||
List<UniversityRank> rankings = new ArrayList<>(); |
|||
|
|||
try { |
|||
// 设置请求头
|
|||
Map<String, String> headers = Map.of( |
|||
"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", |
|||
"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", |
|||
"Referer", "https://www.shanghairanking.cn/" |
|||
); |
|||
|
|||
// 设置请求延迟
|
|||
HttpUtil.sleep(3); |
|||
|
|||
String html = HttpUtil.get(RANKING_URL, headers); |
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
// 提取表格数据
|
|||
Elements rows = doc.select("table tbody tr"); |
|||
|
|||
if (rows.isEmpty()) { |
|||
// 如果第一个选择器失败,尝试其他可能的选择器
|
|||
rows = doc.select(".rk-table tbody tr"); |
|||
} |
|||
|
|||
if (rows.isEmpty()) { |
|||
// 尝试更通用的选择器
|
|||
rows = doc.select("tr"); |
|||
} |
|||
|
|||
int count = 0; |
|||
for (Element row : rows) { |
|||
try { |
|||
UniversityRank ranking = parseRow(row); |
|||
if (ranking != null && ranking.getRank() != null) { |
|||
rankings.add(ranking); |
|||
count++; |
|||
|
|||
// 最多爬取200条数据
|
|||
if (count >= 200) { |
|||
break; |
|||
} |
|||
} |
|||
} catch (ParseException e) { |
|||
// 跳过解析失败的行
|
|||
continue; |
|||
} |
|||
} |
|||
|
|||
System.out.println("已爬取 " + rankings.size() + " 条大学排名数据"); |
|||
return rankings; |
|||
|
|||
} catch (NetworkException e) { |
|||
throw new NetworkException("爬取软科大学排名时网络异常: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new CrawlException("爬取软科大学排名时发生未知异常: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 解析表格行数据 |
|||
*/ |
|||
private UniversityRank parseRow(Element row) throws ParseException { |
|||
try { |
|||
Elements cells = row.select("td"); |
|||
|
|||
if (cells.size() < 4) { |
|||
return null; |
|||
} |
|||
|
|||
// 第1列:排名
|
|||
String rankStr = cells.get(0).text().trim(); |
|||
Integer rank = null; |
|||
try { |
|||
rank = Integer.parseInt(rankStr); |
|||
} catch (NumberFormatException e) { |
|||
// 如果排名不是数字(如"1-3"这样的范围),尝试提取第一个数字
|
|||
String numPart = rankStr.replaceAll("[^0-9]", ""); |
|||
if (!numPart.isEmpty()) { |
|||
rank = Integer.parseInt(numPart); |
|||
} |
|||
} |
|||
|
|||
if (rank == null) { |
|||
return null; |
|||
} |
|||
|
|||
// 第2列:学校名称
|
|||
String universityName = cells.get(1).text().trim(); |
|||
|
|||
// 第4列:总分
|
|||
String totalScore = ""; |
|||
if (cells.size() > 3) { |
|||
totalScore = cells.get(3).text().trim(); |
|||
} |
|||
|
|||
// 尝试提取省份和办学层次(第3列可能包含这些信息)
|
|||
String province = ""; |
|||
String category = ""; |
|||
if (cells.size() > 2) { |
|||
String thirdColumn = cells.get(2).text().trim(); |
|||
// 尝试解析省份和办学层次
|
|||
String[] parts = thirdColumn.split("\\s+"); |
|||
if (parts.length >= 1) { |
|||
province = parts[0]; |
|||
} |
|||
if (parts.length >= 2) { |
|||
category = parts[1]; |
|||
} |
|||
} |
|||
|
|||
return new UniversityRank(rank, universityName, totalScore, province, category); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析大学排名行数据失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSourceName() { |
|||
return "软科中国大学排名"; |
|||
} |
|||
} |
|||
@ -0,0 +1,177 @@ |
|||
package com.example.crawler.strategy; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
import com.example.crawler.constant.CrawlerConstants; |
|||
import com.example.crawler.exception.CrawlException; |
|||
import com.example.crawler.exception.NetworkException; |
|||
import com.example.crawler.exception.ParseException; |
|||
import com.example.crawler.model.Weather; |
|||
import com.example.crawler.util.HttpUtil; |
|||
import com.google.gson.JsonArray; |
|||
import com.google.gson.JsonElement; |
|||
import com.google.gson.JsonObject; |
|||
import com.google.gson.JsonParser; |
|||
|
|||
public class WeatherCrawlStrategy implements CrawlStrategy<Weather> { |
|||
|
|||
@Override |
|||
public List<Weather> crawl() throws CrawlException { |
|||
List<Weather> weatherList = new ArrayList<>(); |
|||
|
|||
try { |
|||
for (Map.Entry<String, double[]> entry : CrawlerConstants.CITY_COORDINATES.entrySet()) { |
|||
String cityName = entry.getKey(); |
|||
double[] coords = entry.getValue(); |
|||
double latitude = coords[0]; |
|||
double longitude = coords[1]; |
|||
|
|||
String weatherUrl = buildApiUrl(latitude, longitude); |
|||
Map<String, String> headers = Map.of( |
|||
"User-Agent", CrawlerConstants.USER_AGENT |
|||
); |
|||
|
|||
String response = HttpUtil.get(weatherUrl, headers); |
|||
Weather weather = parseWeatherData(cityName, response); |
|||
weatherList.add(weather); |
|||
|
|||
System.out.println("已获取 " + cityName + " 的天气信息"); |
|||
|
|||
HttpUtil.sleep(2); |
|||
} |
|||
|
|||
return weatherList; |
|||
|
|||
} catch (NetworkException e) { |
|||
throw new NetworkException("爬取天气数据时网络异常: " + e.getMessage(), e); |
|||
} catch (ParseException e) { |
|||
throw new ParseException("解析天气数据时异常: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new CrawlException("爬取天气数据时发生未知异常: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
private String buildApiUrl(double latitude, double longitude) { |
|||
return CrawlerConstants.URL_WEATHER_API + "?latitude=" + latitude + |
|||
"&longitude=" + longitude + |
|||
"¤t_weather=true" + |
|||
"&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" + |
|||
"&forecast_days=1" + |
|||
"&timezone=Asia/Shanghai"; |
|||
} |
|||
|
|||
private Weather parseWeatherData(String cityName, String jsonData) throws ParseException { |
|||
try { |
|||
JsonObject obj = JsonParser.parseString(jsonData).getAsJsonObject(); |
|||
|
|||
Weather weather = new Weather(); |
|||
weather.setCityName(cityName); |
|||
|
|||
JsonObject currentWeather = obj.getAsJsonObject("current_weather"); |
|||
if (currentWeather != null) { |
|||
weather.setTemperature(cleanTemperature(getJsonDouble(currentWeather, "temperature", 0))); |
|||
weather.setWindSpeed(cleanWindSpeed(getJsonDouble(currentWeather, "windspeed", 0))); |
|||
weather.setWeatherCode(String.valueOf(getJsonInt(currentWeather, "weathercode", -1))); |
|||
} |
|||
|
|||
JsonObject hourly = obj.getAsJsonObject("hourly"); |
|||
if (hourly != null) { |
|||
JsonArray times = hourly.getAsJsonArray("time"); |
|||
JsonArray temps = hourly.getAsJsonArray("temperature_2m"); |
|||
JsonArray humidities = hourly.getAsJsonArray("relative_humidity_2m"); |
|||
JsonArray windSpeeds = hourly.getAsJsonArray("wind_speed_10m"); |
|||
|
|||
if (times != null && temps != null) { |
|||
int count = Math.min(times.size(), 24); |
|||
for (int i = 0; i < count; i++) { |
|||
weather.getHourlyTimes().add(cleanTimeString(getJsonString(times, i, ""))); |
|||
weather.getHourlyTemperatures().add(cleanTemperature(getJsonDouble(temps, i, 0))); |
|||
} |
|||
} |
|||
|
|||
if (humidities != null) { |
|||
int count = Math.min(humidities.size(), 24); |
|||
for (int i = 0; i < count; i++) { |
|||
weather.getHourlyHumidities().add(cleanHumidity(getJsonInt(humidities, i, 50))); |
|||
} |
|||
} |
|||
|
|||
if (windSpeeds != null) { |
|||
int count = Math.min(windSpeeds.size(), 24); |
|||
for (int i = 0; i < count; i++) { |
|||
weather.getHourlyWindSpeeds().add(cleanWindSpeed(getJsonDouble(windSpeeds, i, 0))); |
|||
} |
|||
} |
|||
|
|||
if (!weather.getHourlyHumidities().isEmpty()) { |
|||
weather.setHumidity(weather.getHourlyHumidities().get(0)); |
|||
} |
|||
} |
|||
|
|||
return weather; |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析天气JSON数据失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
private String getJsonString(JsonArray arr, int index, String defaultValue) { |
|||
if (arr == null || index >= arr.size()) return defaultValue; |
|||
JsonElement element = arr.get(index); |
|||
return element.isJsonNull() ? defaultValue : element.getAsString(); |
|||
} |
|||
|
|||
private double getJsonDouble(JsonObject obj, String key, double defaultValue) { |
|||
JsonElement element = obj.get(key); |
|||
if (element == null || element.isJsonNull()) return defaultValue; |
|||
return element.getAsDouble(); |
|||
} |
|||
|
|||
private int getJsonInt(JsonObject obj, String key, int defaultValue) { |
|||
JsonElement element = obj.get(key); |
|||
if (element == null || element.isJsonNull()) return defaultValue; |
|||
return element.getAsInt(); |
|||
} |
|||
|
|||
private double getJsonDouble(JsonArray arr, int index, double defaultValue) { |
|||
if (arr == null || index >= arr.size()) return defaultValue; |
|||
JsonElement element = arr.get(index); |
|||
if (element == null || element.isJsonNull()) return defaultValue; |
|||
return element.getAsDouble(); |
|||
} |
|||
|
|||
private int getJsonInt(JsonArray arr, int index, int defaultValue) { |
|||
if (arr == null || index >= arr.size()) return defaultValue; |
|||
JsonElement element = arr.get(index); |
|||
if (element == null || element.isJsonNull()) return defaultValue; |
|||
return element.getAsInt(); |
|||
} |
|||
|
|||
private double cleanTemperature(double temp) { |
|||
return Math.round(temp * 10.0) / 10.0; |
|||
} |
|||
|
|||
private double cleanWindSpeed(double speed) { |
|||
return Math.round(speed * 10.0) / 10.0; |
|||
} |
|||
|
|||
private int cleanHumidity(int humidity) { |
|||
if (humidity < 0) return 50; |
|||
if (humidity > 100) return 100; |
|||
return humidity; |
|||
} |
|||
|
|||
private String cleanTimeString(String time) { |
|||
if (time == null || time.isEmpty()) return ""; |
|||
if (time.contains("T")) { |
|||
return time.substring(time.indexOf("T") + 1, time.indexOf("T") + 6); |
|||
} |
|||
return time; |
|||
} |
|||
|
|||
@Override |
|||
public String getDataSourceName() { |
|||
return "Open-Meteo 实时天气"; |
|||
} |
|||
} |
|||
@ -0,0 +1,122 @@ |
|||
package com.example.crawler.util; |
|||
|
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
/**
 * Data-cleaning utilities: static helpers that normalise scraped text into
 * numbers, timestamps, titles and word lists.
 */
public class DataCleaner {

    /** Single-character Chinese stop words excluded from word-frequency counts. */
    private static final java.util.Set<String> STOP_WORDS = java.util.Set.of(
            "的", "了", "是", "在", "和", "与", "对", "为", "有", "我",
            "你", "他", "她", "它", "这", "那", "就", "也", "都", "要",
            "会", "能", "可", "以", "说", "到", "来", "去", "着", "过");

    /** Matches every character that is neither an ASCII digit nor a dot. */
    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");

    /** Matches a run of whitespace. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Matches every character outside the CJK range used here, ASCII letters and digits. */
    private static final Pattern NON_WORD_CHAR = Pattern.compile("[^\u4e00-\u9fa5a-zA-Z0-9]");

    /** Timestamp formats accepted by {@link #cleanNewsTime(String)}, tried in order. */
    private static final DateTimeFormatter[] NEWS_TIME_FORMATS = {
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"),
            DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH:mm")
    };

    /**
     * Extracts a price from text by dropping everything except digits and dots.
     *
     * @param price raw price text; may be {@code null}
     * @return the parsed value, or 0.0 if the input is null, empty or not a number
     */
    public static double cleanPrice(String price) {
        return parseDecimal(price);
    }

    /**
     * Maps a rating class name containing "One" to "Five" onto 1 to 5.
     *
     * @param ratingClass raw class text; may be {@code null}
     * @return the rating, or 0 if the input is null or contains none of the words
     */
    public static int cleanRating(String ratingClass) {
        if (ratingClass == null) {
            return 0;
        }
        if (ratingClass.contains("Five")) {
            return 5;
        }
        if (ratingClass.contains("Four")) {
            return 4;
        }
        if (ratingClass.contains("Three")) {
            return 3;
        }
        if (ratingClass.contains("Two")) {
            return 2;
        }
        if (ratingClass.contains("One")) {
            return 1;
        }
        return 0;
    }

    /**
     * Parses a news timestamp in either "yyyy-MM-dd HH:mm:ss" or
     * "yyyy年MM月dd日 HH:mm" form; surrounding whitespace is ignored.
     *
     * @param timeStr raw timestamp text; may be {@code null}
     * @return the parsed time, or the current time if the input is null, empty or
     *         matches neither format
     */
    public static LocalDateTime cleanNewsTime(String timeStr) {
        if (timeStr == null || timeStr.isEmpty()) {
            return LocalDateTime.now();
        }
        String candidate = timeStr.trim();
        for (DateTimeFormatter format : NEWS_TIME_FORMATS) {
            try {
                return LocalDateTime.parse(candidate, format);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not this format; try the next one.
            }
        }
        return LocalDateTime.now();
    }

    /**
     * Trims a title and collapses each whitespace run to a single space.
     *
     * @param title raw title; may be {@code null}
     * @return the normalised title, or "" for {@code null}
     */
    public static String cleanTitle(String title) {
        if (title == null) {
            return "";
        }
        return WHITESPACE_RUN.matcher(title.trim()).replaceAll(" ");
    }

    /**
     * Extracts a score from text by dropping everything except digits and dots.
     *
     * @param score raw score text; may be {@code null}
     * @return the parsed value, or 0.0 if the input is null, empty or not a number
     */
    public static double cleanScore(String score) {
        return parseDecimal(score);
    }

    /**
     * Splits text into words, treating every character that is not a CJK
     * character, ASCII letter or digit as a separator.
     *
     * <p>The result never contains empty strings, even when the text starts with
     * a separator.
     *
     * @param text raw text; may be {@code null}
     * @return the words in order; an empty array if there are none
     */
    public static String[] extractWords(String text) {
        if (text == null || text.isEmpty()) {
            return new String[0];
        }
        // Trim first: a leading separator would otherwise produce an empty first token.
        String cleaned = NON_WORD_CHAR.matcher(text).replaceAll(" ").trim();
        if (cleaned.isEmpty()) {
            return new String[0];
        }
        return WHITESPACE_RUN.split(cleaned);
    }

    /**
     * Tells whether a word should be ignored when counting frequencies.
     *
     * @param word candidate word; may be {@code null}
     * @return {@code true} if the word is null, shorter than two characters or a
     *         listed stop word
     */
    public static boolean isStopWord(String word) {
        return word == null || word.length() < 2 || STOP_WORDS.contains(word);
    }

    /**
     * Counts how often each non-stop word occurs.
     *
     * @param words words to count; {@code null} is treated as empty
     * @return a mutable map from word to occurrence count
     */
    public static Map<String, Integer> countWordFrequency(String[] words) {
        Map<String, Integer> frequency = new HashMap<>();
        if (words == null) {
            return frequency;
        }
        for (String word : words) {
            if (!isStopWord(word)) {
                frequency.merge(word, 1, Integer::sum);
            }
        }
        return frequency;
    }

    /**
     * Returns the hour-of-day of a timestamp.
     *
     * @param dateTime the timestamp; must not be {@code null}
     * @return the hour, 0 to 23
     */
    public static int extractHour(LocalDateTime dateTime) {
        return dateTime.getHour();
    }

    /** Keeps digits and dots and parses the rest as a double; 0.0 on null, empty or unparseable input. */
    private static double parseDecimal(String raw) {
        if (raw == null || raw.isEmpty()) {
            return 0.0;
        }
        String cleaned = NON_NUMERIC.matcher(raw).replaceAll("");
        try {
            return Double.parseDouble(cleaned);
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }
}
|||
@ -0,0 +1,126 @@ |
|||
package com.example.crawler.util; |
|||
|
|||
import com.example.crawler.exception.NetworkException; |
|||
|
|||
import java.net.URI; |
|||
import java.net.http.HttpClient; |
|||
import java.net.http.HttpRequest; |
|||
import java.net.http.HttpResponse; |
|||
import java.time.Duration; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* HTTP工具类 |
|||
* 封装HTTP请求操作,使用Java 11内置HttpClient |
|||
*/ |
|||
public class HttpUtil { |
|||
|
|||
private static final HttpClient httpClient = HttpClient.newBuilder() |
|||
.connectTimeout(Duration.ofSeconds(30)) |
|||
.followRedirects(HttpClient.Redirect.NORMAL) |
|||
.build(); |
|||
|
|||
private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; |
|||
|
|||
/** |
|||
* 发送GET请求 |
|||
* |
|||
* @param url 请求URL |
|||
* @return 响应内容 |
|||
* @throws NetworkException 网络异常 |
|||
*/ |
|||
public static String get(String url) throws NetworkException { |
|||
return get(url, Map.of()); |
|||
} |
|||
|
|||
/** |
|||
* 发送GET请求(带请求头) |
|||
* |
|||
* @param url 请求URL |
|||
* @param headers 请求头 |
|||
* @return 响应内容 |
|||
* @throws NetworkException 网络异常 |
|||
*/ |
|||
public static String get(String url, Map<String, String> headers) throws NetworkException { |
|||
try { |
|||
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
|||
.uri(URI.create(url)) |
|||
.timeout(Duration.ofSeconds(30)) |
|||
.GET(); |
|||
|
|||
// 添加默认User-Agent
|
|||
if (!headers.containsKey("User-Agent")) { |
|||
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
|||
} |
|||
|
|||
// 添加自定义请求头
|
|||
headers.forEach(requestBuilder::header); |
|||
|
|||
HttpRequest request = requestBuilder.build(); |
|||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
|||
|
|||
if (response.statusCode() != 200) { |
|||
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
|||
} |
|||
|
|||
return response.body(); |
|||
} catch (NetworkException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 发送POST请求 |
|||
* |
|||
* @param url 请求URL |
|||
* @param body 请求体 |
|||
* @param headers 请求头 |
|||
* @return 响应内容 |
|||
* @throws NetworkException 网络异常 |
|||
*/ |
|||
public static String post(String url, String body, Map<String, String> headers) throws NetworkException { |
|||
try { |
|||
HttpRequest.Builder requestBuilder = HttpRequest.newBuilder() |
|||
.uri(URI.create(url)) |
|||
.timeout(Duration.ofSeconds(30)) |
|||
.header("Content-Type", "application/json") |
|||
.POST(HttpRequest.BodyPublishers.ofString(body)); |
|||
|
|||
// 添加默认User-Agent
|
|||
if (!headers.containsKey("User-Agent")) { |
|||
requestBuilder.header("User-Agent", DEFAULT_USER_AGENT); |
|||
} |
|||
|
|||
// 添加自定义请求头
|
|||
headers.forEach(requestBuilder::header); |
|||
|
|||
HttpRequest request = requestBuilder.build(); |
|||
HttpResponse<String> response = httpClient.send(request, HttpResponse.BodyHandlers.ofString()); |
|||
|
|||
if (response.statusCode() != 200) { |
|||
throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode()); |
|||
} |
|||
|
|||
return response.body(); |
|||
} catch (NetworkException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new NetworkException("网络请求失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 设置请求间隔,避免对服务器造成压力 |
|||
* |
|||
* @param seconds 间隔秒数 |
|||
*/ |
|||
public static void sleep(int seconds) { |
|||
try { |
|||
Thread.sleep(seconds * 1000L); |
|||
} catch (InterruptedException e) { |
|||
Thread.currentThread().interrupt(); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,95 @@ |
|||
package com.example.crawler.util; |
|||
|
|||
import com.example.crawler.exception.DataSaveException; |
|||
import com.google.gson.Gson; |
|||
import com.google.gson.GsonBuilder; |
|||
|
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.nio.file.Files; |
|||
import java.nio.file.Path; |
|||
import java.nio.file.Paths; |
|||
import java.util.List; |
|||
|
|||
/** |
|||
* JSON工具类 |
|||
* 封装JSON序列化和文件读写操作 |
|||
*/ |
|||
public class JsonUtil { |
|||
|
|||
private static final Gson gson = new GsonBuilder() |
|||
.setPrettyPrinting() |
|||
.disableHtmlEscaping() |
|||
.create(); |
|||
|
|||
/** |
|||
* 将对象序列化为JSON字符串 |
|||
* |
|||
* @param obj 对象 |
|||
* @return JSON字符串 |
|||
*/ |
|||
public static String toJson(Object obj) { |
|||
return gson.toJson(obj); |
|||
} |
|||
|
|||
/** |
|||
* 将JSON字符串反序列化为对象 |
|||
* |
|||
* @param json JSON字符串 |
|||
* @param classOfT 目标类 |
|||
* @param <T> 泛型类型 |
|||
* @return 反序列化后的对象 |
|||
*/ |
|||
public static <T> T fromJson(String json, Class<T> classOfT) { |
|||
return gson.fromJson(json, classOfT); |
|||
} |
|||
|
|||
/** |
|||
* 将对象保存为JSON文件 |
|||
* |
|||
* @param obj 对象 |
|||
* @param filePath 文件路径 |
|||
* @throws DataSaveException 数据保存异常 |
|||
*/ |
|||
public static void saveToJsonFile(Object obj, String filePath) throws DataSaveException { |
|||
try { |
|||
// 确保目录存在
|
|||
Path path = Paths.get(filePath); |
|||
Path parentDir = path.getParent(); |
|||
if (parentDir != null && !Files.exists(parentDir)) { |
|||
Files.createDirectories(parentDir); |
|||
} |
|||
|
|||
try (FileWriter writer = new FileWriter(filePath)) { |
|||
gson.toJson(obj, writer); |
|||
} |
|||
} catch (IOException e) { |
|||
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 将列表保存为JSON文件 |
|||
* |
|||
* @param list 列表 |
|||
* @param filePath 文件路径 |
|||
* @param <T> 泛型类型 |
|||
* @throws DataSaveException 数据保存异常 |
|||
*/ |
|||
public static <T> void saveListToJsonFile(List<T> list, String filePath) throws DataSaveException { |
|||
try { |
|||
// 确保目录存在
|
|||
Path path = Paths.get(filePath); |
|||
Path parentDir = path.getParent(); |
|||
if (parentDir != null && !Files.exists(parentDir)) { |
|||
Files.createDirectories(parentDir); |
|||
} |
|||
|
|||
try (FileWriter writer = new FileWriter(filePath)) { |
|||
gson.toJson(list, writer); |
|||
} |
|||
} catch (IOException e) { |
|||
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,72 @@ |
|||
package com.example.crawler.view; |
|||
|
|||
import java.util.Scanner; |
|||
|
|||
/**
 * Crawler view class.
 *
 * <p>MVC pattern: the View layer, responsible for CLI output and user interaction.
 */
public class CrawlerView {

    /** Menu lines printed by {@link #showMenu()}, each followed by a newline. */
    private static final String[] MENU_LINES = {
            "\n=== 数据爬取与分析系统 ===",
            "1. 爬取书籍信息(toscrape.com)",
            "2. 爬取新浪国内新闻",
            "3. 爬取软科中国大学排名",
            "4. 爬取Open-Meteo实时天气",
            "5. 爬取全部数据并保存",
            "6. 保存当前数据到文件",
            "7. 生成所有数据源的分析报告与图表",
            "8. 爬取并分析所有数据(一键完成)",
            "9. 退出"
    };

    /**
     * Prints the main menu followed by the selection prompt (without a
     * trailing newline).
     */
    public void showMenu() {
        for (String menuLine : MENU_LINES) {
            System.out.println(menuLine);
        }
        System.out.print("请选择操作:");
    }

    /**
     * Reads one line and interprets it as a menu number.
     *
     * @param scanner the input scanner
     * @return the number entered, or -1 if the trimmed line is not an integer
     */
    public int getInput(Scanner scanner) {
        final String raw = scanner.nextLine().trim();
        int selection;
        try {
            selection = Integer.parseInt(raw);
        } catch (NumberFormatException notANumber) {
            selection = -1; // sentinel for invalid input
        }
        return selection;
    }

    /**
     * Prints an error message to standard error.
     *
     * @param message the error text
     */
    public void showError(String message) {
        final String line = "错误: " + message;
        System.err.println(line);
    }

    /**
     * Prints a success message to standard output.
     *
     * @param message the success text
     */
    public void showSuccess(String message) {
        final String line = "成功: " + message;
        System.out.println(line);
    }

    /**
     * Waits for the user to press Enter, then emits the ANSI sequence that
     * homes the cursor and clears the screen.
     *
     * @param scanner the input scanner
     */
    public void pause(Scanner scanner) {
        System.out.print("\n按回车键继续...");
        scanner.nextLine();

        final String clearScreen = "\033[H\033[2J";
        System.out.print(clearScreen);
        System.out.flush();
    }
}
|||
@ -1,359 +0,0 @@ |
|||
package com.university; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.Optional; |
|||
import java.util.Scanner; |
|||
|
|||
import com.university.analysis.RankAnalyzer; |
|||
import com.university.crawler.UniversityRankCrawler; |
|||
import com.university.model.RankChange; |
|||
import com.university.model.University; |
|||
import com.university.model.UniversityComparison; |
|||
import com.university.storage.DataStorage; |
|||
import com.university.visualization.ChartGenerator; |
|||
import com.university.visualization.ConsoleReporter; |
|||
|
|||
/** |
|||
* 主程序入口 |
|||
* 整合所有模块,提供交互式菜单 |
|||
*/ |
|||
public class Main { |
|||
|
|||
// 核心组件
|
|||
private final UniversityRankCrawler crawler; |
|||
private final DataStorage storage; |
|||
private final RankAnalyzer analyzer; |
|||
private final ChartGenerator chartGenerator; |
|||
private final ConsoleReporter reporter; |
|||
|
|||
// 数据缓存
|
|||
private Map<Integer, List<University>> dataCache; |
|||
private Scanner scanner; |
|||
|
|||
public Main() { |
|||
this.crawler = new UniversityRankCrawler(); |
|||
this.storage = new DataStorage(); |
|||
this.analyzer = new RankAnalyzer(); |
|||
this.chartGenerator = new ChartGenerator(); |
|||
this.reporter = new ConsoleReporter(); |
|||
this.dataCache = new HashMap<>(); |
|||
this.scanner = new Scanner(System.in); |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
Main app = new Main(); |
|||
app.run(); |
|||
} |
|||
|
|||
/** |
|||
* 运行主程序 |
|||
*/ |
|||
public void run() { |
|||
// 打印欢迎信息
|
|||
reporter.printWelcome(); |
|||
|
|||
// 初始化数据
|
|||
initializeData(); |
|||
|
|||
// 主循环
|
|||
boolean running = true; |
|||
while (running) { |
|||
reporter.printMenu(); |
|||
String choice = scanner.nextLine().trim(); |
|||
|
|||
switch (choice) { |
|||
case "1": |
|||
showTopN(); |
|||
break; |
|||
case "2": |
|||
showByProvince(); |
|||
break; |
|||
case "3": |
|||
searchUniversity(); |
|||
break; |
|||
case "4": |
|||
showProvinceStatistics(); |
|||
break; |
|||
case "5": |
|||
showScoreStatistics(); |
|||
break; |
|||
case "6": |
|||
showRankChanges(); |
|||
break; |
|||
case "7": |
|||
compareUniversities(); |
|||
break; |
|||
case "8": |
|||
showYearlyTrend(); |
|||
break; |
|||
case "9": |
|||
generateAllCharts(); |
|||
break; |
|||
case "0": |
|||
running = false; |
|||
System.out.println("感谢使用,再见!"); |
|||
break; |
|||
default: |
|||
System.out.println("无效选择,请重新输入!"); |
|||
} |
|||
} |
|||
|
|||
scanner.close(); |
|||
} |
|||
|
|||
/** |
|||
* 初始化数据 |
|||
*/ |
|||
private void initializeData() { |
|||
System.out.println("正在初始化数据..."); |
|||
|
|||
// 爬取2022-2024年的数据
|
|||
int[] years = {2022, 2023, 2024}; |
|||
|
|||
for (int year : years) { |
|||
List<University> data; |
|||
|
|||
// 先尝试从文件读取
|
|||
if (storage.dataExists(year)) { |
|||
System.out.println("从文件加载 " + year + " 年数据..."); |
|||
data = storage.readRawData(year); |
|||
} else { |
|||
// 文件不存在则爬取
|
|||
System.out.println("爬取 " + year + " 年数据..."); |
|||
data = crawler.crawlRankings(year); |
|||
// 保存到文件
|
|||
storage.saveRawData(data, year); |
|||
} |
|||
|
|||
dataCache.put(year, data); |
|||
} |
|||
|
|||
System.out.println("数据初始化完成!\n"); |
|||
} |
|||
|
|||
/** |
|||
* 显示Top N |
|||
*/ |
|||
private void showTopN() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
System.out.print("请输入要查看的数量: "); |
|||
int n = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
List<University> topN = analyzer.getTopN(data, n); |
|||
reporter.printUniversityList(topN, year + "年 Top " + n + " 高校"); |
|||
|
|||
// 生成图表
|
|||
chartGenerator.generateTopNBarChart(data, year, n); |
|||
} |
|||
|
|||
/** |
|||
* 按省份查看 |
|||
*/ |
|||
private void showByProvince() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
System.out.print("请输入省份名称: "); |
|||
String province = scanner.nextLine().trim(); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
List<University> result = analyzer.getByProvince(data, province); |
|||
if (result.isEmpty()) { |
|||
System.out.println("该省份没有高校数据!"); |
|||
} else { |
|||
reporter.printUniversityList(result, year + "年 " + province + " 高校"); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 搜索高校 |
|||
*/ |
|||
private void searchUniversity() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
System.out.print("请输入搜索关键词: "); |
|||
String keyword = scanner.nextLine().trim(); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
List<University> result = analyzer.searchUniversity(data, keyword); |
|||
if (result.isEmpty()) { |
|||
System.out.println("未找到匹配的高校!"); |
|||
} else { |
|||
reporter.printUniversityList(result, "搜索结果"); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 显示省份统计 |
|||
*/ |
|||
private void showProvinceStatistics() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
Map<String, Long> provinceCount = analyzer.countByProvince(data); |
|||
reporter.printProvinceStatistics(provinceCount, year + "年 省份分布统计"); |
|||
|
|||
// 生成图表
|
|||
chartGenerator.generateProvincePieChart(provinceCount, year); |
|||
} |
|||
|
|||
/** |
|||
* 显示分数统计 |
|||
*/ |
|||
private void showScoreStatistics() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
RankAnalyzer.ScoreStatistics stats = analyzer.getScoreStatistics(data); |
|||
reporter.printScoreStatistics(stats, year + "年 分数统计"); |
|||
} |
|||
|
|||
/** |
|||
* 显示排名变化 |
|||
*/ |
|||
private void showRankChanges() { |
|||
List<RankChange> changes = analyzer.calculateRankChanges(dataCache); |
|||
|
|||
// 显示上升最快
|
|||
List<RankChange> rising = analyzer.getFastestRising(changes, 5); |
|||
reporter.printRankChanges(rising, "排名上升最快 Top 5"); |
|||
|
|||
// 显示下降最快
|
|||
List<RankChange> falling = analyzer.getFastestFalling(changes, 5); |
|||
reporter.printRankChanges(falling, "排名下降最快 Top 5"); |
|||
|
|||
// 生成图表
|
|||
if (!rising.isEmpty()) { |
|||
chartGenerator.generateRankChangeChart(rising, "排名上升最快", "rank_rising.png"); |
|||
} |
|||
if (!falling.isEmpty()) { |
|||
chartGenerator.generateRankChangeChart(falling, "排名下降最快", "rank_falling.png"); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 对比两所高校 |
|||
*/ |
|||
private void compareUniversities() { |
|||
System.out.print("请输入要查看的年份(2022-2024): "); |
|||
int year = Integer.parseInt(scanner.nextLine().trim()); |
|||
|
|||
System.out.print("请输入第一所高校名称: "); |
|||
String name1 = scanner.nextLine().trim(); |
|||
|
|||
System.out.print("请输入第二所高校名称: "); |
|||
String name2 = scanner.nextLine().trim(); |
|||
|
|||
List<University> data = dataCache.get(year); |
|||
if (data == null) { |
|||
System.out.println("该年份数据不存在!"); |
|||
return; |
|||
} |
|||
|
|||
Optional<University> u1 = data.stream() |
|||
.filter(u -> u.getName().equals(name1)) |
|||
.findFirst(); |
|||
Optional<University> u2 = data.stream() |
|||
.filter(u -> u.getName().equals(name2)) |
|||
.findFirst(); |
|||
|
|||
if (u1.isPresent() && u2.isPresent()) { |
|||
UniversityComparison comparison = analyzer.compareUniversities(u1.get(), u2.get()); |
|||
reporter.printComparison(comparison); |
|||
} else { |
|||
System.out.println("未找到指定的高校!"); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 显示某高校历年趋势 |
|||
*/ |
|||
private void showYearlyTrend() { |
|||
System.out.print("请输入高校名称: "); |
|||
String name = scanner.nextLine().trim(); |
|||
|
|||
List<University> history = analyzer.getUniversityHistory(dataCache, name); |
|||
|
|||
if (history.isEmpty()) { |
|||
System.out.println("未找到该高校的数据!"); |
|||
} else { |
|||
reporter.printYearlyTrend(history, name); |
|||
chartGenerator.generateRankTrendLineChart(history, name); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 生成所有图表 |
|||
*/ |
|||
private void generateAllCharts() { |
|||
System.out.println("正在生成所有图表..."); |
|||
|
|||
for (Map.Entry<Integer, List<University>> entry : dataCache.entrySet()) { |
|||
int year = entry.getKey(); |
|||
List<University> data = entry.getValue(); |
|||
|
|||
// Top 10 柱状图
|
|||
chartGenerator.generateTopNBarChart(data, year, 10); |
|||
|
|||
// 省份分布饼图
|
|||
Map<String, Long> provinceCount = analyzer.countByProvince(data); |
|||
chartGenerator.generateProvincePieChart(provinceCount, year); |
|||
} |
|||
|
|||
// 排名变化图
|
|||
List<RankChange> changes = analyzer.calculateRankChanges(dataCache); |
|||
List<RankChange> rising = analyzer.getFastestRising(changes, 10); |
|||
List<RankChange> falling = analyzer.getFastestFalling(changes, 10); |
|||
|
|||
if (!rising.isEmpty()) { |
|||
chartGenerator.generateRankChangeChart(rising, "排名上升最快", "rank_rising.png"); |
|||
} |
|||
if (!falling.isEmpty()) { |
|||
chartGenerator.generateRankChangeChart(falling, "排名下降最快", "rank_falling.png"); |
|||
} |
|||
|
|||
// 为Top 5高校生成历年趋势折线图
|
|||
List<University> topUniversities = analyzer.getTopN(dataCache.get(2024), 5); |
|||
for (University u : topUniversities) { |
|||
List<University> history = analyzer.getUniversityHistory(dataCache, u.getName()); |
|||
if (!history.isEmpty()) { |
|||
chartGenerator.generateRankTrendLineChart(history, u.getName()); |
|||
} |
|||
} |
|||
|
|||
System.out.println("所有图表生成完成!\n"); |
|||
} |
|||
} |
|||
@ -1,250 +0,0 @@ |
|||
package com.university.analysis; |
|||
|
|||
import com.university.model.RankChange; |
|||
import com.university.model.University; |
|||
import com.university.model.UniversityComparison; |
|||
|
|||
import java.util.*; |
|||
import java.util.stream.Collectors; |
|||
|
|||
/** |
|||
* 排名分析类 |
|||
* 提供各种数据分析功能 |
|||
*/ |
|||
public class RankAnalyzer { |
|||
|
|||
/** |
|||
* 获取Top N高校 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param n 数量 |
|||
* @return Top N高校列表 |
|||
*/ |
|||
public List<University> getTopN(List<University> universities, int n) { |
|||
return universities.stream() |
|||
.sorted(Comparator.comparingInt(University::getRank)) |
|||
.limit(n) |
|||
.collect(Collectors.toList()); |
|||
} |
|||
|
|||
/** |
|||
* 按省份统计高校数量 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @return 省份-数量映射 |
|||
*/ |
|||
public Map<String, Long> countByProvince(List<University> universities) { |
|||
return universities.stream() |
|||
.collect(Collectors.groupingBy( |
|||
University::getProvince, |
|||
Collectors.counting() |
|||
)); |
|||
} |
|||
|
|||
/** |
|||
* 按省份统计平均分 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @return 省份-平均分映射 |
|||
*/ |
|||
public Map<String, Double> averageScoreByProvince(List<University> universities) { |
|||
return universities.stream() |
|||
.collect(Collectors.groupingBy( |
|||
University::getProvince, |
|||
Collectors.averagingDouble(University::getScore) |
|||
)); |
|||
} |
|||
|
|||
/** |
|||
* 获取指定省份的高校 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param province 省份 |
|||
* @return 该省份的高校列表 |
|||
*/ |
|||
public List<University> getByProvince(List<University> universities, String province) { |
|||
return universities.stream() |
|||
.filter(u -> u.getProvince().equals(province)) |
|||
.sorted(Comparator.comparingInt(University::getRank)) |
|||
.collect(Collectors.toList()); |
|||
} |
|||
|
|||
/** |
|||
* 搜索高校 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param keyword 关键词 |
|||
* @return 匹配的高校列表 |
|||
*/ |
|||
public List<University> searchUniversity(List<University> universities, String keyword) { |
|||
return universities.stream() |
|||
.filter(u -> u.getName().contains(keyword)) |
|||
.collect(Collectors.toList()); |
|||
} |
|||
|
|||
/** |
|||
* 获取分数统计信息 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @return 统计信息 |
|||
*/ |
|||
public ScoreStatistics getScoreStatistics(List<University> universities) { |
|||
DoubleSummaryStatistics stats = universities.stream() |
|||
.mapToDouble(University::getScore) |
|||
.summaryStatistics(); |
|||
|
|||
return new ScoreStatistics( |
|||
stats.getCount(), |
|||
stats.getSum(), |
|||
stats.getAverage(), |
|||
stats.getMax(), |
|||
stats.getMin() |
|||
); |
|||
} |
|||
|
|||
/** |
|||
* 计算历年排名变化 |
|||
* |
|||
* @param dataMap 多年数据映射(年份->高校列表) |
|||
* @return 排名变化列表 |
|||
*/ |
|||
public List<RankChange> calculateRankChanges(Map<Integer, List<University>> dataMap) { |
|||
List<RankChange> changes = new ArrayList<>(); |
|||
|
|||
// 获取所有年份并排序
|
|||
List<Integer> years = new ArrayList<>(dataMap.keySet()); |
|||
Collections.sort(years); |
|||
|
|||
if (years.size() < 2) { |
|||
return changes; |
|||
} |
|||
|
|||
int startYear = years.get(0); |
|||
int endYear = years.get(years.size() - 1); |
|||
|
|||
List<University> startData = dataMap.get(startYear); |
|||
List<University> endData = dataMap.get(endYear); |
|||
|
|||
// 创建名称到高校的映射
|
|||
Map<String, University> startMap = startData.stream() |
|||
.collect(Collectors.toMap(University::getName, u -> u)); |
|||
Map<String, University> endMap = endData.stream() |
|||
.collect(Collectors.toMap(University::getName, u -> u)); |
|||
|
|||
// 计算每所高校的变化
|
|||
for (String name : startMap.keySet()) { |
|||
if (endMap.containsKey(name)) { |
|||
University startUni = startMap.get(name); |
|||
University endUni = endMap.get(name); |
|||
|
|||
RankChange change = new RankChange( |
|||
name, |
|||
startYear, |
|||
endYear, |
|||
startUni.getRank(), |
|||
endUni.getRank(), |
|||
startUni.getScore(), |
|||
endUni.getScore() |
|||
); |
|||
changes.add(change); |
|||
} |
|||
} |
|||
|
|||
return changes; |
|||
} |
|||
|
|||
/** |
|||
* 获取排名上升最快的高校 |
|||
* |
|||
* @param changes 排名变化列表 |
|||
* @param n 数量 |
|||
* @return 上升最快的高校列表 |
|||
*/ |
|||
public List<RankChange> getFastestRising(List<RankChange> changes, int n) { |
|||
return changes.stream() |
|||
.filter(c -> c.getRankChange() > 0) // 只取排名上升的
|
|||
.sorted(Comparator.comparingInt(RankChange::getRankChange).reversed()) |
|||
.limit(n) |
|||
.collect(Collectors.toList()); |
|||
} |
|||
|
|||
/** |
|||
* 获取排名下降最快的高校 |
|||
* |
|||
* @param changes 排名变化列表 |
|||
* @param n 数量 |
|||
* @return 下降最快的高校列表 |
|||
*/ |
|||
public List<RankChange> getFastestFalling(List<RankChange> changes, int n) { |
|||
return changes.stream() |
|||
.filter(c -> c.getRankChange() < 0) // 只取排名下降的
|
|||
.sorted(Comparator.comparingInt(RankChange::getRankChange)) |
|||
.limit(n) |
|||
.collect(Collectors.toList()); |
|||
} |
|||
|
|||
/** |
|||
* 对比两所高校 |
|||
* |
|||
* @param u1 高校1 |
|||
* @param u2 高校2 |
|||
* @return 对比结果 |
|||
*/ |
|||
public UniversityComparison compareUniversities(University u1, University u2) { |
|||
return new UniversityComparison(u1, u2); |
|||
} |
|||
|
|||
/** |
|||
* 获取某高校在多年数据中的信息 |
|||
* |
|||
* @param dataMap 多年数据映射 |
|||
* @param universityName 高校名称 |
|||
* @return 该高校历年的信息列表 |
|||
*/ |
|||
public List<University> getUniversityHistory(Map<Integer, List<University>> dataMap, |
|||
String universityName) { |
|||
List<University> history = new ArrayList<>(); |
|||
|
|||
for (List<University> yearData : dataMap.values()) { |
|||
yearData.stream() |
|||
.filter(u -> u.getName().equals(universityName)) |
|||
.findFirst() |
|||
.ifPresent(history::add); |
|||
} |
|||
|
|||
// 按年份排序
|
|||
history.sort(Comparator.comparingInt(University::getYear)); |
|||
return history; |
|||
} |
|||
|
|||
/** |
|||
* 分数统计信息内部类 |
|||
*/ |
|||
public static class ScoreStatistics { |
|||
private final long count; |
|||
private final double sum; |
|||
private final double average; |
|||
private final double max; |
|||
private final double min; |
|||
|
|||
public ScoreStatistics(long count, double sum, double average, double max, double min) { |
|||
this.count = count; |
|||
this.sum = sum; |
|||
this.average = average; |
|||
this.max = max; |
|||
this.min = min; |
|||
} |
|||
|
|||
public long getCount() { return count; } |
|||
public double getSum() { return sum; } |
|||
public double getAverage() { return average; } |
|||
public double getMax() { return max; } |
|||
public double getMin() { return min; } |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return String.format("统计信息: 数量=%d, 平均分=%.2f, 最高分=%.2f, 最低分=%.2f", |
|||
count, average, max, min); |
|||
} |
|||
} |
|||
} |
|||
@ -1,153 +0,0 @@ |
|||
package com.university.crawler; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import com.university.model.University; |
|||
|
|||
/** |
|||
* 高校排名爬虫类 |
|||
* 负责从网页抓取高校排名数据 |
|||
*/ |
|||
public class UniversityRankCrawler { |
|||
|
|||
// 请求间隔时间(毫秒),防止请求过快被封
|
|||
private static final int REQUEST_DELAY = 1000; |
|||
|
|||
/** |
|||
* 爬取软科中国大学排名数据 |
|||
* 分析软科官网HTML结构,提取真实排名数据 |
|||
* |
|||
* @param year 年份 |
|||
* @return 高校列表 |
|||
*/ |
|||
public List<University> crawlRankings(int year) { |
|||
List<University> universities = new ArrayList<>(); |
|||
|
|||
try { |
|||
// 软科排名URL
|
|||
String url = "https://www.shanghairanking.cn/rankings/bcur/" + year; |
|||
|
|||
System.out.println("正在爬取 " + year + " 年高校排名数据..."); |
|||
|
|||
// 发送HTTP请求获取网页内容
|
|||
Document doc = Jsoup.connect(url) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") |
|||
.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8") |
|||
.header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") |
|||
.timeout(15000) |
|||
.get(); |
|||
|
|||
// 分析HTML结构,提取排名数据
|
|||
// 找到排名表格
|
|||
Elements rows = doc.select("table.rk-table tbody tr"); |
|||
|
|||
for (Element row : rows) { |
|||
Elements cells = row.select("td"); |
|||
if (cells.size() >= 5) { |
|||
try { |
|||
// 提取排名
|
|||
String rankText = cells.get(0).text().trim(); |
|||
rankText = rankText.replaceAll("[^0-9]", ""); |
|||
if (rankText.isEmpty()) continue; |
|||
int rank = Integer.parseInt(rankText); |
|||
|
|||
// 提取学校名称
|
|||
String name = cells.get(1).text().trim(); |
|||
|
|||
// 提取省份
|
|||
String province = cells.get(2).text().trim(); |
|||
|
|||
// 提取总分
|
|||
String scoreText = cells.get(4).text().trim(); |
|||
scoreText = scoreText.replaceAll("[^0-9.]", ""); |
|||
if (scoreText.isEmpty()) continue; |
|||
double score = Double.parseDouble(scoreText); |
|||
|
|||
// 创建高校对象
|
|||
University university = new University(rank, name, province, score, year); |
|||
universities.add(university); |
|||
|
|||
// 限制爬取数量,避免请求过多
|
|||
if (universities.size() >= 100) break; |
|||
} catch (NumberFormatException e) { |
|||
// 跳过解析失败的行
|
|||
continue; |
|||
} |
|||
} |
|||
} |
|||
|
|||
// 请求间隔,避免被封
|
|||
Thread.sleep(REQUEST_DELAY); |
|||
|
|||
} catch (IOException e) { |
|||
System.err.println("爬取数据失败: " + e.getMessage()); |
|||
System.out.println("将使用模拟数据..."); |
|||
// 如果爬取失败,使用模拟数据
|
|||
universities = generateMockData(year); |
|||
} catch (InterruptedException e) { |
|||
Thread.currentThread().interrupt(); |
|||
} |
|||
|
|||
System.out.println("成功获取 " + universities.size() + " 条数据"); |
|||
return universities; |
|||
} |
|||
|
|||
/** |
|||
* 爬取多年数据 |
|||
* |
|||
* @param startYear 开始年份 |
|||
* @param endYear 结束年份 |
|||
* @return 多年数据集合 |
|||
*/ |
|||
public List<List<University>> crawlMultipleYears(int startYear, int endYear) { |
|||
List<List<University>> allData = new ArrayList<>(); |
|||
|
|||
for (int year = startYear; year <= endYear; year++) { |
|||
List<University> yearData = crawlRankings(year); |
|||
allData.add(yearData); |
|||
} |
|||
|
|||
return allData; |
|||
} |
|||
|
|||
/** |
|||
* 生成模拟数据(用于演示) |
|||
* 当真实网站无法访问时使用 |
|||
*/ |
|||
private List<University> generateMockData(int year) { |
|||
List<University> mockData = new ArrayList<>(); |
|||
|
|||
// 基础数据,每年的分数略有变化
|
|||
double variation = (year - 2022) * 0.5; |
|||
|
|||
mockData.add(new University(1, "清华大学", "北京", 852.5 + variation, year)); |
|||
mockData.add(new University(2, "北京大学", "北京", 848.2 + variation, year)); |
|||
mockData.add(new University(3, "浙江大学", "浙江", 822.5 + variation, year)); |
|||
mockData.add(new University(4, "上海交通大学", "上海", 815.3 + variation, year)); |
|||
mockData.add(new University(5, "复旦大学", "上海", 805.1 + variation, year)); |
|||
mockData.add(new University(6, "南京大学", "江苏", 785.6 + variation, year)); |
|||
mockData.add(new University(7, "中国科学技术大学", "安徽", 782.4 + variation, year)); |
|||
mockData.add(new University(8, "华中科技大学", "湖北", 765.8 + variation, year)); |
|||
mockData.add(new University(9, "武汉大学", "湖北", 758.2 + variation, year)); |
|||
mockData.add(new University(10, "西安交通大学", "陕西", 752.6 + variation, year)); |
|||
mockData.add(new University(11, "中山大学", "广东", 745.3 + variation, year)); |
|||
mockData.add(new University(12, "四川大学", "四川", 738.9 + variation, year)); |
|||
mockData.add(new University(13, "哈尔滨工业大学", "黑龙江", 732.5 + variation, year)); |
|||
mockData.add(new University(14, "北京航空航天大学", "北京", 725.8 + variation, year)); |
|||
mockData.add(new University(15, "东南大学", "江苏", 718.4 + variation, year)); |
|||
mockData.add(new University(16, "北京理工大学", "北京", 712.6 + variation, year)); |
|||
mockData.add(new University(17, "同济大学", "上海", 705.3 + variation, year)); |
|||
mockData.add(new University(18, "中国人民大学", "北京", 698.5 + variation, year)); |
|||
mockData.add(new University(19, "北京师范大学", "北京", 692.1 + variation, year)); |
|||
mockData.add(new University(20, "南开大学", "天津", 685.7 + variation, year)); |
|||
|
|||
return mockData; |
|||
} |
|||
} |
|||
@ -1,145 +0,0 @@ |
|||
package com.university.model; |
|||
|
|||
/**
 * Rank change of one university between two years.
 *
 * <p>Stores the start/end year, rank and score, plus two derived values:
 * {@code rankChange} ({@code startRank - endRank}, positive when the university moved
 * up) and {@code scoreChange} ({@code endScore - startScore}). The derived values are
 * recomputed whenever one of their inputs is set, so an instance filled through the
 * no-arg constructor and setters reports the same values as one built with the full
 * constructor.
 */
public class RankChange {

    // University name.
    private String universityName;

    // First year of the comparison.
    private int startYear;

    // Last year of the comparison.
    private int endYear;

    // Rank in the first year.
    private int startRank;

    // Rank in the last year.
    private int endRank;

    // Rank movement: positive means the university moved up, negative means down.
    private int rankChange;

    // Score in the first year.
    private double startScore;

    // Score in the last year.
    private double endScore;

    // Score movement: endScore - startScore.
    private double scoreChange;

    /** Creates an empty instance to be filled through the setters. */
    public RankChange() {
    }

    /**
     * Creates a fully populated instance and derives the rank and score change.
     *
     * @param universityName university name
     * @param startYear      first year of the comparison
     * @param endYear        last year of the comparison
     * @param startRank      rank in the first year
     * @param endRank        rank in the last year
     * @param startScore     score in the first year
     * @param endScore       score in the last year
     */
    public RankChange(String universityName, int startYear, int endYear,
                      int startRank, int endRank, double startScore, double endScore) {
        this.universityName = universityName;
        this.startYear = startYear;
        this.endYear = endYear;
        this.startRank = startRank;
        this.endRank = endRank;
        this.startScore = startScore;
        this.endScore = endScore;

        refreshRankChange();
        refreshScoreChange();
    }

    /** Recomputes the rank movement; a smaller rank number at the end means an improvement. */
    private void refreshRankChange() {
        this.rankChange = this.startRank - this.endRank;
    }

    /** Recomputes the score movement. */
    private void refreshScoreChange() {
        this.scoreChange = this.endScore - this.startScore;
    }

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public int getStartYear() {
        return startYear;
    }

    public void setStartYear(int startYear) {
        this.startYear = startYear;
    }

    public int getEndYear() {
        return endYear;
    }

    public void setEndYear(int endYear) {
        this.endYear = endYear;
    }

    public int getStartRank() {
        return startRank;
    }

    /** Sets the start rank and recomputes {@code rankChange}. */
    public void setStartRank(int startRank) {
        this.startRank = startRank;
        refreshRankChange();
    }

    public int getEndRank() {
        return endRank;
    }

    /** Sets the end rank and recomputes {@code rankChange}. */
    public void setEndRank(int endRank) {
        this.endRank = endRank;
        refreshRankChange();
    }

    public int getRankChange() {
        return rankChange;
    }

    /**
     * Overrides the derived rank change. The value is replaced again the next time
     * the start or end rank is set.
     */
    public void setRankChange(int rankChange) {
        this.rankChange = rankChange;
    }

    public double getStartScore() {
        return startScore;
    }

    /** Sets the start score and recomputes {@code scoreChange}. */
    public void setStartScore(double startScore) {
        this.startScore = startScore;
        refreshScoreChange();
    }

    public double getEndScore() {
        return endScore;
    }

    /** Sets the end score and recomputes {@code scoreChange}. */
    public void setEndScore(double endScore) {
        this.endScore = endScore;
        refreshScoreChange();
    }

    public double getScoreChange() {
        return scoreChange;
    }

    /**
     * Overrides the derived score change. The value is replaced again the next time
     * the start or end score is set.
     */
    public void setScoreChange(double scoreChange) {
        this.scoreChange = scoreChange;
    }

    /**
     * Describes the trend in words.
     *
     * @return "up N places", "down N places" or "unchanged" (in Chinese), based on the
     *         sign of {@code rankChange}
     */
    public String getTrendDescription() {
        if (rankChange > 0) {
            return String.format("上升%d位", rankChange);
        } else if (rankChange < 0) {
            return String.format("下降%d位", Math.abs(rankChange));
        } else {
            return "排名不变";
        }
    }

    @Override
    public String toString() {
        return String.format("%s: %d年(第%d名) -> %d年(第%d名), %s",
                universityName, startYear, startRank, endYear, endRank, getTrendDescription());
    }
}
|||
@ -1,120 +0,0 @@ |
|||
package com.university.model; |
|||
|
|||
import java.util.Objects; |
|||
|
|||
/** |
|||
* 高校实体类 (Java Bean) |
|||
* 用于封装高校排名数据 |
|||
*/ |
|||
public class University { |
|||
|
|||
// 排名
|
|||
private int rank; |
|||
|
|||
// 学校名称
|
|||
private String name; |
|||
|
|||
// 所在省份
|
|||
private String province; |
|||
|
|||
// 总分
|
|||
private double score; |
|||
|
|||
// 年份
|
|||
private int year; |
|||
|
|||
// 无参构造方法(必须,用于反射创建对象)
|
|||
public University() { |
|||
} |
|||
|
|||
// 全参构造方法
|
|||
public University(int rank, String name, String province, double score, int year) { |
|||
this.rank = rank; |
|||
this.name = name; |
|||
this.province = province; |
|||
this.score = score; |
|||
this.year = year; |
|||
} |
|||
|
|||
// Getter和Setter方法
|
|||
public int getRank() { |
|||
return rank; |
|||
} |
|||
|
|||
public void setRank(int rank) { |
|||
this.rank = rank; |
|||
} |
|||
|
|||
public String getName() { |
|||
return name; |
|||
} |
|||
|
|||
public void setName(String name) { |
|||
this.name = name; |
|||
} |
|||
|
|||
public String getProvince() { |
|||
return province; |
|||
} |
|||
|
|||
public void setProvince(String province) { |
|||
this.province = province; |
|||
} |
|||
|
|||
public double getScore() { |
|||
return score; |
|||
} |
|||
|
|||
public void setScore(double score) { |
|||
this.score = score; |
|||
} |
|||
|
|||
public int getYear() { |
|||
return year; |
|||
} |
|||
|
|||
public void setYear(int year) { |
|||
this.year = year; |
|||
} |
|||
|
|||
/** |
|||
* 计算排名变化 |
|||
* @param previousRank 往年排名 |
|||
* @return 排名变化(正数表示上升,负数表示下降) |
|||
*/ |
|||
public int calculateRankChange(int previousRank) { |
|||
return previousRank - this.rank; |
|||
} |
|||
|
|||
/** |
|||
* 计算分数变化 |
|||
* @param previousScore 往年分数 |
|||
* @return 分数变化 |
|||
*/ |
|||
public double calculateScoreChange(double previousScore) { |
|||
return this.score - previousScore; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return String.format("University{rank=%d, name='%s', province='%s', score=%.2f, year=%d}", |
|||
rank, name, province, score, year); |
|||
} |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
University that = (University) o; |
|||
return rank == that.rank && |
|||
Double.compare(that.score, score) == 0 && |
|||
year == that.year && |
|||
Objects.equals(name, that.name) && |
|||
Objects.equals(province, that.province); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(rank, name, province, score, year); |
|||
} |
|||
} |
|||
@ -1,171 +0,0 @@ |
|||
package com.university.model; |
|||
|
|||
/** |
|||
* 高校对比实体类 |
|||
* 用于存储两所高校的对比信息 |
|||
*/ |
|||
public class UniversityComparison { |
|||
|
|||
// 第一所高校
|
|||
private String universityName1; |
|||
|
|||
// 第二所高校
|
|||
private String universityName2; |
|||
|
|||
// 年份
|
|||
private int year; |
|||
|
|||
// 高校1排名
|
|||
private int rank1; |
|||
|
|||
// 高校2排名
|
|||
private int rank2; |
|||
|
|||
// 高校1分数
|
|||
private double score1; |
|||
|
|||
// 高校2分数
|
|||
private double score2; |
|||
|
|||
// 高校1省份
|
|||
private String province1; |
|||
|
|||
// 高校2省份
|
|||
private String province2; |
|||
|
|||
// 排名差距
|
|||
private int rankGap; |
|||
|
|||
// 分数差距
|
|||
private double scoreGap; |
|||
|
|||
public UniversityComparison() { |
|||
} |
|||
|
|||
public UniversityComparison(University u1, University u2) { |
|||
this.universityName1 = u1.getName(); |
|||
this.universityName2 = u2.getName(); |
|||
this.year = u1.getYear(); |
|||
this.rank1 = u1.getRank(); |
|||
this.rank2 = u2.getRank(); |
|||
this.score1 = u1.getScore(); |
|||
this.score2 = u2.getScore(); |
|||
this.province1 = u1.getProvince(); |
|||
this.province2 = u2.getProvince(); |
|||
|
|||
this.rankGap = Math.abs(rank1 - rank2); |
|||
this.scoreGap = Math.abs(score1 - score2); |
|||
} |
|||
|
|||
// Getters and Setters
|
|||
public String getUniversityName1() { |
|||
return universityName1; |
|||
} |
|||
|
|||
public void setUniversityName1(String universityName1) { |
|||
this.universityName1 = universityName1; |
|||
} |
|||
|
|||
public String getUniversityName2() { |
|||
return universityName2; |
|||
} |
|||
|
|||
public void setUniversityName2(String universityName2) { |
|||
this.universityName2 = universityName2; |
|||
} |
|||
|
|||
public int getYear() { |
|||
return year; |
|||
} |
|||
|
|||
public void setYear(int year) { |
|||
this.year = year; |
|||
} |
|||
|
|||
public int getRank1() { |
|||
return rank1; |
|||
} |
|||
|
|||
public void setRank1(int rank1) { |
|||
this.rank1 = rank1; |
|||
} |
|||
|
|||
public int getRank2() { |
|||
return rank2; |
|||
} |
|||
|
|||
public void setRank2(int rank2) { |
|||
this.rank2 = rank2; |
|||
} |
|||
|
|||
public double getScore1() { |
|||
return score1; |
|||
} |
|||
|
|||
public void setScore1(double score1) { |
|||
this.score1 = score1; |
|||
} |
|||
|
|||
public double getScore2() { |
|||
return score2; |
|||
} |
|||
|
|||
public void setScore2(double score2) { |
|||
this.score2 = score2; |
|||
} |
|||
|
|||
public String getProvince1() { |
|||
return province1; |
|||
} |
|||
|
|||
public void setProvince1(String province1) { |
|||
this.province1 = province1; |
|||
} |
|||
|
|||
public String getProvince2() { |
|||
return province2; |
|||
} |
|||
|
|||
public void setProvince2(String province2) { |
|||
this.province2 = province2; |
|||
} |
|||
|
|||
public int getRankGap() { |
|||
return rankGap; |
|||
} |
|||
|
|||
public void setRankGap(int rankGap) { |
|||
this.rankGap = rankGap; |
|||
} |
|||
|
|||
public double getScoreGap() { |
|||
return scoreGap; |
|||
} |
|||
|
|||
public void setScoreGap(double scoreGap) { |
|||
this.scoreGap = scoreGap; |
|||
} |
|||
|
|||
/** |
|||
* 获取排名较高的高校名称 |
|||
*/ |
|||
public String getHigherRankedUniversity() { |
|||
return rank1 < rank2 ? universityName1 : universityName2; |
|||
} |
|||
|
|||
/** |
|||
* 获取对比结果描述 |
|||
*/ |
|||
public String getComparisonResult() { |
|||
String higherUni = getHigherRankedUniversity(); |
|||
return String.format("%d年: %s 排名高于 %s %d位,分数相差 %.2f分", |
|||
year, higherUni, |
|||
higherUni.equals(universityName1) ? universityName2 : universityName1, |
|||
rankGap, scoreGap); |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return getComparisonResult(); |
|||
} |
|||
} |
|||
@ -1,202 +0,0 @@ |
|||
package com.university.storage; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileInputStream; |
|||
import java.io.FileOutputStream; |
|||
import java.io.IOException; |
|||
import java.io.InputStreamReader; |
|||
import java.io.OutputStreamWriter; |
|||
import java.io.Reader; |
|||
import java.io.Writer; |
|||
import java.nio.charset.StandardCharsets; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
import com.opencsv.CSVReader; |
|||
import com.opencsv.CSVWriter; |
|||
import com.opencsv.bean.CsvToBean; |
|||
import com.opencsv.bean.CsvToBeanBuilder; |
|||
import com.opencsv.bean.StatefulBeanToCsv; |
|||
import com.opencsv.bean.StatefulBeanToCsvBuilder; |
|||
import com.opencsv.exceptions.CsvDataTypeMismatchException; |
|||
import com.opencsv.exceptions.CsvRequiredFieldEmptyException; |
|||
import com.opencsv.exceptions.CsvValidationException; |
|||
import com.university.model.University; |
|||
|
|||
/** |
|||
* 数据存储类 |
|||
* 负责数据的持久化存储(CSV格式) |
|||
*/ |
|||
public class DataStorage { |
|||
|
|||
// 数据存储目录
|
|||
private static final String DATA_DIR = "data"; |
|||
|
|||
/** |
|||
* 构造方法,确保数据目录存在 |
|||
*/ |
|||
public DataStorage() { |
|||
File dir = new File(DATA_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 保存高校列表到CSV文件 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param year 年份 |
|||
*/ |
|||
public void saveToCsv(List<University> universities, int year) { |
|||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|||
|
|||
try (Writer writer = new OutputStreamWriter( |
|||
new FileOutputStream(filename), StandardCharsets.UTF_8)) { |
|||
|
|||
// 添加BOM,解决Excel中文乱码
|
|||
writer.write('\ufeff'); |
|||
|
|||
// 创建CSV写入器
|
|||
StatefulBeanToCsv<University> beanToCsv = new StatefulBeanToCsvBuilder<University>(writer) |
|||
.withQuotechar('"') |
|||
.withSeparator(',') |
|||
.withOrderedResults(true) |
|||
.build(); |
|||
|
|||
// 写入数据
|
|||
beanToCsv.write(universities); |
|||
System.out.println("数据已保存到: " + filename); |
|||
|
|||
} catch (IOException | CsvDataTypeMismatchException | CsvRequiredFieldEmptyException e) { |
|||
System.err.println("保存CSV文件失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 从CSV文件读取高校列表 |
|||
* |
|||
* @param year 年份 |
|||
* @return 高校列表 |
|||
*/ |
|||
public List<University> readFromCsv(int year) { |
|||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|||
List<University> universities = new ArrayList<>(); |
|||
|
|||
try (Reader reader = new InputStreamReader( |
|||
new FileInputStream(filename), StandardCharsets.UTF_8)) { |
|||
|
|||
// 创建CSV读取器
|
|||
CsvToBean<University> csvToBean = new CsvToBeanBuilder<University>(reader) |
|||
.withType(University.class) |
|||
.withIgnoreLeadingWhiteSpace(true) |
|||
.build(); |
|||
|
|||
// 读取数据
|
|||
universities = csvToBean.parse(); |
|||
System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据"); |
|||
|
|||
} catch (IOException e) { |
|||
System.err.println("读取CSV文件失败: " + e.getMessage()); |
|||
} |
|||
|
|||
return universities; |
|||
} |
|||
|
|||
/** |
|||
* 保存原始数据(手动控制格式) |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param year 年份 |
|||
*/ |
|||
public void saveRawData(List<University> universities, int year) { |
|||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|||
|
|||
try (CSVWriter writer = new CSVWriter(new OutputStreamWriter( |
|||
new FileOutputStream(filename), StandardCharsets.UTF_8))) { |
|||
|
|||
// 写入表头
|
|||
String[] header = {"排名", "学校名称", "省份", "总分", "年份"}; |
|||
writer.writeNext(header); |
|||
|
|||
// 写入数据
|
|||
for (University u : universities) { |
|||
String[] row = { |
|||
String.valueOf(u.getRank()), |
|||
u.getName(), |
|||
u.getProvince(), |
|||
String.valueOf(u.getScore()), |
|||
String.valueOf(u.getYear()) |
|||
}; |
|||
writer.writeNext(row); |
|||
} |
|||
|
|||
System.out.println("原始数据已保存到: " + filename); |
|||
|
|||
} catch (IOException e) { |
|||
System.err.println("保存原始数据失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 读取原始数据 |
|||
* |
|||
* @param year 年份 |
|||
* @return 高校列表 |
|||
*/ |
|||
public List<University> readRawData(int year) { |
|||
String filename = DATA_DIR + "/university_rank_" + year + ".csv"; |
|||
List<University> universities = new ArrayList<>(); |
|||
|
|||
try (CSVReader reader = new CSVReader(new InputStreamReader( |
|||
new FileInputStream(filename), StandardCharsets.UTF_8))) { |
|||
|
|||
// 跳过表头
|
|||
reader.readNext(); |
|||
|
|||
// 读取数据行
|
|||
String[] row; |
|||
while ((row = reader.readNext()) != null) { |
|||
if (row.length >= 5) { |
|||
University u = new University(); |
|||
u.setRank(Integer.parseInt(row[0].trim())); |
|||
u.setName(row[1].trim()); |
|||
u.setProvince(row[2].trim()); |
|||
u.setScore(Double.parseDouble(row[3].trim())); |
|||
u.setYear(Integer.parseInt(row[4].trim())); |
|||
universities.add(u); |
|||
} |
|||
} |
|||
|
|||
System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据"); |
|||
|
|||
} catch (IOException | CsvValidationException e) { |
|||
System.err.println("读取原始数据失败: " + e.getMessage()); |
|||
} |
|||
|
|||
return universities; |
|||
} |
|||
|
|||
/** |
|||
* 检查某年份的数据是否存在 |
|||
* |
|||
* @param year 年份 |
|||
* @return 是否存在 |
|||
*/ |
|||
public boolean dataExists(int year) { |
|||
File file = new File(DATA_DIR + "/university_rank_" + year + ".csv"); |
|||
return file.exists(); |
|||
} |
|||
|
|||
/** |
|||
* 删除某年份的数据文件 |
|||
* |
|||
* @param year 年份 |
|||
*/ |
|||
public void deleteData(int year) { |
|||
File file = new File(DATA_DIR + "/university_rank_" + year + ".csv"); |
|||
if (file.exists() && file.delete()) { |
|||
System.out.println("已删除 " + year + " 年的数据文件"); |
|||
} |
|||
} |
|||
} |
|||
@ -1,299 +0,0 @@ |
|||
package com.university.visualization; |
|||
|
|||
import com.university.model.RankChange; |
|||
import com.university.model.University; |
|||
import org.jfree.chart.ChartFactory; |
|||
import org.jfree.chart.ChartUtils; |
|||
import org.jfree.chart.JFreeChart; |
|||
import org.jfree.chart.axis.CategoryAxis; |
|||
import org.jfree.chart.axis.NumberAxis; |
|||
import org.jfree.chart.plot.CategoryPlot; |
|||
import org.jfree.chart.plot.PlotOrientation; |
|||
import org.jfree.chart.renderer.category.BarRenderer; |
|||
import org.jfree.chart.renderer.category.LineAndShapeRenderer; |
|||
import org.jfree.data.category.DefaultCategoryDataset; |
|||
|
|||
import java.awt.*; |
|||
import java.io.File; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* 图表生成类 |
|||
* 使用JFreeChart生成各种统计图表 |
|||
*/ |
|||
public class ChartGenerator { |
|||
|
|||
// 图表输出目录
|
|||
private static final String CHART_DIR = "charts"; |
|||
|
|||
/** |
|||
* 构造方法,确保图表目录存在 |
|||
*/ |
|||
public ChartGenerator() { |
|||
File dir = new File(CHART_DIR); |
|||
if (!dir.exists()) { |
|||
dir.mkdirs(); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 生成Top N高校柱状图 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param year 年份 |
|||
* @param n 数量 |
|||
*/ |
|||
public void generateTopNBarChart(List<University> universities, int year, int n) { |
|||
// 创建数据集
|
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
|
|||
// 取前N名
|
|||
int count = Math.min(n, universities.size()); |
|||
for (int i = 0; i < count; i++) { |
|||
University u = universities.get(i); |
|||
dataset.addValue(u.getScore(), "总分", u.getName()); |
|||
} |
|||
|
|||
// 创建图表
|
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
year + "年高校排名Top" + n, // 标题
|
|||
"学校", // X轴标签
|
|||
"总分", // Y轴标签
|
|||
dataset, // 数据集
|
|||
PlotOrientation.VERTICAL, // 方向
|
|||
true, // 显示图例
|
|||
true, // 显示工具提示
|
|||
false // 不生成URL
|
|||
); |
|||
|
|||
// 美化图表
|
|||
customizeBarChart(chart); |
|||
|
|||
// 保存图表
|
|||
saveChart(chart, "top" + n + "_" + year + ".png"); |
|||
} |
|||
|
|||
/** |
|||
* 生成省份分布饼图 |
|||
* |
|||
* @param provinceCount 省份统计 |
|||
* @param year 年份 |
|||
*/ |
|||
public void generateProvincePieChart(Map<String, Long> provinceCount, int year) { |
|||
// 创建饼图数据集
|
|||
org.jfree.data.general.DefaultPieDataset<String> dataset = |
|||
new org.jfree.data.general.DefaultPieDataset<>(); |
|||
|
|||
// 添加数据
|
|||
provinceCount.forEach(dataset::setValue); |
|||
|
|||
// 创建饼图
|
|||
JFreeChart chart = ChartFactory.createPieChart( |
|||
year + "年高校省份分布", // 标题
|
|||
dataset, // 数据集
|
|||
true, // 显示图例
|
|||
true, // 显示工具提示
|
|||
false // 不生成URL
|
|||
); |
|||
|
|||
// 获取饼图plot并设置标签
|
|||
org.jfree.chart.plot.PiePlot plot = (org.jfree.chart.plot.PiePlot) chart.getPlot(); |
|||
|
|||
// 设置标签格式:省份名称 + 数量 + 百分比
|
|||
plot.setLabelGenerator(new org.jfree.chart.labels.StandardPieSectionLabelGenerator( |
|||
"{0}: {1}所 ({2})", |
|||
java.text.NumberFormat.getIntegerInstance(), |
|||
java.text.NumberFormat.getPercentInstance() |
|||
)); |
|||
|
|||
// 设置标签字体
|
|||
plot.setLabelFont(new Font("微软雅黑", Font.PLAIN, 12)); |
|||
|
|||
// 设置标签颜色
|
|||
plot.setLabelPaint(Color.BLACK); |
|||
|
|||
// 设置标签背景
|
|||
plot.setLabelBackgroundPaint(new Color(255, 255, 255, 200)); |
|||
|
|||
// 设置标题字体
|
|||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|||
|
|||
// 保存图表
|
|||
saveChart(chart, "province_distribution_" + year + ".png"); |
|||
} |
|||
|
|||
/** |
|||
* 生成历年排名变化折线图 |
|||
* |
|||
* @param universityHistory 某高校历年数据 |
|||
* @param universityName 高校名称 |
|||
*/ |
|||
public void generateRankTrendLineChart(List<University> universityHistory, |
|||
String universityName) { |
|||
// 创建数据集
|
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
|
|||
// 添加数据(注意:排名越小越好,所以取负值让折线图向上表示进步)
|
|||
for (University u : universityHistory) { |
|||
dataset.addValue(u.getRank(), "排名", String.valueOf(u.getYear())); |
|||
} |
|||
|
|||
// 创建图表
|
|||
JFreeChart chart = ChartFactory.createLineChart( |
|||
universityName + " 历年排名变化", // 标题
|
|||
"年份", // X轴标签
|
|||
"排名", // Y轴标签
|
|||
dataset, // 数据集
|
|||
PlotOrientation.VERTICAL, // 方向
|
|||
true, // 显示图例
|
|||
true, // 显示工具提示
|
|||
false // 不生成URL
|
|||
); |
|||
|
|||
// 美化折线图
|
|||
customizeLineChart(chart); |
|||
|
|||
// 保存图表
|
|||
saveChart(chart, "rank_trend_" + universityName + ".png"); |
|||
} |
|||
|
|||
/** |
|||
* 生成排名变化对比图 |
|||
* |
|||
* @param changes 排名变化列表 |
|||
* @param title 图表标题 |
|||
* @param filename 文件名 |
|||
*/ |
|||
public void generateRankChangeChart(List<RankChange> changes, String title, String filename) { |
|||
// 创建数据集
|
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
|
|||
// 添加数据
|
|||
for (RankChange change : changes) { |
|||
dataset.addValue(change.getRankChange(), "排名变化", change.getUniversityName()); |
|||
} |
|||
|
|||
// 创建图表
|
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
title, |
|||
"学校", |
|||
"排名变化(位)", |
|||
dataset, |
|||
PlotOrientation.HORIZONTAL, |
|||
true, |
|||
true, |
|||
false |
|||
); |
|||
|
|||
// 美化
|
|||
customizeBarChart(chart); |
|||
|
|||
// 保存
|
|||
saveChart(chart, filename); |
|||
} |
|||
|
|||
/** |
|||
* 生成多高校对比图 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param year 年份 |
|||
*/ |
|||
public void generateComparisonChart(List<University> universities, int year) { |
|||
// 创建数据集
|
|||
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
|||
|
|||
// 添加分数数据
|
|||
for (University u : universities) { |
|||
dataset.addValue(u.getScore(), "总分", u.getName()); |
|||
} |
|||
|
|||
// 创建图表
|
|||
JFreeChart chart = ChartFactory.createBarChart( |
|||
year + "年高校分数对比", |
|||
"学校", |
|||
"总分", |
|||
dataset, |
|||
PlotOrientation.VERTICAL, |
|||
true, |
|||
true, |
|||
false |
|||
); |
|||
|
|||
customizeBarChart(chart); |
|||
saveChart(chart, "comparison_" + year + ".png"); |
|||
} |
|||
|
|||
/** |
|||
* 美化柱状图 |
|||
*/ |
|||
private void customizeBarChart(JFreeChart chart) { |
|||
CategoryPlot plot = chart.getCategoryPlot(); |
|||
|
|||
// 设置背景色
|
|||
plot.setBackgroundPaint(Color.WHITE); |
|||
plot.setRangeGridlinePaint(Color.LIGHT_GRAY); |
|||
|
|||
// 设置柱状图颜色
|
|||
BarRenderer renderer = (BarRenderer) plot.getRenderer(); |
|||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|||
|
|||
// 设置字体
|
|||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|||
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|||
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|||
|
|||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|||
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|||
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|||
|
|||
// 设置标题字体
|
|||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|||
} |
|||
|
|||
/** |
|||
* 美化折线图 |
|||
*/ |
|||
private void customizeLineChart(JFreeChart chart) { |
|||
CategoryPlot plot = chart.getCategoryPlot(); |
|||
|
|||
// 设置背景色
|
|||
plot.setBackgroundPaint(Color.WHITE); |
|||
plot.setRangeGridlinePaint(Color.LIGHT_GRAY); |
|||
|
|||
// 设置折线样式
|
|||
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer(); |
|||
renderer.setSeriesPaint(0, new Color(79, 129, 189)); |
|||
renderer.setSeriesStroke(0, new BasicStroke(2.0f)); |
|||
renderer.setSeriesShapesVisible(0, true); |
|||
|
|||
// 设置字体
|
|||
CategoryAxis domainAxis = plot.getDomainAxis(); |
|||
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|||
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|||
|
|||
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis(); |
|||
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10)); |
|||
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12)); |
|||
|
|||
// 设置标题字体
|
|||
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16)); |
|||
} |
|||
|
|||
/** |
|||
* 保存图表到文件 |
|||
* |
|||
* @param chart 图表对象 |
|||
* @param filename 文件名 |
|||
*/ |
|||
private void saveChart(JFreeChart chart, String filename) { |
|||
try { |
|||
File file = new File(CHART_DIR + "/" + filename); |
|||
ChartUtils.saveChartAsPNG(file, chart, 800, 600); |
|||
System.out.println("图表已保存: " + file.getAbsolutePath()); |
|||
} catch (IOException e) { |
|||
System.err.println("保存图表失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
} |
|||
@ -1,241 +0,0 @@ |
|||
package com.university.visualization; |
|||
|
|||
import com.university.analysis.RankAnalyzer; |
|||
import com.university.model.RankChange; |
|||
import com.university.model.University; |
|||
import com.university.model.UniversityComparison; |
|||
|
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* 控制台报表类 |
|||
* 格式化输出各种统计结果到控制台 |
|||
*/ |
|||
public class ConsoleReporter { |
|||
|
|||
/** |
|||
* 打印分隔线 |
|||
*/ |
|||
private void printSeparator() { |
|||
System.out.println("=".repeat(80)); |
|||
} |
|||
|
|||
/** |
|||
* 打印高校列表 |
|||
* |
|||
* @param universities 高校列表 |
|||
* @param title 标题 |
|||
*/ |
|||
public void printUniversityList(List<University> universities, String title) { |
|||
printSeparator(); |
|||
System.out.println("【" + title + "】"); |
|||
printSeparator(); |
|||
|
|||
// 表头
|
|||
System.out.printf("%-6s %-20s %-10s %-10s %-6s%n", |
|||
"排名", "学校名称", "省份", "总分", "年份"); |
|||
System.out.println("-".repeat(80)); |
|||
|
|||
// 数据行
|
|||
for (University u : universities) { |
|||
System.out.printf("%-6d %-20s %-10s %-10.2f %-6d%n", |
|||
u.getRank(), |
|||
truncate(u.getName(), 20), |
|||
u.getProvince(), |
|||
u.getScore(), |
|||
u.getYear()); |
|||
} |
|||
|
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印省份统计 |
|||
* |
|||
* @param provinceCount 省份统计 |
|||
* @param title 标题 |
|||
*/ |
|||
public void printProvinceStatistics(Map<String, Long> provinceCount, String title) { |
|||
printSeparator(); |
|||
System.out.println("【" + title + "】"); |
|||
printSeparator(); |
|||
|
|||
System.out.printf("%-15s %-10s%n", "省份", "高校数量"); |
|||
System.out.println("-".repeat(30)); |
|||
|
|||
// 按数量降序排序
|
|||
provinceCount.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
|||
.forEach(entry -> System.out.printf("%-15s %-10d%n", |
|||
entry.getKey(), entry.getValue())); |
|||
|
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印分数统计 |
|||
* |
|||
* @param statistics 统计信息 |
|||
* @param title 标题 |
|||
*/ |
|||
public void printScoreStatistics(RankAnalyzer.ScoreStatistics statistics, String title) { |
|||
printSeparator(); |
|||
System.out.println("【" + title + "】"); |
|||
printSeparator(); |
|||
|
|||
System.out.printf("高校数量: %d%n", statistics.getCount()); |
|||
System.out.printf("平均分数: %.2f%n", statistics.getAverage()); |
|||
System.out.printf("最高分数: %.2f%n", statistics.getMax()); |
|||
System.out.printf("最低分数: %.2f%n", statistics.getMin()); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印排名变化 |
|||
* |
|||
* @param changes 排名变化列表 |
|||
* @param title 标题 |
|||
*/ |
|||
public void printRankChanges(List<RankChange> changes, String title) { |
|||
printSeparator(); |
|||
System.out.println("【" + title + "】"); |
|||
printSeparator(); |
|||
|
|||
System.out.printf("%-20s %-8s %-8s %-12s %-12s%n", |
|||
"学校名称", "起始年", "结束年", "排名变化", "分数变化"); |
|||
System.out.println("-".repeat(80)); |
|||
|
|||
for (RankChange change : changes) { |
|||
String rankChangeStr = change.getRankChange() > 0 ? |
|||
"↑" + change.getRankChange() : |
|||
(change.getRankChange() < 0 ? |
|||
"↓" + Math.abs(change.getRankChange()) : |
|||
"-"); |
|||
|
|||
System.out.printf("%-20s %-8d %-8d %-12s %+.2f%n", |
|||
truncate(change.getUniversityName(), 20), |
|||
change.getStartYear(), |
|||
change.getEndYear(), |
|||
rankChangeStr, |
|||
change.getScoreChange()); |
|||
} |
|||
|
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印高校对比结果 |
|||
* |
|||
* @param comparison 对比结果 |
|||
*/ |
|||
public void printComparison(UniversityComparison comparison) { |
|||
printSeparator(); |
|||
System.out.println("【高校对比分析】"); |
|||
printSeparator(); |
|||
|
|||
System.out.printf("对比年份: %d年%n%n", comparison.getYear()); |
|||
|
|||
System.out.println("学校信息:"); |
|||
System.out.println("-".repeat(50)); |
|||
System.out.printf("%-20s %-10s %-10s%n", "学校", "排名", "分数"); |
|||
System.out.printf("%-20s %-10d %-10.2f%n", |
|||
comparison.getUniversityName1(), |
|||
comparison.getRank1(), |
|||
comparison.getScore1()); |
|||
System.out.printf("%-20s %-10d %-10.2f%n", |
|||
comparison.getUniversityName2(), |
|||
comparison.getRank2(), |
|||
comparison.getScore2()); |
|||
|
|||
System.out.println(); |
|||
System.out.println("对比结果:"); |
|||
System.out.println("-".repeat(50)); |
|||
System.out.printf("排名领先: %s (领先%d位)%n", |
|||
comparison.getHigherRankedUniversity(), |
|||
comparison.getRankGap()); |
|||
System.out.printf("分数差距: %.2f分%n", comparison.getScoreGap()); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印历年趋势 |
|||
* |
|||
* @param history 历年数据 |
|||
* @param name 学校名称 |
|||
*/ |
|||
public void printYearlyTrend(List<University> history, String name) { |
|||
printSeparator(); |
|||
System.out.println("【" + name + " 历年排名趋势】"); |
|||
printSeparator(); |
|||
|
|||
System.out.printf("%-8s %-8s %-10s%n", "年份", "排名", "分数"); |
|||
System.out.println("-".repeat(30)); |
|||
|
|||
University previous = null; |
|||
for (University u : history) { |
|||
String trend = ""; |
|||
if (previous != null) { |
|||
int change = previous.getRank() - u.getRank(); |
|||
if (change > 0) { |
|||
trend = "↑" + change; |
|||
} else if (change < 0) { |
|||
trend = "↓" + Math.abs(change); |
|||
} else { |
|||
trend = "-"; |
|||
} |
|||
} |
|||
|
|||
System.out.printf("%-8d %-8d %-10.2f %s%n", |
|||
u.getYear(), u.getRank(), u.getScore(), trend); |
|||
previous = u; |
|||
} |
|||
|
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 打印菜单 |
|||
*/ |
|||
public void printMenu() { |
|||
printSeparator(); |
|||
System.out.println("【高校排名分析系统】"); |
|||
printSeparator(); |
|||
System.out.println("1. 查看Top N高校排名"); |
|||
System.out.println("2. 按省份查看高校"); |
|||
System.out.println("3. 搜索高校"); |
|||
System.out.println("4. 查看省份分布统计"); |
|||
System.out.println("5. 查看分数统计"); |
|||
System.out.println("6. 查看历年排名变化"); |
|||
System.out.println("7. 对比两所高校"); |
|||
System.out.println("8. 查看某高校历年趋势"); |
|||
System.out.println("9. 生成所有图表"); |
|||
System.out.println("0. 退出系统"); |
|||
printSeparator(); |
|||
System.out.print("请选择功能(0-9): "); |
|||
} |
|||
|
|||
/** |
|||
* 打印欢迎信息 |
|||
*/ |
|||
public void printWelcome() { |
|||
printSeparator(); |
|||
System.out.println(" 欢迎使用高校排名分析系统"); |
|||
System.out.println(" 本系统提供高校排名数据爬取、分析和可视化功能"); |
|||
printSeparator(); |
|||
System.out.println(); |
|||
} |
|||
|
|||
/** |
|||
* 截断字符串 |
|||
* |
|||
* @param str 原字符串 |
|||
* @param length 最大长度 |
|||
* @return 截断后的字符串 |
|||
*/ |
|||
private String truncate(String str, int length) { |
|||
if (str == null) return ""; |
|||
if (str.length() <= length) return str; |
|||
return str.substring(0, length - 3) + "..."; |
|||
} |
|||
} |
|||