Browse Source

移除文件夹

main
jingjiaying 3 weeks ago
parent
commit
982dc35476
  1. 10
      project/.idea/.gitignore
  2. 13
      project/.idea/compiler.xml
  3. 7
      project/.idea/encodings.xml
  4. 20
      project/.idea/jarRepositories.xml
  5. 12
      project/.idea/misc.xml
  6. 6
      project/.idea/vcs.xml
  7. BIN
      project/202506050211-靖佳颖-期末实验报告.docx
  8. BIN
      project/charts/news_time_trend.png
  9. BIN
      project/charts/news_top_words.png
  10. BIN
      project/charts/price_histogram.png
  11. BIN
      project/charts/province_bar.png
  12. BIN
      project/charts/rating_pie.png
  13. BIN
      project/charts/temperature_comparison.png
  14. BIN
      project/charts/temperature_上海.png
  15. BIN
      project/charts/temperature_北京.png
  16. BIN
      project/charts/temperature_广州.png
  17. 64
      project/dependency-reduced-pom.xml
  18. 3602
      project/output/books_20260530_190333.json
  19. 82
      project/output/news_20260530_190333.json
  20. 212
      project/output/university_ranking_20260530_190333.json
  21. 335
      project/output/weather_20260530_190333.json
  22. 96
      project/pom.xml
  23. 14
      project/reports/book_analysis_report.txt
  24. 31
      project/reports/news_analysis_report.txt
  25. 17
      project/reports/ranking_analysis_report.txt
  26. 29
      project/reports/weather_analysis_report.txt
  27. 15
      project/src/main/java/com/example/crawler/Main.java
  28. 229
      project/src/main/java/com/example/crawler/chart/ChartGenerator.java
  29. 60
      project/src/main/java/com/example/crawler/command/BaseCrawlCommand.java
  30. 32
      project/src/main/java/com/example/crawler/command/BookCommand.java
  31. 20
      project/src/main/java/com/example/crawler/command/Command.java
  32. 45
      project/src/main/java/com/example/crawler/command/CrawlAllCommand.java
  33. 104
      project/src/main/java/com/example/crawler/command/CrawlAndAnalyzeAllCommand.java
  34. 32
      project/src/main/java/com/example/crawler/command/CrawlRankingCommand.java
  35. 19
      project/src/main/java/com/example/crawler/command/ExitCommand.java
  36. 77
      project/src/main/java/com/example/crawler/command/GenerateAllAnalysisCommand.java
  37. 32
      project/src/main/java/com/example/crawler/command/NewsCommand.java
  38. 74
      project/src/main/java/com/example/crawler/command/SaveCommand.java
  39. 32
      project/src/main/java/com/example/crawler/command/WeatherCommand.java
  40. 31
      project/src/main/java/com/example/crawler/constant/CrawlerConstants.java
  41. 90
      project/src/main/java/com/example/crawler/controller/CrawlerController.java
  42. 16
      project/src/main/java/com/example/crawler/exception/CrawlException.java
  43. 16
      project/src/main/java/com/example/crawler/exception/DataSaveException.java
  44. 16
      project/src/main/java/com/example/crawler/exception/NetworkException.java
  45. 16
      project/src/main/java/com/example/crawler/exception/ParseException.java
  46. 65
      project/src/main/java/com/example/crawler/model/Book.java
  47. 54
      project/src/main/java/com/example/crawler/model/News.java
  48. 76
      project/src/main/java/com/example/crawler/model/UniversityRank.java
  49. 140
      project/src/main/java/com/example/crawler/model/Weather.java
  50. 75
      project/src/main/java/com/example/crawler/repository/DataRepository.java
  51. 171
      project/src/main/java/com/example/crawler/service/BookAnalysisService.java
  52. 138
      project/src/main/java/com/example/crawler/service/NewsAnalysisService.java
  53. 189
      project/src/main/java/com/example/crawler/service/RankingAnalysisService.java
  54. 163
      project/src/main/java/com/example/crawler/service/WeatherAnalysisService.java
  55. 127
      project/src/main/java/com/example/crawler/strategy/BookCrawlStrategy.java
  56. 27
      project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java
  57. 151
      project/src/main/java/com/example/crawler/strategy/NewsCrawlStrategy.java
  58. 24
      project/src/main/java/com/example/crawler/strategy/StrategyFactory.java
  59. 148
      project/src/main/java/com/example/crawler/strategy/UniversityRankCrawlStrategy.java
  60. 177
      project/src/main/java/com/example/crawler/strategy/WeatherCrawlStrategy.java
  61. 122
      project/src/main/java/com/example/crawler/util/DataCleaner.java
  62. 126
      project/src/main/java/com/example/crawler/util/HttpUtil.java
  63. 95
      project/src/main/java/com/example/crawler/util/JsonUtil.java
  64. 72
      project/src/main/java/com/example/crawler/view/CrawlerView.java
  65. BIN
      project/target/classes/com/example/crawler/Main.class
  66. BIN
      project/target/classes/com/example/crawler/chart/ChartGenerator.class
  67. BIN
      project/target/classes/com/example/crawler/command/BaseCrawlCommand.class
  68. BIN
      project/target/classes/com/example/crawler/command/BookCommand.class
  69. BIN
      project/target/classes/com/example/crawler/command/Command.class
  70. BIN
      project/target/classes/com/example/crawler/command/CrawlAllCommand.class
  71. BIN
      project/target/classes/com/example/crawler/command/CrawlAndAnalyzeAllCommand.class
  72. BIN
      project/target/classes/com/example/crawler/command/CrawlRankingCommand.class
  73. BIN
      project/target/classes/com/example/crawler/command/ExitCommand.class
  74. BIN
      project/target/classes/com/example/crawler/command/GenerateAllAnalysisCommand.class
  75. BIN
      project/target/classes/com/example/crawler/command/NewsCommand.class
  76. BIN
      project/target/classes/com/example/crawler/command/SaveCommand.class
  77. BIN
      project/target/classes/com/example/crawler/command/WeatherCommand.class
  78. BIN
      project/target/classes/com/example/crawler/constant/CrawlerConstants.class
  79. BIN
      project/target/classes/com/example/crawler/controller/CrawlerController.class
  80. BIN
      project/target/classes/com/example/crawler/exception/CrawlException.class
  81. BIN
      project/target/classes/com/example/crawler/exception/DataSaveException.class
  82. BIN
      project/target/classes/com/example/crawler/exception/NetworkException.class
  83. BIN
      project/target/classes/com/example/crawler/exception/ParseException.class
  84. BIN
      project/target/classes/com/example/crawler/model/Book.class
  85. BIN
      project/target/classes/com/example/crawler/model/News.class
  86. BIN
      project/target/classes/com/example/crawler/model/UniversityRank.class
  87. BIN
      project/target/classes/com/example/crawler/model/Weather.class
  88. BIN
      project/target/classes/com/example/crawler/repository/DataRepository.class
  89. BIN
      project/target/classes/com/example/crawler/service/BookAnalysisService.class
  90. BIN
      project/target/classes/com/example/crawler/service/NewsAnalysisService.class
  91. BIN
      project/target/classes/com/example/crawler/service/RankingAnalysisService.class
  92. BIN
      project/target/classes/com/example/crawler/service/WeatherAnalysisService.class
  93. BIN
      project/target/classes/com/example/crawler/strategy/BookCrawlStrategy.class
  94. BIN
      project/target/classes/com/example/crawler/strategy/CrawlStrategy.class
  95. BIN
      project/target/classes/com/example/crawler/strategy/NewsCrawlStrategy.class
  96. BIN
      project/target/classes/com/example/crawler/strategy/StrategyFactory.class
  97. BIN
      project/target/classes/com/example/crawler/strategy/UniversityRankCrawlStrategy.class
  98. BIN
      project/target/classes/com/example/crawler/strategy/WeatherCrawlStrategy.class
  99. BIN
      project/target/classes/com/example/crawler/util/DataCleaner.class
  100. BIN
      project/target/classes/com/example/crawler/util/HttpUtil.class

10
project/.idea/.gitignore

@ -1,10 +0,0 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 已忽略包含查询文件的默认文件夹
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/

13
project/.idea/compiler.xml

@ -1,13 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="crawler-project" />
</profile>
</annotationProcessing>
</component>
</project>

7
project/.idea/encodings.xml

@ -1,7 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
</component>
</project>

20
project/.idea/jarRepositories.xml

@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>

12
project/.idea/misc.xml

@ -1,12 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK" />
</project>

6
project/.idea/vcs.xml

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

BIN
project/202506050211-靖佳颖-期末实验报告.docx

Binary file not shown.

BIN
project/charts/news_time_trend.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 18 KiB

BIN
project/charts/news_top_words.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 35 KiB

BIN
project/charts/price_histogram.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 26 KiB

BIN
project/charts/province_bar.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 22 KiB

BIN
project/charts/rating_pie.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 24 KiB

BIN
project/charts/temperature_comparison.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 44 KiB

BIN
project/charts/temperature_上海.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

BIN
project/charts/temperature_北京.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 32 KiB

BIN
project/charts/temperature_广州.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 30 KiB

64
project/dependency-reduced-pom.xml

@ -1,64 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>crawler-project</artifactId>
<name>crawler-project</name>
<version>1.0.0</version>
<description>Java爬虫项目 - MVC + Command + Strategy模式</description>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer>
<mainClass>com.example.crawler.Main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hamcrest-core</artifactId>
<groupId>org.hamcrest</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<properties>
<java.version>11</java.version>
<maven.compiler.source>11</maven.compiler.source>
<jsoup.version>1.17.2</jsoup.version>
<jfreechart.version>1.5.3</jfreechart.version>
<maven.compiler.target>11</maven.compiler.target>
<gson.version>2.10.1</gson.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
</project>

3602
project/output/books_20260530_190333.json

File diff suppressed because it is too large

82
project/output/news_20260530_190333.json

@ -1,82 +0,0 @@
[
{
"title": "专栏",
"publishTime": "",
"url": "http://zhuanlan.sina.com.cn/"
},
{
"title": "导航",
"publishTime": "",
"url": "http://news.sina.com.cn/guide/"
},
{
"title": "新浪财经",
"publishTime": "",
"url": "https://finance.sina.com.cn/mobile/comfinanceweb.shtml"
},
{
"title": "新浪博客",
"publishTime": "",
"url": "https://blog.sina.com.cn/lm/z/app/"
},
{
"title": "我的收藏",
"publishTime": "",
"url": "http://my.sina.com.cn/#location=fav"
},
{
"title": "注册",
"publishTime": "",
"url": "https://login.sina.com.cn/signup/signup?entry=news"
},
{
"title": "新闻中心",
"publishTime": "",
"url": "http://news.sina.com.cn/"
},
{
"title": "新闻排行",
"publishTime": "",
"url": "http://news.sina.com.cn/hotnews/"
},
{
"title": "联系我们",
"publishTime": "",
"url": "http://www.sina.com.cn/contactus.html"
},
{
"title": "广告服务",
"publishTime": "",
"url": "http://emarketing.sina.com.cn/"
},
{
"title": "通行证注册",
"publishTime": "",
"url": "http://login.sina.com.cn/signup/signup"
},
{
"title": "产品答疑",
"publishTime": "",
"url": "http://help.sina.com.cn/"
},
{
"title": "招聘信息",
"publishTime": "",
"url": "http://career.sina.com.cn/"
},
{
"title": "网站律师",
"publishTime": "",
"url": "http://corp.sina.com.cn/lawfirm/sina.htm"
},
{
"title": "版权所有",
"publishTime": "",
"url": "https://corp.sina.com.cn/chn/copyright.html"
},
{
"title": "意见反馈",
"publishTime": "",
"url": "http://news.sina.com.cn/feedback/post.html"
}
]

212
project/output/university_ranking_20260530_190333.json

@ -1,212 +0,0 @@
[
{
"rank": 1,
"universityName": "清华大学 Tsinghua University 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 2,
"universityName": "北京大学 Peking University 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 3,
"universityName": "浙江大学 Zhejiang University 双一流/985/211",
"totalScore": "综合",
"province": "浙江",
"category": ""
},
{
"rank": 4,
"universityName": "上海交通大学 Shanghai Jiao Tong University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 5,
"universityName": "复旦大学 Fudan University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 6,
"universityName": "南京大学 Nanjing University 双一流/985/211",
"totalScore": "综合",
"province": "江苏",
"category": ""
},
{
"rank": 7,
"universityName": "中国科学技术大学 University of Science and Technology of China 双一流/985/211",
"totalScore": "理工",
"province": "安徽",
"category": ""
},
{
"rank": 8,
"universityName": "武汉大学 Wuhan University 双一流/985/211",
"totalScore": "综合",
"province": "湖北",
"category": ""
},
{
"rank": 9,
"universityName": "华中科技大学 Huazhong University of Science and Technology 双一流/985/211",
"totalScore": "综合",
"province": "湖北",
"category": ""
},
{
"rank": 10,
"universityName": "西安交通大学 Xi'an Jiaotong University 双一流/985/211",
"totalScore": "综合",
"province": "陕西",
"category": ""
},
{
"rank": 11,
"universityName": "北京航空航天大学 Beihang University 双一流/985/211",
"totalScore": "理工",
"province": "北京",
"category": ""
},
{
"rank": 12,
"universityName": "中山大学 Sun Yat-sen University 双一流/985/211",
"totalScore": "综合",
"province": "广东",
"category": ""
},
{
"rank": 13,
"universityName": "北京理工大学 Beijing Institute of Technology 双一流/985/211",
"totalScore": "理工",
"province": "北京",
"category": ""
},
{
"rank": 14,
"universityName": "哈尔滨工业大学 Harbin Institute of Technology 双一流/985/211",
"totalScore": "理工",
"province": "黑龙江",
"category": ""
},
{
"rank": 15,
"universityName": "四川大学 Sichuan University 双一流/985/211",
"totalScore": "综合",
"province": "四川",
"category": ""
},
{
"rank": 16,
"universityName": "东南大学 Southeast University 双一流/985/211",
"totalScore": "综合",
"province": "江苏",
"category": ""
},
{
"rank": 17,
"universityName": "中国人民大学 Renmin University of China 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 18,
"universityName": "同济大学 Tongji University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 19,
"universityName": "北京师范大学 Beijing Normal University 双一流/985/211",
"totalScore": "师范",
"province": "北京",
"category": ""
},
{
"rank": 20,
"universityName": "天津大学 Tianjin University 双一流/985/211",
"totalScore": "理工",
"province": "天津",
"category": ""
},
{
"rank": 21,
"universityName": "西北工业大学 Northwestern Polytechnical University 双一流/985/211",
"totalScore": "理工",
"province": "陕西",
"category": ""
},
{
"rank": 22,
"universityName": "山东大学 Shandong University 双一流/985/211",
"totalScore": "综合",
"province": "山东",
"category": ""
},
{
"rank": 23,
"universityName": "南开大学 Nankai University 双一流/985/211",
"totalScore": "综合",
"province": "天津",
"category": ""
},
{
"rank": 24,
"universityName": "厦门大学 Xiamen University 双一流/985/211",
"totalScore": "综合",
"province": "福建",
"category": ""
},
{
"rank": 25,
"universityName": "中国农业大学 China Agricultural University 双一流/985/211",
"totalScore": "农业",
"province": "北京",
"category": ""
},
{
"rank": 26,
"universityName": "吉林大学 Jilin University 双一流/985/211",
"totalScore": "综合",
"province": "吉林",
"category": ""
},
{
"rank": 27,
"universityName": "中南大学 Central South University 双一流/985/211",
"totalScore": "综合",
"province": "湖南",
"category": ""
},
{
"rank": 28,
"universityName": "大连理工大学 Dalian University of Technology 双一流/985/211",
"totalScore": "理工",
"province": "辽宁",
"category": ""
},
{
"rank": 29,
"universityName": "湖南大学 Hunan University 双一流/985/211",
"totalScore": "综合",
"province": "湖南",
"category": ""
},
{
"rank": 30,
"universityName": "华东师范大学 East China Normal University 双一流/985/211",
"totalScore": "师范",
"province": "上海",
"category": ""
}
]

335
project/output/weather_20260530_190333.json

@ -1,335 +0,0 @@
[
{
"cityName": "上海",
"temperature": 22.7,
"humidity": 83.0,
"windSpeed": 7.8,
"weatherCode": "3",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
19.2,
19.0,
18.9,
18.3,
18.1,
17.8,
18.7,
20.9,
23.5,
24.9,
26.2,
27.0,
27.5,
28.1,
28.2,
27.4,
26.7,
25.0,
23.8,
22.7,
22.0,
20.6,
19.9,
19.4
],
"hourlyHumidities": [
83,
84,
85,
87,
89,
92,
90,
79,
55,
43,
38,
34,
33,
31,
30,
32,
35,
45,
54,
63,
67,
73,
76,
78
],
"hourlyWindSpeeds": [
3.8,
3.3,
2.6,
1.9,
1.0,
0.6,
2.3,
0.6,
1.8,
2.7,
3.0,
3.5,
5.4,
5.4,
6.0,
7.8,
9.2,
9.0,
8.1,
7.8,
7.2,
7.1,
7.1,
7.1
]
},
{
"cityName": "广州",
"temperature": 25.9,
"humidity": 85.0,
"windSpeed": 5.3,
"weatherCode": "81",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
27.7,
27.2,
26.0,
25.5,
25.4,
25.0,
25.0,
26.0,
28.1,
29.3,
30.6,
31.9,
33.0,
33.8,
33.9,
33.6,
34.2,
30.5,
29.4,
25.9,
26.4,
26.5,
26.3,
26.2
],
"hourlyHumidities": [
85,
87,
82,
84,
85,
90,
92,
87,
76,
70,
63,
57,
54,
53,
53,
54,
51,
69,
72,
95,
97,
96,
98,
98
],
"hourlyWindSpeeds": [
5.8,
4.9,
4.4,
3.3,
3.4,
3.8,
4.1,
5.6,
4.0,
3.8,
4.0,
2.8,
1.3,
3.3,
5.1,
5.2,
5.1,
12.3,
3.1,
5.3,
3.6,
1.7,
2.0,
1.4
]
},
{
"cityName": "北京",
"temperature": 32.3,
"humidity": 56.0,
"windSpeed": 17.1,
"weatherCode": "0",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
22.8,
21.9,
21.2,
20.1,
19.6,
18.8,
19.2,
20.7,
23.7,
27.0,
29.9,
32.5,
34.5,
35.8,
36.3,
36.6,
36.2,
35.7,
34.2,
32.3,
30.9,
29.9,
29.1,
28.6
],
"hourlyHumidities": [
56,
60,
63,
69,
71,
75,
74,
67,
57,
45,
37,
28,
21,
18,
20,
21,
26,
26,
30,
33,
35,
36,
35,
34
],
"hourlyWindSpeeds": [
11.6,
10.6,
7.6,
4.5,
3.9,
2.3,
2.3,
0.6,
0.8,
2.2,
2.4,
4.9,
7.6,
10.4,
12.2,
13.4,
14.7,
15.1,
14.5,
17.1,
16.9,
18.1,
19.7,
20.1
]
}
]

96
project/pom.xml

@ -1,96 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>crawler-project</artifactId>
<version>1.0.0</version>
<name>crawler-project</name>
<description>Java爬虫项目 - MVC + Command + Strategy模式</description>
<properties>
<java.version>11</java.version>
<jsoup.version>1.17.2</jsoup.version>
<gson.version>2.10.1</gson.version>
<jfreechart.version>1.5.3</jfreechart.version>
<logback.version>1.4.14</logback.version>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<!-- Jsoup HTML解析 -->
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>${jsoup.version}</version>
</dependency>
<!-- Gson JSON处理 -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>${gson.version}</version>
</dependency>
<!-- JFreeChart 图表生成 -->
<dependency>
<groupId>org.jfree</groupId>
<artifactId>jfreechart</artifactId>
<version>${jfreechart.version}</version>
</dependency>
<!-- Logback 日志框架 -->
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>${logback.version}</version>
</dependency>
<!-- 测试依赖 -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.example.crawler.Main</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

14
project/reports/book_analysis_report.txt

@ -1,14 +0,0 @@
========== 书籍数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.026682900
分析书籍总数: 600
【价格统计】
最高价: £59.92
最低价: £10.01
平均价: £35.29
【库存统计】
有库存: 600 本
缺货: 0 本
报告生成完成

31
project/reports/news_analysis_report.txt

@ -1,31 +0,0 @@
========== 新闻数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.145591
分析新闻总数: 16
【发布时间分布】
00:00 - 01:00: 0 条
01:00 - 02:00: 0 条
02:00 - 03:00: 0 条
03:00 - 04:00: 0 条
04:00 - 05:00: 0 条
05:00 - 06:00: 0 条
06:00 - 07:00: 0 条
07:00 - 08:00: 0 条
08:00 - 09:00: 0 条
09:00 - 10:00: 0 条
10:00 - 11:00: 0 条
11:00 - 12:00: 0 条
12:00 - 13:00: 0 条
13:00 - 14:00: 0 条
14:00 - 15:00: 0 条
15:00 - 16:00: 0 条
16:00 - 17:00: 0 条
17:00 - 18:00: 16 条
18:00 - 19:00: 0 条
19:00 - 20:00: 0 条
20:00 - 21:00: 0 条
21:00 - 22:00: 0 条
22:00 - 23:00: 0 条
23:00 - 00:00: 0 条
报告生成完成

17
project/reports/ranking_analysis_report.txt

@ -1,17 +0,0 @@
========== 大学排名数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.272388
分析大学总数: 30
【省份排行榜 TOP 10】
北京: 7 所大学
上海: 4 所大学
湖北: 2 所大学
湖南: 2 所大学
天津: 2 所大学
陕西: 2 所大学
江苏: 2 所大学
山东: 1 所大学
福建: 1 所大学
吉林: 1 所大学
报告生成完成

29
project/reports/weather_analysis_report.txt

@ -1,29 +0,0 @@
========== 天气数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.585539200
分析城市数量: 3
数据来源: Open-Meteo API (CC BY 4.0)
【多城市天气对比】
城市: 上海
当前温度: 24.0°C
当前湿度: 83%
风速: 8.3 km/h
天气: 多云
24小时平均温度: 22.7°C
城市: 广州
当前温度: 29.8°C
当前湿度: 85%
风速: 2.4 km/h
天气: 小毛毛雨
24小时平均温度: 28.6°C
城市: 北京
当前温度: 34.6°C
当前湿度: 56%
风速: 14.4 km/h
天气: 晴
24小时平均温度: 28.2°C
报告生成完成

15
project/src/main/java/com/example/crawler/Main.java

@ -1,15 +0,0 @@
package com.example.crawler;
import com.example.crawler.controller.CrawlerController;
/**
* 爬虫项目主入口类
*/
public class Main {
public static void main(String[] args) {
// 创建控制器并启动CLI界面
CrawlerController controller = new CrawlerController();
controller.start();
}
}

229
project/src/main/java/com/example/crawler/chart/ChartGenerator.java

@ -1,229 +0,0 @@
package com.example.crawler.chart;
import java.awt.Color;
import java.awt.Font;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.constant.CrawlerConstants;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.NumberAxis;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PiePlot;
import org.jfree.chart.plot.XYPlot;
import org.jfree.chart.renderer.category.BarRenderer;
import org.jfree.chart.renderer.category.LineAndShapeRenderer;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
import org.jfree.data.xy.XYDataset;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
public class ChartGenerator {
static {
File dir = new File(CrawlerConstants.CHARTS_DIR);
if (!dir.exists()) {
dir.mkdirs();
}
}
public static void generatePriceHistogram(Map<String, Integer> priceDistribution, String fileName) {
DefaultCategoryDataset dataset = createCategoryDataset(priceDistribution);
JFreeChart chart = ChartFactory.createBarChart(
"书籍价格分布",
"价格区间(£)",
"书籍数量",
dataset
);
customizeBarChart(chart);
saveChart(chart, fileName);
}
public static void generateRatingPieChart(Map<String, Integer> ratingDistribution, String fileName) {
DefaultPieDataset<String> dataset = new DefaultPieDataset<>();
for (Map.Entry<String, Integer> entry : ratingDistribution.entrySet()) {
dataset.setValue(entry.getKey(), entry.getValue());
}
JFreeChart chart = ChartFactory.createPieChart(
"书籍评分分布",
dataset,
true,
true,
false
);
customizePieChart(chart);
saveChart(chart, fileName);
}
public static void generateNewsTimeTrend(Map<Integer, Integer> hourDistribution, String fileName) {
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
for (int i = 0; i < 24; i++) {
int count = hourDistribution.getOrDefault(i, 0);
dataset.addValue(count, "新闻数量", String.format("%02d:00", i));
}
JFreeChart chart = ChartFactory.createLineChart(
"新闻发布时间分布",
"小时",
"新闻数量",
dataset
);
customizeLineChart(chart);
saveChart(chart, fileName);
}
public static void generateWordFrequencyBarChart(Map<String, Integer> wordFrequency, String fileName) {
Map<String, Integer> top10 = wordFrequency.entrySet().stream()
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
.limit(10)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
for (Map.Entry<String, Integer> entry : top10.entrySet()) {
dataset.addValue(entry.getValue(), "词频", entry.getKey());
}
JFreeChart chart = ChartFactory.createBarChart(
"新闻高频词 TOP 10",
"关键词",
"出现次数",
dataset
);
customizeBarChart(chart);
saveChart(chart, fileName);
}
public static void generateProvinceBarChart(Map<String, Integer> provinceDistribution, String fileName) {
Map<String, Integer> top10 = provinceDistribution.entrySet().stream()
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
.limit(10)
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
DefaultCategoryDataset dataset = createCategoryDataset(top10);
JFreeChart chart = ChartFactory.createBarChart(
"各省上榜大学数量 TOP 10",
"省份",
"大学数量",
dataset
);
customizeBarChart(chart);
saveChart(chart, fileName);
}
public static void generateScoreHistogram(Map<String, Integer> scoreDistribution, String fileName) {
DefaultCategoryDataset dataset = createCategoryDataset(scoreDistribution);
JFreeChart chart = ChartFactory.createBarChart(
"大学总分分布",
"分数区间",
"大学数量",
dataset
);
customizeBarChart(chart);
saveChart(chart, fileName);
}
public static void generateTemperatureTrend(List<String> times, List<Double> temperatures, String cityName, String fileName) {
XYSeries series = new XYSeries(cityName);
for (int i = 0; i < Math.min(times.size(), temperatures.size()); i++) {
series.add(i, temperatures.get(i));
}
XYDataset dataset = new XYSeriesCollection(series);
JFreeChart chart = ChartFactory.createXYLineChart(
cityName + " 未来24小时温度变化",
"小时",
"温度(°C)",
dataset
);
customizeXYLineChart(chart);
saveChart(chart, fileName);
}
public static void generateMultiCityTemperatureComparison(Map<String, List<Double>> cityTemperatures, String fileName) {
XYSeriesCollection dataset = new XYSeriesCollection();
for (Map.Entry<String, List<Double>> entry : cityTemperatures.entrySet()) {
XYSeries series = new XYSeries(entry.getKey());
List<Double> temps = entry.getValue();
for (int i = 0; i < Math.min(temps.size(), 24); i++) {
series.add(i, temps.get(i));
}
dataset.addSeries(series);
}
JFreeChart chart = ChartFactory.createXYLineChart(
"多城市未来24小时温度对比",
"小时",
"温度(°C)",
dataset
);
customizeXYLineChart(chart);
saveChart(chart, fileName);
}
private static DefaultCategoryDataset createCategoryDataset(Map<String, Integer> data) {
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
for (Map.Entry<String, Integer> entry : data.entrySet()) {
dataset.addValue(entry.getValue(), "数值", entry.getKey());
}
return dataset;
}
private static void customizeBarChart(JFreeChart chart) {
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16));
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
CategoryPlot plot = chart.getCategoryPlot();
CategoryAxis domainAxis = plot.getDomainAxis();
domainAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
domainAxis.setTickLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 10));
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis();
rangeAxis.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
BarRenderer renderer = (BarRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(79, 129, 189));
}
private static void customizePieChart(JFreeChart chart) {
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16));
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
PiePlot plot = (PiePlot) chart.getPlot();
plot.setLabelFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
}
private static void customizeLineChart(JFreeChart chart) {
chart.getTitle().setFont(new Font("Microsoft YaHei", Font.BOLD, 16));
chart.getLegend().setItemFont(new Font("Microsoft YaHei", Font.PLAIN, 12));
CategoryPlot plot = chart.getCategoryPlot();
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(79, 129, 189));
}
/**
 * Styles an XY line chart: "Microsoft YaHei" fonts for the title, the legend items
 * and the labels of both axes.
 *
 * @param chart an XY chart whose domain and range axes are {@code NumberAxis} instances
 *              (both are cast without checking)
 */
private static void customizeXYLineChart(JFreeChart chart) {
    final String family = "Microsoft YaHei";
    Font plainFont = new Font(family, Font.PLAIN, 12);

    chart.getTitle().setFont(new Font(family, Font.BOLD, 16));
    chart.getLegend().setItemFont(plainFont);

    XYPlot xy = chart.getXYPlot();
    ((NumberAxis) xy.getDomainAxis()).setLabelFont(plainFont);
    ((NumberAxis) xy.getRangeAxis()).setLabelFont(plainFont);
}
/**
 * Writes the chart as an 800x500 PNG into the charts directory and reports the outcome
 * on the console. An {@code IOException} is reported on stderr and not rethrown.
 *
 * @param chart    the chart to render
 * @param fileName file name (relative to {@code CrawlerConstants.CHARTS_DIR})
 */
private static void saveChart(JFreeChart chart, String fileName) {
    File target = new File(CrawlerConstants.CHARTS_DIR, fileName);
    try {
        ChartUtils.saveChartAsPNG(target, chart, 800, 500);
        System.out.println("图表已保存: " + target.getAbsolutePath());
    } catch (IOException ioe) {
        System.err.println("保存图表失败: " + ioe.getMessage());
    }
}
}

60
project/src/main/java/com/example/crawler/command/BaseCrawlCommand.java

@ -1,60 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Template for commands that crawl one data source and store the result in the repository.
 *
 * <p>Subclasses supply the crawl strategy and the repository write; this class adds the
 * retry loop for {@link NetworkException} and the top-level error reporting.
 */
public abstract class BaseCrawlCommand implements Command {
    protected static final Logger logger = LoggerFactory.getLogger(BaseCrawlCommand.class);

    protected DataRepository repository;
    /** Maximum number of crawl attempts before giving up. */
    protected int maxRetries;
    /** Pause between two attempts, in milliseconds. */
    protected long retryDelayMs;

    /**
     * @param repository destination for crawled data
     */
    public BaseCrawlCommand(DataRepository repository) {
        this.repository = repository;
        this.maxRetries = CrawlerConstants.MAX_RETRIES;
        this.retryDelayMs = 2000;
    }

    /** Returns the strategy used to crawl this command's data source. */
    protected abstract CrawlStrategy<?> getStrategy();

    /**
     * Stores the crawl result.
     *
     * @param data whatever {@link CrawlStrategy#crawl()} returned
     */
    protected abstract void saveToRepository(Object data);

    /**
     * Crawls (with retries) and saves the result. Failures are logged and printed to
     * stderr; nothing is rethrown to the caller.
     */
    @Override
    public void execute() {
        try {
            Object data = crawlWithRetry();
            saveToRepository(data);
            logger.info("Crawling completed and saved to repository");
        } catch (InterruptedException e) {
            // The retry sleep was interrupted: restore the flag so callers up the stack
            // can still observe the cancellation request instead of losing it here.
            Thread.currentThread().interrupt();
            logger.error("Crawling failed", e);
            System.err.println("爬取失败: " + e.getMessage());
        } catch (Exception e) {
            logger.error("Crawling failed", e);
            System.err.println("爬取失败: " + e.getMessage());
        }
    }

    /**
     * Runs the strategy, retrying after a {@link NetworkException} until
     * {@code maxRetries} attempts have been made.
     *
     * @return the crawl result
     * @throws Exception the last {@link NetworkException} once attempts are exhausted,
     *                   any other exception raised by the strategy, an
     *                   {@link InterruptedException} if the wait between attempts is
     *                   interrupted, or a {@link CrawlException} if no attempt was
     *                   possible ({@code maxRetries <= 0})
     */
    protected Object crawlWithRetry() throws Exception {
        for (int attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                CrawlStrategy<?> strategy = getStrategy();
                return strategy.crawl();
            } catch (NetworkException e) {
                if (attempt >= maxRetries) {
                    logger.error("Max retries reached, giving up");
                    throw e;
                }
                logger.warn("Network error, retrying in {}ms (attempt {}/{})", retryDelayMs, attempt, maxRetries);
                Thread.sleep(retryDelayMs);
            }
        }
        throw new CrawlException("Max retries exceeded");
    }
}

32
project/src/main/java/com/example/crawler/command/BookCommand.java

@ -1,32 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.model.Book;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.BookCrawlStrategy;
import com.example.crawler.strategy.CrawlStrategy;
import java.util.List;
/**
 * Crawl command for the book data source: crawls with {@link BookCrawlStrategy}
 * and stores the result through {@code repository.saveBooks}.
 */
public class BookCommand extends BaseCrawlCommand {

    public BookCommand(DataRepository repository) {
        super(repository);
    }

    @Override
    public String getName() {
        return "爬取书籍信息";
    }

    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new BookCrawlStrategy();
    }

    /**
     * Stores the crawled books and prints how many were fetched.
     *
     * @param data the crawl result; cast (unchecked) to {@code List<Book>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<Book> crawled = (List<Book>) data;
        repository.saveBooks(crawled);
        System.out.println("成功爬取 " + crawled.size() + " 本书籍信息");
    }
}

20
project/src/main/java/com/example/crawler/command/Command.java

@ -1,20 +0,0 @@
package com.example.crawler.command;
/**
* Command interface.
* Defines the standard methods for executing a command (Command pattern).
*/
public interface Command {
/**
* Executes the command.
*/
void execute();
/**
* Returns the command's name.
*
* @return the command name
*/
String getName();
}

45
project/src/main/java/com/example/crawler/command/CrawlAllCommand.java

@ -1,45 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
/**
 * Runs the four crawl commands one after another, pausing two seconds after each,
 * and then writes the collected data to disk via {@link SaveCommand}.
 */
public class CrawlAllCommand implements Command {
    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller owning controller; its repository receives the crawled data
     */
    public CrawlAllCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    @Override
    public void execute() {
        System.out.println("\n=== 开始爬取全部数据源 ===");

        Command[] pipeline = new Command[] {
            new BookCommand(repository),
            new NewsCommand(repository),
            new CrawlRankingCommand(repository),
            new WeatherCommand(repository)
        };
        for (int i = 0; i < pipeline.length; i++) {
            pipeline[i].execute();
            pauseAfterCrawl();
        }

        new SaveCommand(controller).execute();
        System.out.println("\n=== 全部数据爬取完成 ===");
    }

    /** Sleeps two seconds; on interruption only the thread's interrupt flag is restored. */
    private void pauseAfterCrawl() {
        try {
            Thread.sleep(2000);
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    @Override
    public String getName() {
        return "爬取全部数据并保存";
    }
}

104
project/src/main/java/com/example/crawler/command/CrawlAndAnalyzeAllCommand.java

@ -1,104 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.service.BookAnalysisService;
import com.example.crawler.service.NewsAnalysisService;
import com.example.crawler.service.RankingAnalysisService;
import com.example.crawler.service.WeatherAnalysisService;
/**
 * Crawls all four data sources, writes the raw data to disk and then runs every
 * analysis service on whatever data the repository holds.
 *
 * <p>Each step is isolated: a failure is reported on stderr and the remaining steps
 * still run.
 */
public class CrawlAndAnalyzeAllCommand implements Command {
    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller owning controller; its repository receives the crawled data
     */
    public CrawlAndAnalyzeAllCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    @Override
    public void execute() {
        System.out.println("\n========== 爬取全部数据并生成分析 ==========\n");
        crawlStep("第1步:爬取书籍信息...", new BookCommand(repository), "书籍爬取失败: ");
        crawlStep("\n第2步:爬取新闻信息...", new NewsCommand(repository), "新闻爬取失败: ");
        crawlStep("\n第3步:爬取大学排名...", new CrawlRankingCommand(repository), "大学排名爬取失败: ");
        crawlStep("\n第4步:爬取天气数据...", new WeatherCommand(repository), "天气数据爬取失败: ");

        // The summary below tells the user the raw data is in output/, so it has to be
        // written here; the crawl commands only fill the in-memory repository.
        new SaveCommand(controller).execute();

        System.out.println("\n========== 数据爬取完成,开始分析 ==========\n");
        analyzeBooks();
        analyzeNews();
        analyzeRankings();
        analyzeWeather();

        System.out.println("\n========== 全部完成 ==========");
        System.out.println("原始数据已保存到 output/ 目录");
        System.out.println("分析报告已保存到 reports/ 目录");
        System.out.println("图表已保存到 charts/ 目录");
    }

    /** Prints the step banner and runs one crawl command, reporting any exception on stderr. */
    private void crawlStep(String banner, Command command, String failurePrefix) {
        System.out.println(banner);
        try {
            command.execute();
        } catch (Exception e) {
            System.err.println(failurePrefix + e.getMessage());
        }
    }

    /** Runs the book analysis when the repository holds books. */
    private void analyzeBooks() {
        try {
            BookAnalysisService bookService = new BookAnalysisService();
            if (!repository.getBooks().isEmpty()) {
                bookService.analyze(repository.getBooks());
            }
        } catch (Exception e) {
            System.err.println("书籍分析失败: " + e.getMessage());
        }
    }

    /** Runs the news analysis when the repository holds news items. */
    private void analyzeNews() {
        try {
            NewsAnalysisService newsService = new NewsAnalysisService();
            if (!repository.getNewsList().isEmpty()) {
                newsService.analyze(repository.getNewsList());
            }
        } catch (Exception e) {
            System.err.println("新闻分析失败: " + e.getMessage());
        }
    }

    /** Runs the ranking analysis when the repository holds rankings. */
    private void analyzeRankings() {
        try {
            RankingAnalysisService rankingService = new RankingAnalysisService();
            if (!repository.getRankings().isEmpty()) {
                rankingService.analyze(repository.getRankings());
            }
        } catch (Exception e) {
            System.err.println("大学排名分析失败: " + e.getMessage());
        }
    }

    /** Runs the weather analysis when the repository holds weather data. */
    private void analyzeWeather() {
        try {
            WeatherAnalysisService weatherService = new WeatherAnalysisService();
            if (!repository.getWeatherList().isEmpty()) {
                weatherService.analyze(repository.getWeatherList());
            }
        } catch (Exception e) {
            System.err.println("天气分析失败: " + e.getMessage());
        }
    }

    @Override
    public String getName() {
        return "爬取并分析全部数据";
    }
}

32
project/src/main/java/com/example/crawler/command/CrawlRankingCommand.java

@ -1,32 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.UniversityRankCrawlStrategy;
import java.util.List;
/**
 * Crawl command for the university ranking source: crawls with
 * {@link UniversityRankCrawlStrategy} and stores the result through
 * {@code repository.saveRankings}.
 */
public class CrawlRankingCommand extends BaseCrawlCommand {

    public CrawlRankingCommand(DataRepository repository) {
        super(repository);
    }

    @Override
    public String getName() {
        return "爬取软科中国大学排名";
    }

    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new UniversityRankCrawlStrategy();
    }

    /**
     * Stores the crawled rankings and prints how many rows were fetched.
     *
     * @param data the crawl result; cast (unchecked) to {@code List<UniversityRank>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<UniversityRank> crawled = (List<UniversityRank>) data;
        repository.saveRankings(crawled);
        System.out.println("成功爬取 " + crawled.size() + " 条大学排名数据");
    }
}

19
project/src/main/java/com/example/crawler/command/ExitCommand.java

@ -1,19 +0,0 @@
package com.example.crawler.command;
/**
 * Exit command (Command pattern): prints a farewell line and terminates the JVM.
 */
public class ExitCommand implements Command {
    private static final String FAREWELL = "\n=== 感谢使用数据爬取系统 ===";
    private static final int EXIT_STATUS_OK = 0;

    /** Prints the farewell message, then calls {@code System.exit(0)}; never returns. */
    @Override
    public void execute() {
        System.out.println(FAREWELL);
        System.exit(EXIT_STATUS_OK);
    }

    @Override
    public String getName() {
        return "退出";
    }
}

77
project/src/main/java/com/example/crawler/command/GenerateAllAnalysisCommand.java

@ -1,77 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.service.BookAnalysisService;
import com.example.crawler.service.NewsAnalysisService;
import com.example.crawler.service.RankingAnalysisService;
import com.example.crawler.service.WeatherAnalysisService;
/**
 * Runs every analysis service against the data currently held by the repository.
 * A data source with no data is skipped with a console note; a failing analysis is
 * reported on stderr and does not stop the others.
 */
public class GenerateAllAnalysisCommand implements Command {
    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller owning controller; its repository supplies the data to analyze
     */
    public GenerateAllAnalysisCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    @Override
    public void execute() {
        System.out.println("\n========== 生成所有数据源分析报告 ==========\n");
        runBookAnalysis();
        runNewsAnalysis();
        runRankingAnalysis();
        runWeatherAnalysis();
        System.out.println("\n========== 分析完成 ==========");
        System.out.println("报告已保存到 reports/ 目录");
        System.out.println("图表已保存到 charts/ 目录");
    }

    /** Analyzes the stored books, or notes that there are none. */
    private void runBookAnalysis() {
        try {
            BookAnalysisService service = new BookAnalysisService();
            if (repository.getBooks().isEmpty()) {
                System.out.println("没有书籍数据,跳过书籍分析");
            } else {
                service.analyze(repository.getBooks());
            }
        } catch (Exception failure) {
            System.err.println("书籍分析失败: " + failure.getMessage());
        }
    }

    /** Analyzes the stored news, or notes that there is none. */
    private void runNewsAnalysis() {
        try {
            NewsAnalysisService service = new NewsAnalysisService();
            if (repository.getNewsList().isEmpty()) {
                System.out.println("没有新闻数据,跳过新闻分析");
            } else {
                service.analyze(repository.getNewsList());
            }
        } catch (Exception failure) {
            System.err.println("新闻分析失败: " + failure.getMessage());
        }
    }

    /** Analyzes the stored rankings, or notes that there are none. */
    private void runRankingAnalysis() {
        try {
            RankingAnalysisService service = new RankingAnalysisService();
            if (repository.getRankings().isEmpty()) {
                System.out.println("没有大学排名数据,跳过排名分析");
            } else {
                service.analyze(repository.getRankings());
            }
        } catch (Exception failure) {
            System.err.println("大学排名分析失败: " + failure.getMessage());
        }
    }

    /** Analyzes the stored weather data, or notes that there is none. */
    private void runWeatherAnalysis() {
        try {
            WeatherAnalysisService service = new WeatherAnalysisService();
            if (repository.getWeatherList().isEmpty()) {
                System.out.println("没有天气数据,跳过天气分析");
            } else {
                service.analyze(repository.getWeatherList());
            }
        } catch (Exception failure) {
            System.err.println("天气分析失败: " + failure.getMessage());
        }
    }

    @Override
    public String getName() {
        return "生成所有分析报告";
    }
}

32
project/src/main/java/com/example/crawler/command/NewsCommand.java

@ -1,32 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.model.News;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.NewsCrawlStrategy;
import java.util.List;
/**
 * Crawl command for the news source: crawls with {@link NewsCrawlStrategy}
 * and stores the result through {@code repository.saveNewsList}.
 */
public class NewsCommand extends BaseCrawlCommand {

    public NewsCommand(DataRepository repository) {
        super(repository);
    }

    @Override
    public String getName() {
        return "爬取新浪国内新闻";
    }

    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new NewsCrawlStrategy();
    }

    /**
     * Stores the crawled news and prints how many items were fetched.
     *
     * @param data the crawl result; cast (unchecked) to {@code List<News>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<News> crawled = (List<News>) data;
        repository.saveNewsList(crawled);
        System.out.println("成功爬取 " + crawled.size() + " 条新闻");
    }
}

74
project/src/main/java/com/example/crawler/command/SaveCommand.java

@ -1,74 +0,0 @@
package com.example.crawler.command;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import com.example.crawler.util.JsonUtil;
/**
 * Writes every non-empty data set held by the controller to a timestamped JSON file
 * under {@code CrawlerConstants.OUTPUT_DIR}.
 */
public class SaveCommand implements Command {
    private final CrawlerController controller;

    public SaveCommand(CrawlerController controller) {
        this.controller = controller;
    }

    /**
     * Saves books, news, rankings and weather in that order, all sharing one timestamp.
     * The first exception aborts the remaining saves and is reported on stderr.
     */
    @Override
    public void execute() {
        System.out.println("\n=== 开始保存数据 ===");
        try {
            DateTimeFormatter stampFormat = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");
            String timestamp = LocalDateTime.now().format(stampFormat);

            saveIfPresent(controller.getBooks(), "books", "书籍", timestamp);
            saveIfPresent(controller.getNewsList(), "news", "新闻", timestamp);
            saveIfPresent(controller.getUniversityRankList(), "university_ranking", "大学排名", timestamp);
            saveIfPresent(controller.getWeatherList(), "weather", "天气", timestamp);

            System.out.println("\n=== 数据保存完成 ===");
        } catch (Exception failure) {
            System.err.println("保存数据失败: " + failure.getMessage());
            failure.printStackTrace();
        }
    }

    /**
     * Writes one data set to {@code <output>/<filePrefix>_<timestamp>.json} and prints
     * the target path; a null or empty list is skipped silently.
     */
    private void saveIfPresent(List<?> records, String filePrefix, String label, String timestamp)
            throws Exception {
        if (records == null || records.isEmpty()) {
            return;
        }
        String target = CrawlerConstants.OUTPUT_DIR + "/" + filePrefix + "_" + timestamp + ".json";
        JsonUtil.saveListToJsonFile(records, target);
        System.out.println(label + "数据已保存到: " + target);
    }

    @Override
    public String getName() {
        return "保存当前数据到文件";
    }
}

32
project/src/main/java/com/example/crawler/command/WeatherCommand.java

@ -1,32 +0,0 @@
package com.example.crawler.command;
import com.example.crawler.model.Weather;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.WeatherCrawlStrategy;
import java.util.List;
/**
 * Crawl command for the weather source: crawls with {@link WeatherCrawlStrategy}
 * and stores the result through {@code repository.saveWeatherList}.
 */
public class WeatherCommand extends BaseCrawlCommand {

    public WeatherCommand(DataRepository repository) {
        super(repository);
    }

    @Override
    public String getName() {
        return "爬取天气数据";
    }

    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new WeatherCrawlStrategy();
    }

    /**
     * Stores the crawled weather records and prints how many cities were fetched.
     *
     * @param data the crawl result; cast (unchecked) to {@code List<Weather>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<Weather> crawled = (List<Weather>) data;
        repository.saveWeatherList(crawled);
        System.out.println("成功爬取 " + crawled.size() + " 个城市的天气信息");
    }
}

31
project/src/main/java/com/example/crawler/constant/CrawlerConstants.java

@ -1,31 +0,0 @@
package com.example.crawler.constant;
import java.util.HashMap;
import java.util.Map;
/**
 * Shared configuration constants for the crawler: HTTP settings, source URLs,
 * output directories and the cities whose weather is fetched.
 */
public class CrawlerConstants {
    public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
    public static final String REFERER = "https://www.baidu.com";
    public static final int TIMEOUT_MS = 10000;
    public static final int MAX_RETRIES = 3;
    public static final long DELAY_MS = 3000;

    public static final String URL_BOOKS = "https://books.toscrape.com/";
    public static final String URL_NEWS = "https://news.sina.com.cn/china/";
    public static final String URL_RANKING = "https://www.shanghairanking.cn/rankings/bcur/202310";
    public static final String URL_WEATHER_API = "https://api.open-meteo.com/v1/forecast";

    public static final String OUTPUT_DIR = "output";
    public static final String REPORTS_DIR = "reports";
    public static final String CHARTS_DIR = "charts";

    /**
     * City name mapped to a two-element coordinate array (the values correspond to
     * {latitude, longitude}). The map is an unmodifiable view, so callers cannot add or
     * remove cities through this shared constant.
     */
    public static final Map<String, double[]> CITY_COORDINATES =
            java.util.Collections.unmodifiableMap(buildCityCoordinates());

    /** Builds the backing city table; kept as a HashMap so iteration order is unchanged. */
    private static Map<String, double[]> buildCityCoordinates() {
        Map<String, double[]> cities = new HashMap<>();
        cities.put("北京", new double[]{39.9042, 116.4074});
        cities.put("上海", new double[]{31.2304, 121.4737});
        cities.put("广州", new double[]{23.1291, 113.2644});
        return cities;
    }
}

90
project/src/main/java/com/example/crawler/controller/CrawlerController.java

@ -1,90 +0,0 @@
package com.example.crawler.controller;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import com.example.crawler.command.BookCommand;
import com.example.crawler.command.Command;
import com.example.crawler.command.CrawlAllCommand;
import com.example.crawler.command.CrawlAndAnalyzeAllCommand;
import com.example.crawler.command.CrawlRankingCommand;
import com.example.crawler.command.ExitCommand;
import com.example.crawler.command.GenerateAllAnalysisCommand;
import com.example.crawler.command.NewsCommand;
import com.example.crawler.command.SaveCommand;
import com.example.crawler.command.WeatherCommand;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.view.CrawlerView;
/**
 * Console controller: maps menu numbers 1-9 to commands, runs the menu loop and
 * exposes the repository's data to the commands.
 */
public class CrawlerController {
    private final CrawlerView view;
    private final Map<Integer, Command> commandMap;
    private final DataRepository repository;

    /** Creates the view, obtains the shared repository and registers all menu commands. */
    public CrawlerController() {
        this.view = new CrawlerView();
        this.repository = DataRepository.getInstance();
        this.commandMap = new HashMap<>();
        registerCommands();
    }

    /** Fills the menu table; must run after {@code repository} is assigned. */
    private void registerCommands() {
        commandMap.put(1, new BookCommand(repository));
        commandMap.put(2, new NewsCommand(repository));
        commandMap.put(3, new CrawlRankingCommand(repository));
        commandMap.put(4, new WeatherCommand(repository));
        commandMap.put(5, new CrawlAllCommand(this));
        commandMap.put(6, new SaveCommand(this));
        commandMap.put(7, new GenerateAllAnalysisCommand(this));
        commandMap.put(8, new CrawlAndAnalyzeAllCommand(this));
        commandMap.put(9, new ExitCommand());
    }

    /**
     * Runs the menu loop. The loop has no exit condition of its own; it ends when the
     * selected command terminates the JVM (menu entry 9).
     */
    public void start() {
        Scanner input = new Scanner(System.in);
        for (;;) {
            view.showMenu();
            int choice = view.getInput(input);
            Command selected = commandMap.get(choice);
            if (selected == null) {
                view.showError("无效的选择,请输入1-9之间的数字");
            } else {
                selected.execute();
            }
            if (choice != 9) {
                view.pause(input);
            }
        }
    }

    public List<Book> getBooks() {
        return repository.getBooks();
    }

    public List<News> getNewsList() {
        return repository.getNewsList();
    }

    public List<UniversityRank> getUniversityRankList() {
        return repository.getRankings();
    }

    public List<Weather> getWeatherList() {
        return repository.getWeatherList();
    }

    public DataRepository getRepository() {
        return repository;
    }
}

16
project/src/main/java/com/example/crawler/exception/CrawlException.java

@ -1,16 +0,0 @@
package com.example.crawler.exception;
/**
* Base class for crawler exceptions.
* All crawler-related exceptions extend this class.
*/
public class CrawlException extends Exception {
/**
* @param message detail message
*/
public CrawlException(String message) {
super(message);
}
/**
* @param message detail message
* @param cause underlying cause
*/
public CrawlException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/DataSaveException.java

@ -1,16 +0,0 @@
package com.example.crawler.exception;
/**
* Data-save exception.
* Used for errors while saving data, such as file write failures and JSON serialization failures.
*/
public class DataSaveException extends CrawlException {
/**
* @param message detail message
*/
public DataSaveException(String message) {
super(message);
}
/**
* @param message detail message
* @param cause underlying cause
*/
public DataSaveException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/NetworkException.java

@ -1,16 +0,0 @@
package com.example.crawler.exception;
/**
* Network exception.
* Used for network-related errors such as failed HTTP requests and connection timeouts.
*/
public class NetworkException extends CrawlException {
/**
* @param message detail message
*/
public NetworkException(String message) {
super(message);
}
/**
* @param message detail message
* @param cause underlying cause
*/
public NetworkException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/ParseException.java

@ -1,16 +0,0 @@
package com.example.crawler.exception;
/**
* Parse exception.
* Used for data-parsing errors such as HTML parse failures and JSON parse failures.
*/
public class ParseException extends CrawlException {
/**
* @param message detail message
*/
public ParseException(String message) {
super(message);
}
/**
* @param message detail message
* @param cause underlying cause
*/
public ParseException(String message, Throwable cause) {
super(message, cause);
}
}

65
project/src/main/java/com/example/crawler/model/Book.java

@ -1,65 +0,0 @@
package com.example.crawler.model;
/**
 * Book data model.
 * Holds the book information scraped from the toscrape.com site; every field is kept
 * as the raw string.
 */
public class Book {
    private String title;
    private String price;
    private String availability;
    private String rating;

    /** Creates a book with all fields unset ({@code null}). */
    public Book() {
    }

    public Book(String title, String price, String availability, String rating) {
        this.title = title;
        this.price = price;
        this.availability = availability;
        this.rating = rating;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPrice() {
        return price;
    }

    public void setPrice(String price) {
        this.price = price;
    }

    public String getAvailability() {
        return availability;
    }

    public void setAvailability(String availability) {
        this.availability = availability;
    }

    public String getRating() {
        return rating;
    }

    public void setRating(String rating) {
        this.rating = rating;
    }

    /** Returns all four fields in {@code Book{name='value', ...}} form. */
    @Override
    public String toString() {
        return String.format(
                "Book{title='%s', price='%s', availability='%s', rating='%s'}",
                title, price, availability, rating);
    }
}

54
project/src/main/java/com/example/crawler/model/News.java

@ -1,54 +0,0 @@
package com.example.crawler.model;
/**
 * News data model.
 * Holds one domestic news item from Sina News: title, publish time and link,
 * all as raw strings.
 */
public class News {
    private String title;
    private String publishTime;
    private String url;

    /** Creates a news item with all fields unset ({@code null}). */
    public News() {
    }

    public News(String title, String publishTime, String url) {
        this.title = title;
        this.publishTime = publishTime;
        this.url = url;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getPublishTime() {
        return publishTime;
    }

    public void setPublishTime(String publishTime) {
        this.publishTime = publishTime;
    }

    public String getUrl() {
        return url;
    }

    public void setUrl(String url) {
        this.url = url;
    }

    /** Returns all three fields in {@code News{name='value', ...}} form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("News{");
        text.append("title='").append(title).append('\'');
        text.append(", publishTime='").append(publishTime).append('\'');
        text.append(", url='").append(url).append('\'');
        return text.append('}').toString();
    }
}

76
project/src/main/java/com/example/crawler/model/UniversityRank.java

@ -1,76 +0,0 @@
package com.example.crawler.model;
/**
 * University ranking data model.
 * Holds one row of the ShanghaiRanking (软科) Chinese university ranking.
 */
public class UniversityRank {
    private Integer rank;
    private String universityName;
    private String totalScore;
    private String province;
    private String category;

    /** Creates a row with all fields unset ({@code null}). */
    public UniversityRank() {
    }

    public UniversityRank(Integer rank, String universityName, String totalScore, String province, String category) {
        this.rank = rank;
        this.universityName = universityName;
        this.totalScore = totalScore;
        this.province = province;
        this.category = category;
    }

    public Integer getRank() {
        return rank;
    }

    public void setRank(Integer rank) {
        this.rank = rank;
    }

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public String getTotalScore() {
        return totalScore;
    }

    public void setTotalScore(String totalScore) {
        this.totalScore = totalScore;
    }

    public String getProvince() {
        return province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    public String getCategory() {
        return category;
    }

    public void setCategory(String category) {
        this.category = category;
    }

    /** Returns all five fields in {@code UniversityRank{rank=1, name='value', ...}} form. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("UniversityRank{");
        text.append("rank=").append(rank);
        text.append(", universityName='").append(universityName).append('\'');
        text.append(", totalScore='").append(totalScore).append('\'');
        text.append(", province='").append(province).append('\'');
        text.append(", category='").append(category).append('\'');
        return text.append('}').toString();
    }
}

140
project/src/main/java/com/example/crawler/model/Weather.java

@ -1,140 +0,0 @@
package com.example.crawler.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Weather data model.
 * Holds a city's weather information from the Open-Meteo API: the current values plus
 * parallel hourly series.
 * Data source: Open-Meteo (CC BY 4.0).
 */
public class Weather {
    private String cityName;
    private double temperature;
    private double humidity;
    private double windSpeed;
    /** Weather code kept as a decimal string such as {@code "61"}. */
    private String weatherCode;
    private List<String> hourlyTimes;
    private List<Double> hourlyTemperatures;
    private List<Integer> hourlyHumidities;
    private List<Double> hourlyWindSpeeds;

    /** Creates an empty record whose hourly series are empty, mutable lists. */
    public Weather() {
        this.hourlyTimes = new ArrayList<>();
        this.hourlyTemperatures = new ArrayList<>();
        this.hourlyHumidities = new ArrayList<>();
        this.hourlyWindSpeeds = new ArrayList<>();
    }

    /**
     * Creates a record with the current values set and empty hourly series.
     */
    public Weather(String cityName, double temperature, double humidity, double windSpeed, String weatherCode) {
        this(); // shares the hourly-list initialisation with the no-arg constructor
        this.cityName = cityName;
        this.temperature = temperature;
        this.humidity = humidity;
        this.windSpeed = windSpeed;
        this.weatherCode = weatherCode;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    public double getTemperature() {
        return temperature;
    }

    public void setTemperature(double temperature) {
        this.temperature = temperature;
    }

    public double getHumidity() {
        return humidity;
    }

    public void setHumidity(double humidity) {
        this.humidity = humidity;
    }

    public double getWindSpeed() {
        return windSpeed;
    }

    public void setWindSpeed(double windSpeed) {
        this.windSpeed = windSpeed;
    }

    public String getWeatherCode() {
        return weatherCode;
    }

    public void setWeatherCode(String weatherCode) {
        this.weatherCode = weatherCode;
    }

    public List<String> getHourlyTimes() {
        return hourlyTimes;
    }

    public void setHourlyTimes(List<String> hourlyTimes) {
        this.hourlyTimes = hourlyTimes;
    }

    public List<Double> getHourlyTemperatures() {
        return hourlyTemperatures;
    }

    public void setHourlyTemperatures(List<Double> hourlyTemperatures) {
        this.hourlyTemperatures = hourlyTemperatures;
    }

    public List<Integer> getHourlyHumidities() {
        return hourlyHumidities;
    }

    public void setHourlyHumidities(List<Integer> hourlyHumidities) {
        this.hourlyHumidities = hourlyHumidities;
    }

    public List<Double> getHourlyWindSpeeds() {
        return hourlyWindSpeeds;
    }

    public void setHourlyWindSpeeds(List<Double> hourlyWindSpeeds) {
        this.hourlyWindSpeeds = hourlyWindSpeeds;
    }

    /**
     * Maps the weather code to a short Chinese description.
     *
     * <p>Covers the WMO interpretation codes listed by Open-Meteo, including the
     * freezing-precipitation and snow codes (56/57, 66/67, 71-77, 85/86).
     *
     * @return the description, or "未知" when the code is {@code null} or not recognised
     */
    public String getWeatherDescription() {
        if (weatherCode == null) {
            return "未知";
        }
        switch (weatherCode) {
            case "0": return "晴";
            case "1": case "2": case "3": return "多云";
            case "45": case "48": return "雾";
            case "51": case "53": case "55": return "小毛毛雨";
            case "56": case "57": return "冻毛毛雨";
            case "61": case "63": case "65": return "小雨";
            case "66": case "67": return "冻雨";
            case "71": case "73": case "75": return "雪";
            case "77": return "雪粒";
            case "80": case "81": case "82": return "阵雨";
            case "85": case "86": return "阵雪";
            case "95": return "雷暴";
            case "96": case "99": return "雷暴加冰雹";
            default: return "未知";
        }
    }

    /** Returns the current values plus the derived description; hourly series are omitted. */
    @Override
    public String toString() {
        return "Weather{" +
                "cityName='" + cityName + '\'' +
                ", temperature=" + temperature +
                ", humidity=" + humidity +
                ", windSpeed=" + windSpeed +
                ", weatherCode='" + weatherCode + '\'' +
                ", weather='" + getWeatherDescription() + '\'' +
                '}';
    }
}

75
project/src/main/java/com/example/crawler/repository/DataRepository.java

@ -1,75 +0,0 @@
package com.example.crawler.repository;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory, process-wide store (singleton) for the crawled data sets.
 *
 * <p>Getters return a copy of the stored list; {@code saveX} replaces the stored
 * content with the elements of the given list.
 */
public class DataRepository {
    private static DataRepository instance;

    private final List<Book> books;
    private final List<News> newsList;
    private final List<UniversityRank> rankings;
    private final List<Weather> weatherList;

    private DataRepository() {
        this.books = new ArrayList<>();
        this.newsList = new ArrayList<>();
        this.rankings = new ArrayList<>();
        this.weatherList = new ArrayList<>();
    }

    /** Returns the single repository instance, creating it on first use. */
    public static synchronized DataRepository getInstance() {
        if (instance == null) {
            instance = new DataRepository();
        }
        return instance;
    }

    public List<Book> getBooks() {
        return new ArrayList<>(books);
    }

    /**
     * Replaces the stored books.
     *
     * @throws NullPointerException if {@code books} is null; the stored data is left untouched
     */
    public void saveBooks(List<Book> books) {
        // Validate before clearing so a null argument cannot wipe the existing data.
        java.util.Objects.requireNonNull(books, "books");
        this.books.clear();
        this.books.addAll(books);
    }

    public List<News> getNewsList() {
        return new ArrayList<>(newsList);
    }

    /**
     * Replaces the stored news.
     *
     * @throws NullPointerException if {@code newsList} is null; the stored data is left untouched
     */
    public void saveNewsList(List<News> newsList) {
        java.util.Objects.requireNonNull(newsList, "newsList");
        this.newsList.clear();
        this.newsList.addAll(newsList);
    }

    public List<UniversityRank> getRankings() {
        return new ArrayList<>(rankings);
    }

    /**
     * Replaces the stored rankings.
     *
     * @throws NullPointerException if {@code rankings} is null; the stored data is left untouched
     */
    public void saveRankings(List<UniversityRank> rankings) {
        java.util.Objects.requireNonNull(rankings, "rankings");
        this.rankings.clear();
        this.rankings.addAll(rankings);
    }

    public List<Weather> getWeatherList() {
        return new ArrayList<>(weatherList);
    }

    /**
     * Replaces the stored weather records.
     *
     * @throws NullPointerException if {@code weatherList} is null; the stored data is left untouched
     */
    public void saveWeatherList(List<Weather> weatherList) {
        java.util.Objects.requireNonNull(weatherList, "weatherList");
        this.weatherList.clear();
        this.weatherList.addAll(weatherList);
    }

    /** Empties all four stored data sets. */
    public void clearAll() {
        books.clear();
        newsList.clear();
        rankings.clear();
        weatherList.clear();
    }
}

171
project/src/main/java/com/example/crawler/service/BookAnalysisService.java

@ -1,171 +0,0 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.Book;
import com.example.crawler.util.DataCleaner;
public class BookAnalysisService {
static {
File dir = new File(CrawlerConstants.REPORTS_DIR);
if (!dir.exists()) {
dir.mkdirs();
}
}
/**
 * Runs the full book analysis: price distribution, rating distribution, stock status,
 * then the written report. Prints a notice and returns when there is nothing to analyze.
 *
 * @param books the books to analyze; may be null or empty
 */
public void analyze(List<Book> books) {
    boolean nothingToAnalyze = books == null || books.isEmpty();
    if (nothingToAnalyze) {
        System.out.println("没有书籍数据可分析");
        return;
    }

    int bookCount = books.size();
    System.out.println("\n========== 书籍数据分析 ==========");
    System.out.println("共分析 " + bookCount + " 本书\n");

    analyzePriceDistribution(books);
    analyzeRatingDistribution(books);
    analyzeStockStatus(books);
    generateReport(books);
}
/**
 * Prints max/min/average price and a per-range count, then renders the price histogram.
 * Books whose cleaned price is not positive are ignored.
 *
 * @param books the books to analyze
 */
private void analyzePriceDistribution(List<Book> books) {
    System.out.println("【价格分析】");
    List<Double> prices = new ArrayList<>();
    for (Book book : books) {
        double price = DataCleaner.cleanPrice(book.getPrice());
        if (price > 0) {
            prices.add(price);
        }
    }
    if (prices.isEmpty()) {
        System.out.println("无法获取有效价格数据");
        return;
    }

    double maxPrice = prices.stream().mapToDouble(Double::doubleValue).max().orElse(0);
    double minPrice = prices.stream().mapToDouble(Double::doubleValue).min().orElse(0);
    double avgPrice = prices.stream().mapToDouble(Double::doubleValue).average().orElse(0);
    System.out.println("最高价: £" + String.format("%.2f", maxPrice));
    System.out.println("最低价: £" + String.format("%.2f", minPrice));
    System.out.println("平均价: £" + String.format("%.2f", avgPrice));

    // Insertion-ordered map: the ranges are listed and charted in ascending price order
    // rather than in hash order.
    Map<String, Integer> priceRanges = new java.util.LinkedHashMap<>();
    String[] ranges = {"0-10", "10-20", "20-30", "30-40", "40-50", "50+"};
    for (String range : ranges) {
        priceRanges.put(range, 0);
    }
    for (double price : prices) {
        String bucket;
        if (price < 10) {
            bucket = "0-10";
        } else if (price < 20) {
            bucket = "10-20";
        } else if (price < 30) {
            bucket = "20-30";
        } else if (price < 40) {
            bucket = "30-40";
        } else if (price < 50) {
            bucket = "40-50";
        } else {
            bucket = "50+";
        }
        priceRanges.merge(bucket, 1, Integer::sum);
    }

    System.out.println("\n价格区间分布:");
    for (Map.Entry<String, Integer> entry : priceRanges.entrySet()) {
        System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本");
    }
    ChartGenerator.generatePriceHistogram(priceRanges, "price_histogram.png");
}
/**
 * Prints how many books fall into each star rating (with percentages of all books)
 * and renders the rating pie chart. Ratings outside 1-5 are counted as "未知".
 *
 * @param books the books to analyze; expected non-empty (the caller checks this)
 */
private void analyzeRatingDistribution(List<Book> books) {
    System.out.println("\n【评分分析】");

    // Insertion-ordered map: categories are listed and charted from 5 stars down to
    // unknown rather than in hash order.
    Map<String, Integer> ratingCounts = new java.util.LinkedHashMap<>();
    ratingCounts.put("5星", 0);
    ratingCounts.put("4星", 0);
    ratingCounts.put("3星", 0);
    ratingCounts.put("2星", 0);
    ratingCounts.put("1星", 0);
    ratingCounts.put("未知", 0);

    for (Book book : books) {
        int rating = DataCleaner.cleanRating(book.getRating());
        String category = (rating >= 1 && rating <= 5) ? rating + "星" : "未知";
        ratingCounts.merge(category, 1, Integer::sum);
    }

    int total = books.size();
    System.out.println("评分分布:");
    for (Map.Entry<String, Integer> entry : ratingCounts.entrySet()) {
        double percentage = (entry.getValue() * 100.0) / total;
        System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本 (" + String.format("%.1f", percentage) + "%)");
    }
    ChartGenerator.generateRatingPieChart(ratingCounts, "rating_pie.png");
}
/**
 * Prints how many books are in stock and how many are not.
 *
 * <p>A book counts as in stock when its availability text contains
 * "in stock", ignoring case. Every other book, including one with a
 * {@code null} availability, is counted as out of stock.
 *
 * @param books books whose availability is analysed
 */
private void analyzeStockStatus(List<Book> books) {
    System.out.println("\n【库存分析】");
    int inStock = 0;
    for (Book book : books) {
        String availability = book.getAvailability();
        // Locale.ROOT keeps the match stable on JVMs whose default locale
        // lower-cases 'I' to a dotless i (e.g. Turkish).
        if (availability != null
                && availability.toLowerCase(java.util.Locale.ROOT).contains("in stock")) {
            inStock++;
        }
    }
    int outOfStock = books.size() - inStock;
    System.out.println("有库存: " + inStock + " 本");
    System.out.println("缺货: " + outOfStock + " 本");
}
/**
 * Writes the book analysis report (price and stock statistics) to
 * {@code book_analysis_report.txt} inside {@code CrawlerConstants.REPORTS_DIR}.
 *
 * <p>The file is written as UTF-8 so the Chinese labels and the pound sign
 * do not depend on the platform default charset. A failure to open the file
 * is reported on {@code System.err} and not propagated.
 *
 * @param books books summarised in the report
 */
private void generateReport(List<Book> books) {
    String fileName = CrawlerConstants.REPORTS_DIR + "/book_analysis_report.txt";
    try (PrintWriter writer = new PrintWriter(
            new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
        writer.println("========== 书籍数据分析报告 ==========");
        writer.println("生成时间: " + java.time.LocalDateTime.now());
        writer.println("分析书籍总数: " + books.size());
        writer.println();

        // Single pass over the valid (positive) prices.
        java.util.DoubleSummaryStatistics priceStats = books.stream()
                .mapToDouble(b -> DataCleaner.cleanPrice(b.getPrice()))
                .filter(p -> p > 0)
                .summaryStatistics();
        if (priceStats.getCount() > 0) {
            writer.println("【价格统计】");
            writer.println("最高价: £" + String.format("%.2f", priceStats.getMax()));
            writer.println("最低价: £" + String.format("%.2f", priceStats.getMin()));
            writer.println("平均价: £" + String.format("%.2f", priceStats.getAverage()));
            writer.println();
        }

        writer.println("【库存统计】");
        long inStock = books.stream()
                .map(Book::getAvailability)
                .filter(a -> a != null && a.toLowerCase(java.util.Locale.ROOT).contains("in stock"))
                .count();
        writer.println("有库存: " + inStock + " 本");
        writer.println("缺货: " + (books.size() - inStock) + " 本");
        writer.println("\n报告生成完成");
        System.out.println("\n报告已保存: " + fileName);
    } catch (IOException e) {
        System.err.println("生成报告失败: " + e.getMessage());
    }
}
}

138
project/src/main/java/com/example/crawler/service/NewsAnalysisService.java

@ -1,138 +0,0 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.News;
import com.example.crawler.util.DataCleaner;
/**
 * Analyses crawled news: publication-hour distribution and title keyword
 * frequency. Results are printed to the console, rendered through
 * {@link ChartGenerator}, and summarised in a text report under
 * {@code CrawlerConstants.REPORTS_DIR}.
 */
public class NewsAnalysisService {
    /** Number of hourly buckets in a day. */
    private static final int HOURS_PER_DAY = 24;
    /** How many of the most frequent title words are printed and charted. */
    private static final int TOP_WORD_COUNT = 10;

    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every news analysis step and writes the report.
     *
     * @param newsList news to analyse; {@code null} or empty only prints a notice
     */
    public void analyze(List<News> newsList) {
        if (newsList == null || newsList.isEmpty()) {
            System.out.println("没有新闻数据可分析");
            return;
        }
        System.out.println("\n========== 新闻数据分析 ==========");
        System.out.println("共分析 " + newsList.size() + " 条新闻\n");
        analyzeTimeDistribution(newsList);
        analyzeKeywords(newsList);
        generateReport(newsList);
    }

    /**
     * Counts news items per publication hour.
     *
     * <p>Items whose publish time cannot be turned into an hour in 0..23 are
     * skipped on purpose: the analysis is best-effort over whatever
     * timestamps could be cleaned.
     *
     * @param newsList news to count
     * @return map with one entry per hour 0..23, iterated in ascending hour order
     */
    private Map<Integer, Integer> countByHour(List<News> newsList) {
        Map<Integer, Integer> counts = new java.util.TreeMap<>();
        for (int hour = 0; hour < HOURS_PER_DAY; hour++) {
            counts.put(hour, 0);
        }
        for (News news : newsList) {
            try {
                int hour = DataCleaner.extractHour(DataCleaner.cleanNewsTime(news.getPublishTime()));
                Integer current = counts.get(hour);
                if (current != null) {
                    counts.put(hour, current + 1);
                }
            } catch (Exception ignored) {
                // Unparseable publish time: leave this item out of the distribution.
            }
        }
        return counts;
    }

    /** Prints the hourly distribution with a star bar per hour, the peak hour, and renders the trend chart. */
    private void analyzeTimeDistribution(List<News> newsList) {
        System.out.println("【发布时间分布】");
        Map<Integer, Integer> hourDistribution = countByHour(newsList);

        System.out.println("\n按小时统计:");
        for (int i = 0; i < HOURS_PER_DAY; i++) {
            int count = hourDistribution.get(i);
            // One star per news item; hours without news get an empty bar.
            String bar = "*".repeat(Math.max(0, count));
            System.out.printf(" %02d:00 - %02d:00: %3d %s%n", i, (i + 1) % 24, count, bar);
        }

        // Earliest hour wins when several hours share the maximum.
        int peakHour = 0;
        int peakCount = 0;
        for (Map.Entry<Integer, Integer> entry : hourDistribution.entrySet()) {
            if (entry.getValue() > peakCount) {
                peakCount = entry.getValue();
                peakHour = entry.getKey();
            }
        }
        System.out.println("\n高峰时段: " + String.format("%02d:00", peakHour) + " (发布 " + peakCount + " 条新闻)");
        ChartGenerator.generateNewsTimeTrend(hourDistribution, "news_time_trend.png");
    }

    /** Prints the most frequent title words and renders them as a bar chart. */
    private void analyzeKeywords(List<News> newsList) {
        System.out.println("\n【关键词分析】");
        Map<String, Integer> allWords = new HashMap<>();
        for (News news : newsList) {
            String title = DataCleaner.cleanTitle(news.getTitle());
            String[] words = DataCleaner.extractWords(title);
            Map<String, Integer> wordFreq = DataCleaner.countWordFrequency(words);
            for (Map.Entry<String, Integer> entry : wordFreq.entrySet()) {
                allWords.merge(entry.getKey(), entry.getValue(), Integer::sum);
            }
        }

        List<Map.Entry<String, Integer>> topWords = allWords.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(TOP_WORD_COUNT)
                .collect(Collectors.toList());

        System.out.println("\n高频词 TOP 10:");
        // Insertion-ordered so the chart receives the words from most to least frequent.
        Map<String, Integer> top10 = new java.util.LinkedHashMap<>();
        for (int i = 0; i < topWords.size(); i++) {
            Map.Entry<String, Integer> entry = topWords.get(i);
            System.out.printf(" %2d. %s: %d%n", i + 1, entry.getKey(), entry.getValue());
            top10.put(entry.getKey(), entry.getValue());
        }
        ChartGenerator.generateWordFrequencyBarChart(top10, "news_top_words.png");
    }

    /**
     * Writes the hourly distribution to {@code news_analysis_report.txt}.
     *
     * <p>The file is written as UTF-8 so the Chinese text does not depend on
     * the platform default charset. A failure to open the file is reported on
     * {@code System.err} and not propagated.
     */
    private void generateReport(List<News> newsList) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/news_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 新闻数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析新闻总数: " + newsList.size());
            writer.println();

            Map<Integer, Integer> hourDistribution = countByHour(newsList);
            writer.println("【发布时间分布】");
            for (int i = 0; i < HOURS_PER_DAY; i++) {
                writer.println(String.format(" %02d:00 - %02d:00: %d 条", i, (i + 1) % 24, hourDistribution.get(i)));
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }
}

189
project/src/main/java/com/example/crawler/service/RankingAnalysisService.java

@ -1,189 +0,0 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.util.DataCleaner;
/**
 * Analyses crawled university rankings: universities per province, total
 * score statistics and category counts. Results are printed to the console,
 * rendered through {@link ChartGenerator}, and summarised in a text report
 * under {@code CrawlerConstants.REPORTS_DIR}.
 */
public class RankingAnalysisService {
    /** Number of provinces shown in the console ranking, the chart and the report. */
    private static final int TOP_PROVINCE_COUNT = 10;

    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every ranking analysis step and writes the report.
     *
     * @param ranks rankings to analyse; {@code null} or empty only prints a notice
     */
    public void analyze(List<UniversityRank> ranks) {
        if (ranks == null || ranks.isEmpty()) {
            System.out.println("没有大学排名数据可分析");
            return;
        }
        System.out.println("\n========== 大学排名数据分析 ==========");
        System.out.println("共分析 " + ranks.size() + " 所大学\n");
        analyzeProvinceDistribution(ranks);
        analyzeScoreDistribution(ranks);
        analyzeCategoryDistribution(ranks);
        generateReport(ranks);
    }

    /** Counts universities per province, ignoring entries without a province. */
    private Map<String, Integer> countByProvince(List<UniversityRank> ranks) {
        Map<String, Integer> provinceCounts = new HashMap<>();
        for (UniversityRank rank : ranks) {
            String province = rank.getProvince();
            if (province != null && !province.isEmpty()) {
                provinceCounts.merge(province, 1, Integer::sum);
            }
        }
        return provinceCounts;
    }

    /** Returns the provinces with the most universities, highest count first. */
    private List<Map.Entry<String, Integer>> topProvinces(Map<String, Integer> provinceCounts) {
        return provinceCounts.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(TOP_PROVINCE_COUNT)
                .collect(Collectors.toList());
    }

    /** Collects the total scores for which {@code DataCleaner.cleanScore} yields a value above zero. */
    private List<Double> validScores(List<UniversityRank> ranks) {
        List<Double> scores = new ArrayList<>();
        for (UniversityRank rank : ranks) {
            double score = DataCleaner.cleanScore(rank.getTotalScore());
            if (score > 0) {
                scores.add(score);
            }
        }
        return scores;
    }

    /** Prints the top provinces by number of ranked universities and renders the bar chart. */
    private void analyzeProvinceDistribution(List<UniversityRank> ranks) {
        System.out.println("【各省份上榜大学数量】");
        List<Map.Entry<String, Integer>> top = topProvinces(countByProvince(ranks));

        System.out.println("\n省份排行榜 TOP 10:");
        // Insertion-ordered so the chart receives the provinces in ranking order.
        Map<String, Integer> top10 = new java.util.LinkedHashMap<>();
        int rankNum = 1;
        for (Map.Entry<String, Integer> entry : top) {
            System.out.printf(" %2d. %s: %d 所大学%n", rankNum++, entry.getKey(), entry.getValue());
            top10.put(entry.getKey(), entry.getValue());
        }
        ChartGenerator.generateProvinceBarChart(top10, "province_bar.png");
    }

    /** Prints max, min, mean and median of the valid total scores plus a bucketed distribution, then renders the histogram. */
    private void analyzeScoreDistribution(List<UniversityRank> ranks) {
        System.out.println("\n【总分分析】");
        List<Double> scores = validScores(ranks);
        if (scores.isEmpty()) {
            System.out.println("无法获取有效分数数据");
            return;
        }

        java.util.DoubleSummaryStatistics stats =
                scores.stream().mapToDouble(Double::doubleValue).summaryStatistics();

        // Median: the middle element for odd sizes, the mean of the two
        // middle elements for even sizes.
        List<Double> sortedScores = scores.stream().sorted().collect(Collectors.toList());
        int mid = sortedScores.size() / 2;
        double median = sortedScores.get(mid);
        if (sortedScores.size() % 2 == 0) {
            median = (sortedScores.get(mid - 1) + median) / 2.0;
        }

        System.out.println("最高分: " + String.format("%.2f", stats.getMax()));
        System.out.println("最低分: " + String.format("%.2f", stats.getMin()));
        System.out.println("平均分: " + String.format("%.2f", stats.getAverage()));
        System.out.println("中位数: " + String.format("%.2f", median));

        // Insertion-ordered so the ranges are listed from lowest to highest.
        Map<String, Integer> scoreRanges = new java.util.LinkedHashMap<>();
        String[] ranges = {"0-20", "20-40", "40-60", "60-80", "80-100"};
        for (String range : ranges) {
            scoreRanges.put(range, 0);
        }
        for (double score : scores) {
            String bucket;
            if (score < 20) {
                bucket = "0-20";
            } else if (score < 40) {
                bucket = "20-40";
            } else if (score < 60) {
                bucket = "40-60";
            } else if (score < 80) {
                bucket = "60-80";
            } else {
                bucket = "80-100";
            }
            scoreRanges.merge(bucket, 1, Integer::sum);
        }

        System.out.println("\n分数区间分布:");
        for (Map.Entry<String, Integer> entry : scoreRanges.entrySet()) {
            System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 所");
        }
        // NOTE(review): the output file is named "boxplot" although a histogram
        // is generated; the name is kept so existing outputs are not renamed.
        ChartGenerator.generateScoreHistogram(scoreRanges, "score_boxplot.png");
    }

    /** Prints how many universities fall into each category, most common first. */
    private void analyzeCategoryDistribution(List<UniversityRank> ranks) {
        System.out.println("\n【办学层次统计】");
        Map<String, Integer> categoryCounts = new HashMap<>();
        for (UniversityRank rank : ranks) {
            String category = rank.getCategory();
            if (category != null && !category.isEmpty()) {
                categoryCounts.merge(category, 1, Integer::sum);
            }
        }
        if (categoryCounts.isEmpty()) {
            System.out.println("没有办学层次数据");
            return;
        }

        System.out.println("\n办学层次分布:");
        categoryCounts.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .forEach(entry -> System.out.printf(" %s: %d 所%n", entry.getKey(), entry.getValue()));
    }

    /**
     * Writes the province ranking and score statistics to
     * {@code ranking_analysis_report.txt}.
     *
     * <p>The file is written as UTF-8 so the Chinese text does not depend on
     * the platform default charset. A failure to open the file is reported on
     * {@code System.err} and not propagated.
     */
    private void generateReport(List<UniversityRank> ranks) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/ranking_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 大学排名数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析大学总数: " + ranks.size());
            writer.println();

            writer.println("【省份排行榜 TOP 10】");
            for (Map.Entry<String, Integer> e : topProvinces(countByProvince(ranks))) {
                writer.println(" " + e.getKey() + ": " + e.getValue() + " 所大学");
            }

            List<Double> scores = validScores(ranks);
            if (!scores.isEmpty()) {
                java.util.DoubleSummaryStatistics stats =
                        scores.stream().mapToDouble(Double::doubleValue).summaryStatistics();
                writer.println();
                writer.println("【分数统计】");
                writer.println("最高分: " + String.format("%.2f", stats.getMax()));
                writer.println("最低分: " + String.format("%.2f", stats.getMin()));
                writer.println("平均分: " + String.format("%.2f", stats.getAverage()));
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }
}

163
project/src/main/java/com/example/crawler/service/WeatherAnalysisService.java

@ -1,163 +0,0 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.Weather;
/**
 * Analyses crawled weather data per city: current conditions, hourly
 * temperature and humidity, and a simple comfort score. Results are printed
 * to the console, rendered through {@link ChartGenerator}, and summarised in
 * a text report under {@code CrawlerConstants.REPORTS_DIR}.
 */
public class WeatherAnalysisService {
    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every weather analysis step and writes the report.
     *
     * @param weatherList per-city weather to analyse; {@code null} or empty only prints a notice
     */
    public void analyze(List<Weather> weatherList) {
        if (weatherList == null || weatherList.isEmpty()) {
            System.out.println("没有天气数据可分析");
            return;
        }
        System.out.println("\n========== 天气数据分析 ==========");
        System.out.println("共分析 " + weatherList.size() + " 个城市\n");
        analyzeCurrentWeather(weatherList);
        analyzeTemperatureTrend(weatherList);
        analyzeHumidityTrend(weatherList);
        analyzeComfortIndex(weatherList);
        generateReport(weatherList);
    }

    /** Prints a table comparing the current conditions and comfort level of every city. */
    private void analyzeCurrentWeather(List<Weather> weatherList) {
        System.out.println("【当前天气对比】");
        System.out.println("┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐");
        System.out.println("│ 城市名称 │ 温度(°C)│ 湿度(%) │ 风速(km/h)│ 天气状况 │ 舒适度 │");
        System.out.println("├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤");
        for (Weather weather : weatherList) {
            double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity());
            System.out.printf("│ %-8s │ %8.1f │ %8.0f │ %8.1f │ %-8s │ %-8s │%n",
                    weather.getCityName(),
                    weather.getTemperature(),
                    weather.getHumidity(),
                    weather.getWindSpeed(),
                    weather.getWeatherDescription(),
                    getComfortDescription(comfort));
        }
        System.out.println("└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘");
    }

    /**
     * Prints max, min and average of each city's hourly temperatures with
     * the time labels of the extremes, renders one trend chart per city and
     * a combined comparison chart.
     */
    private void analyzeTemperatureTrend(List<Weather> weatherList) {
        System.out.println("\n【未来24小时温度分析】");
        // Insertion-ordered so the comparison chart lists cities in input order.
        Map<String, List<Double>> cityTemperatures = new java.util.LinkedHashMap<>();
        for (Weather weather : weatherList) {
            List<Double> temps = weather.getHourlyTemperatures();
            List<String> times = weather.getHourlyTimes();
            cityTemperatures.put(weather.getCityName(), temps);

            if (!temps.isEmpty()) {
                java.util.DoubleSummaryStatistics stats =
                        temps.stream().mapToDouble(Double::doubleValue).summaryStatistics();
                double maxTemp = stats.getMax();
                double minTemp = stats.getMin();
                // First hour at which the extreme occurs; the label is blank
                // when the time list is shorter than the temperature list.
                int maxIndex = temps.indexOf(maxTemp);
                int minIndex = temps.indexOf(minTemp);
                String maxTime = maxIndex < times.size() ? times.get(maxIndex) : "";
                String minTime = minIndex < times.size() ? times.get(minIndex) : "";
                System.out.printf(" %s: 最高 %.1f°C(%s) 最低 %.1f°C(%s) 平均 %.1f°C%n",
                        weather.getCityName(), maxTemp, maxTime, minTemp, minTime, stats.getAverage());
            }

            ChartGenerator.generateTemperatureTrend(
                    times,
                    temps,
                    weather.getCityName(),
                    "temperature_" + weather.getCityName() + ".png");
        }
        ChartGenerator.generateMultiCityTemperatureComparison(cityTemperatures, "temperature_comparison.png");
    }

    /** Prints the average of each city's hourly humidity values; cities without values are skipped. */
    private void analyzeHumidityTrend(List<Weather> weatherList) {
        System.out.println("\n【未来24小时湿度分析】");
        for (Weather weather : weatherList) {
            List<Integer> humidities = weather.getHourlyHumidities();
            if (humidities.isEmpty()) {
                continue;
            }
            double avgHumidity = humidities.stream().mapToInt(Integer::intValue).average().orElse(0);
            System.out.printf(" %s: 平均湿度 %.0f%%%n", weather.getCityName(), avgHumidity);
        }
    }

    /** Prints the comfort score and its verbal description for every city. */
    private void analyzeComfortIndex(List<Weather> weatherList) {
        System.out.println("\n【舒适度指数分析】");
        System.out.println("(基于温度和湿度的体感舒适度计算,0-100分制)");
        for (Weather weather : weatherList) {
            double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity());
            System.out.printf(" %s: %.1f分 (%s)%n", weather.getCityName(), comfort, getComfortDescription(comfort));
        }
    }

    /**
     * Computes a comfort score in the range 0..100.
     *
     * <p>Starts from 100 and subtracts 3 points per degree of distance from
     * 22 and 0.5 points per unit of distance from a humidity of 50; the
     * result is clamped to [0, 100].
     *
     * @param temperature current temperature
     * @param humidity current relative humidity
     * @return comfort score between 0 and 100 inclusive
     */
    private double calculateComfortIndex(double temperature, double humidity) {
        double penalty = Math.abs(temperature - 22) * 3 + Math.abs(humidity - 50) * 0.5;
        return Math.max(0, Math.min(100, 100 - penalty));
    }

    /**
     * Maps a comfort score to one of five labels, using thresholds at
     * 80, 60, 40 and 20.
     */
    private String getComfortDescription(double comfort) {
        if (comfort >= 80) {
            return "非常舒适";
        }
        if (comfort >= 60) {
            return "舒适";
        }
        if (comfort >= 40) {
            return "一般";
        }
        if (comfort >= 20) {
            return "不舒适";
        }
        return "极不舒适";
    }

    /**
     * Writes the per-city summary to {@code weather_analysis_report.txt}.
     *
     * <p>The file is written as UTF-8 so the Chinese text and the degree sign
     * do not depend on the platform default charset. A failure to open the
     * file is reported on {@code System.err} and not propagated.
     */
    private void generateReport(List<Weather> weatherList) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/weather_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 天气数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析城市数量: " + weatherList.size());
            writer.println("数据来源: Open-Meteo API (CC BY 4.0)");
            writer.println();
            writer.println("【多城市天气对比】");
            for (Weather weather : weatherList) {
                writer.println("\n城市: " + weather.getCityName());
                writer.println(" 当前温度: " + String.format("%.1f°C", weather.getTemperature()));
                writer.println(" 当前湿度: " + String.format("%.0f%%", weather.getHumidity()));
                writer.println(" 风速: " + String.format("%.1f km/h", weather.getWindSpeed()));
                writer.println(" 天气: " + weather.getWeatherDescription());
                List<Double> temps = weather.getHourlyTemperatures();
                if (!temps.isEmpty()) {
                    double average = temps.stream().mapToDouble(Double::doubleValue).average().orElse(0);
                    writer.println(" 24小时平均温度: " + String.format("%.1f°C", average));
                }
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }
}

127
project/src/main/java/com/example/crawler/strategy/BookCrawlStrategy.java

@ -1,127 +0,0 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.Book;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for the book catalogue on books.toscrape.com.
 * Strategy pattern: the book-information implementation of {@link CrawlStrategy}.
 */
public class BookCrawlStrategy implements CrawlStrategy<Book> {
    private static final String BASE_URL = "https://books.toscrape.com/";
    private static final String PAGE_URL_FORMAT = "https://books.toscrape.com/catalogue/page-%d.html";
    private static final int MAX_PAGES = 30; // maximum number of catalogue pages to crawl

    /** Request headers sent with every page request; built once instead of per page. */
    private static final Map<String, String> REQUEST_HEADERS = Map.of(
            "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    );

    /** Words looked for in the star-rating element's class attribute, in ascending star order. */
    private static final String[] RATING_WORDS = {"One", "Two", "Three", "Four", "Five"};

    /**
     * Crawls catalogue pages one after another until a page has no books,
     * {@code MAX_PAGES} is reached, or a 404 is hit after at least one book
     * was collected.
     *
     * @return the books collected so far
     * @throws CrawlException on network, parse or any other failure; a 404
     *         after some books were collected is treated as the end of the
     *         catalogue instead of an error
     */
    @Override
    public List<Book> crawl() throws CrawlException {
        List<Book> books = new ArrayList<>();
        int pageNum = 1;
        try {
            while (true) {
                // Stop once the page limit is reached.
                if (pageNum > MAX_PAGES) {
                    System.out.println("已达到最大爬取页数限制(" + MAX_PAGES + "页),停止爬取");
                    break;
                }
                String url = pageNum == 1 ? BASE_URL : String.format(PAGE_URL_FORMAT, pageNum);
                String html = HttpUtil.get(url, REQUEST_HEADERS);
                Document doc = Jsoup.parse(html);
                Elements bookElements = doc.select(".product_pod");
                // No book elements means we are past the last page.
                if (bookElements.isEmpty()) {
                    System.out.println("第 " + pageNum + " 页没有书籍数据,停止爬取");
                    break;
                }
                for (Element bookElement : bookElements) {
                    books.add(parseBook(bookElement));
                }
                System.out.println("已爬取第 " + pageNum + " 页,共 " + books.size() + " 本书");
                // Pause between requests.
                HttpUtil.sleep(1);
                pageNum++;
            }
            return books;
        } catch (NetworkException e) {
            // A 404 after some pages succeeded means the catalogue ended: keep
            // what was collected. The message may be null, so guard before
            // inspecting it.
            String message = e.getMessage();
            if (message != null && message.contains("404") && !books.isEmpty()) {
                System.out.println("第 " + pageNum + " 页不存在(404),返回已爬取的 " + books.size() + " 本书");
                return books;
            }
            throw new NetworkException("爬取书籍信息时网络异常: " + e.getMessage(), e);
        } catch (ParseException e) {
            throw new ParseException("解析书籍信息时异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取书籍信息时发生未知异常: " + e.getMessage(), e);
        }
    }

    /**
     * Parses one {@code .product_pod} element into a {@link Book}.
     *
     * <p>Missing sub-elements fall back to placeholder strings instead of
     * failing the whole page.
     *
     * @param bookElement the element describing a single book
     * @return the parsed book
     * @throws ParseException if any unexpected error occurs while reading the element
     */
    private Book parseBook(Element bookElement) throws ParseException {
        try {
            // Title
            Element titleElement = bookElement.selectFirst("h3 a");
            String title = titleElement != null ? titleElement.attr("title") : "未知书名";
            // Price
            Element priceElement = bookElement.selectFirst(".price_color");
            String price = priceElement != null ? priceElement.text() : "未知价格";
            // Availability
            Element availabilityElement = bookElement.selectFirst(".instock.availability");
            String availability = availabilityElement != null ? availabilityElement.text().trim() : "未知库存";
            // Star rating: encoded as a word in the element's class attribute.
            Element ratingElement = bookElement.selectFirst(".star-rating");
            String rating = "未知";
            if (ratingElement != null) {
                String classAttr = ratingElement.attr("class");
                for (int i = 0; i < RATING_WORDS.length; i++) {
                    if (classAttr.contains(RATING_WORDS[i])) {
                        rating = (i + 1) + "星";
                        break;
                    }
                }
            }
            return new Book(title, price, availability, rating);
        } catch (Exception e) {
            throw new ParseException("解析书籍信息失败: " + e.getMessage(), e);
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getDataSourceName() {
        return "toscrape.com书籍信息";
    }
}

27
project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java

@ -1,27 +0,0 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.CrawlException;
import java.util.List;
/**
* Crawl strategy interface.
* Defines the standard crawl operations; each data source provides its own
* implementation (strategy pattern).
*
* @param <T> type of the data items produced by the crawl
*/
public interface CrawlStrategy<T> {
/**
* Performs the crawl.
*
* @return the list of crawled data items
* @throws CrawlException if the crawl fails
*/
List<T> crawl() throws CrawlException;
/**
* Returns the name of the data source.
*
* @return the data source name
*/
String getDataSourceName();
}

151
project/src/main/java/com/example/crawler/strategy/NewsCrawlStrategy.java

@ -1,151 +0,0 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.News;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for the Sina domestic news page.
 * Strategy pattern: the Sina news implementation of {@link CrawlStrategy}.
 */
public class NewsCrawlStrategy implements CrawlStrategy<News> {
    private static final String NEWS_URL = "https://news.sina.com.cn/china/";
    private static final int MAX_NEWS_COUNT = 20;

    /** Request headers sent with the page request. */
    private static final Map<String, String> REQUEST_HEADERS = Map.of(
            "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Referer", "https://news.sina.com.cn/",
            "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    );

    /**
     * Fetches the news page and collects up to {@code MAX_NEWS_COUNT} items.
     *
     * <p>A first pass looks for list-item style containers (falling back to
     * any link pointing at sina.com.cn); if that yields too few items, a
     * second pass reads heading links. Items whose title was already
     * collected are skipped so the two passes do not produce duplicates.
     *
     * @return the collected news items, at most {@code MAX_NEWS_COUNT}
     * @throws CrawlException on network or any other failure
     */
    @Override
    public List<News> crawl() throws CrawlException {
        List<News> newsList = new ArrayList<>();
        java.util.Set<String> seenTitles = new java.util.HashSet<>();
        try {
            String html = HttpUtil.get(NEWS_URL, REQUEST_HEADERS);
            // The base URI is required for "abs:href" to resolve relative and
            // protocol-relative links; without it they resolve to "".
            Document doc = Jsoup.parse(html, NEWS_URL);

            // The page structure may change, so try several selectors.
            Elements newsElements = doc.select(".news-item, .news-list li, .list-item, .feed-card-item");
            // Nothing matched: fall back to a more generic selector.
            if (newsElements.isEmpty()) {
                newsElements = doc.select("a[href*=sina.com.cn]");
            }
            for (Element element : newsElements) {
                if (newsList.size() >= MAX_NEWS_COUNT) {
                    break;
                }
                try {
                    addIfNew(parseNews(element), newsList, seenTitles);
                } catch (ParseException ignored) {
                    // Skip items that fail to parse and continue with the next one.
                }
            }

            // Not enough items yet: try heading links as a second source.
            if (newsList.size() < MAX_NEWS_COUNT) {
                Elements titleElements = doc.select("h2 a, h3 a, .title a, .news-title a");
                for (Element element : titleElements) {
                    if (newsList.size() >= MAX_NEWS_COUNT) {
                        break;
                    }
                    try {
                        addIfNew(parseNewsFromTitleElement(element), newsList, seenTitles);
                    } catch (ParseException ignored) {
                        // Skip items that fail to parse and continue with the next one.
                    }
                }
            }
            System.out.println("已爬取 " + newsList.size() + " 条新浪新闻");
            return newsList;
        } catch (NetworkException e) {
            throw new NetworkException("爬取新浪新闻时网络异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取新浪新闻时发生未知异常: " + e.getMessage(), e);
        }
    }

    /**
     * Appends {@code news} to {@code target} when it has a non-empty title
     * that has not been collected before.
     */
    private static void addIfNew(News news, List<News> target, java.util.Set<String> seenTitles) {
        if (news == null) {
            return;
        }
        String title = news.getTitle();
        if (title != null && !title.isEmpty() && seenTitles.add(title)) {
            target.add(news);
        }
    }

    /**
     * Parses a news container element.
     *
     * @param element container expected to hold a link and optionally a time element
     * @return the parsed news, or {@code null} when no title or URL could be found
     * @throws ParseException if any unexpected error occurs while reading the element
     */
    private News parseNews(Element element) throws ParseException {
        try {
            String title = "";
            String url = "";
            String publishTime = "";
            // Title and link
            Element linkElement = element.selectFirst("a");
            if (linkElement != null) {
                title = linkElement.text().trim();
                url = linkElement.attr("abs:href");
            }
            // Publish time, when present
            Element timeElement = element.selectFirst(".time, .pubtime, span[class*=time]");
            if (timeElement != null) {
                publishTime = timeElement.text().trim();
            }
            if (title.isEmpty() || url.isEmpty()) {
                return null;
            }
            return new News(title, publishTime, url);
        } catch (Exception e) {
            throw new ParseException("解析新闻信息失败: " + e.getMessage(), e);
        }
    }

    /**
     * Parses a news item from a heading link; the publish time is left empty.
     *
     * @param element the link element carrying the title text and href
     * @return the parsed news, or {@code null} when the title or URL is empty
     * @throws ParseException if any unexpected error occurs while reading the element
     */
    private News parseNewsFromTitleElement(Element element) throws ParseException {
        try {
            String title = element.text().trim();
            String url = element.attr("abs:href");
            if (title.isEmpty() || url.isEmpty()) {
                return null;
            }
            return new News(title, "", url);
        } catch (Exception e) {
            throw new ParseException("解析新闻标题失败: " + e.getMessage(), e);
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getDataSourceName() {
        return "新浪国内新闻";
    }
}

24
project/src/main/java/com/example/crawler/strategy/StrategyFactory.java

@ -1,24 +0,0 @@
package com.example.crawler.strategy;
import com.example.crawler.strategy.BookCrawlStrategy;
import com.example.crawler.strategy.NewsCrawlStrategy;
import com.example.crawler.strategy.UniversityRankCrawlStrategy;
import com.example.crawler.strategy.WeatherCrawlStrategy;
/**
 * Factory that maps a numeric menu choice to the matching crawl strategy.
 */
public class StrategyFactory {
    /**
     * Creates a new strategy instance for the given choice.
     *
     * @param choice 1 = books, 2 = news, 3 = university ranking, 4 = weather
     * @return a freshly created strategy
     * @throws IllegalArgumentException if {@code choice} is none of the values above
     */
    public static CrawlStrategy<?> getStrategy(int choice) {
        if (choice == 1) {
            return new BookCrawlStrategy();
        }
        if (choice == 2) {
            return new NewsCrawlStrategy();
        }
        if (choice == 3) {
            return new UniversityRankCrawlStrategy();
        }
        if (choice == 4) {
            return new WeatherCrawlStrategy();
        }
        throw new IllegalArgumentException("Invalid choice: " + choice);
    }
}

148
project/src/main/java/com/example/crawler/strategy/UniversityRankCrawlStrategy.java

@ -1,148 +0,0 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for the ShanghaiRanking "Best Chinese Universities" table.
 * Strategy pattern: the university-ranking implementation of {@link CrawlStrategy}.
 */
public class UniversityRankCrawlStrategy implements CrawlStrategy<UniversityRank> {
    private static final String RANKING_URL = "https://www.shanghairanking.cn/rankings/bcur/2025";

    /** Upper bound on the number of ranking rows collected. */
    private static final int MAX_RESULTS = 200;

    /** Matches the first run of digits in a rank cell such as "1-3". */
    private static final java.util.regex.Pattern FIRST_NUMBER = java.util.regex.Pattern.compile("\\d+");

    /** Request headers sent with the page request. */
    private static final Map<String, String> REQUEST_HEADERS = Map.of(
            "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
            "Referer", "https://www.shanghairanking.cn/"
    );

    /**
     * Fetches the ranking page and parses up to {@code MAX_RESULTS} table rows.
     *
     * <p>Rows that cannot be parsed, or that carry no rank, are skipped.
     *
     * @return the parsed rankings in page order
     * @throws CrawlException on network or any other failure
     */
    @Override
    public List<UniversityRank> crawl() throws CrawlException {
        List<UniversityRank> rankings = new ArrayList<>();
        try {
            // Delay before issuing the request.
            HttpUtil.sleep(3);
            String html = HttpUtil.get(RANKING_URL, REQUEST_HEADERS);
            Document doc = Jsoup.parse(html);

            // Locate the table rows, trying progressively more generic selectors.
            Elements rows = doc.select("table tbody tr");
            if (rows.isEmpty()) {
                rows = doc.select(".rk-table tbody tr");
            }
            if (rows.isEmpty()) {
                rows = doc.select("tr");
            }

            for (Element row : rows) {
                if (rankings.size() >= MAX_RESULTS) {
                    break;
                }
                try {
                    UniversityRank ranking = parseRow(row);
                    if (ranking != null && ranking.getRank() != null) {
                        rankings.add(ranking);
                    }
                } catch (ParseException ignored) {
                    // Skip rows that fail to parse.
                }
            }
            System.out.println("已爬取 " + rankings.size() + " 条大学排名数据");
            return rankings;
        } catch (NetworkException e) {
            throw new NetworkException("爬取软科大学排名时网络异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取软科大学排名时发生未知异常: " + e.getMessage(), e);
        }
    }

    /**
     * Reads the rank from a cell's text.
     *
     * <p>A plain integer is used as is. Otherwise the first run of digits is
     * taken, so a range such as "1-3" yields 1 rather than the digits of
     * both ends glued together.
     *
     * @param rankText trimmed text of the rank cell
     * @return the rank, or {@code null} when the text holds no usable number
     */
    private static Integer parseRank(String rankText) {
        try {
            return Integer.parseInt(rankText);
        } catch (NumberFormatException notPlainInteger) {
            java.util.regex.Matcher matcher = FIRST_NUMBER.matcher(rankText);
            if (!matcher.find()) {
                return null;
            }
            try {
                return Integer.parseInt(matcher.group());
            } catch (NumberFormatException tooLarge) {
                // Digit run does not fit into an int: treat as "no rank".
                return null;
            }
        }
    }

    /**
     * Parses one table row.
     *
     * @param row a {@code tr} element
     * @return the parsed ranking, or {@code null} when the row has fewer than
     *         four cells or no usable rank
     * @throws ParseException if any unexpected error occurs while reading the row
     */
    private UniversityRank parseRow(Element row) throws ParseException {
        try {
            Elements cells = row.select("td");
            if (cells.size() < 4) {
                return null;
            }
            // Column 1: rank
            Integer rank = parseRank(cells.get(0).text().trim());
            if (rank == null) {
                return null;
            }
            // Column 2: university name
            String universityName = cells.get(1).text().trim();
            // Column 4: total score
            String totalScore = cells.get(3).text().trim();
            // Column 3 may hold the province followed by the category,
            // separated by whitespace.
            String province = "";
            String category = "";
            String[] parts = cells.get(2).text().trim().split("\\s+");
            if (parts.length >= 1) {
                province = parts[0];
            }
            if (parts.length >= 2) {
                category = parts[1];
            }
            return new UniversityRank(rank, universityName, totalScore, province, category);
        } catch (Exception e) {
            throw new ParseException("解析大学排名行数据失败: " + e.getMessage(), e);
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getDataSourceName() {
        return "软科中国大学排名";
    }
}

177
project/src/main/java/com/example/crawler/strategy/WeatherCrawlStrategy.java

@ -1,177 +0,0 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.Weather;
import com.example.crawler.util.HttpUtil;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
public class WeatherCrawlStrategy implements CrawlStrategy<Weather> {
/**
 * Requests the weather for every city in
 * {@code CrawlerConstants.CITY_COORDINATES}, pausing after each request.
 *
 * @return one {@link Weather} per city, in the map's iteration order
 * @throws CrawlException on network, parse or any other failure
 */
@Override
public List<Weather> crawl() throws CrawlException {
    List<Weather> collected = new ArrayList<>();
    try {
        for (Map.Entry<String, double[]> city : CrawlerConstants.CITY_COORDINATES.entrySet()) {
            String name = city.getKey();
            double[] latLon = city.getValue();
            // Index 0 is passed as latitude, index 1 as longitude.
            String requestUrl = buildApiUrl(latLon[0], latLon[1]);
            String body = HttpUtil.get(requestUrl, Map.of("User-Agent", CrawlerConstants.USER_AGENT));
            collected.add(parseWeatherData(name, body));
            System.out.println("已获取 " + name + " 的天气信息");
            HttpUtil.sleep(2);
        }
        return collected;
    } catch (NetworkException e) {
        throw new NetworkException("爬取天气数据时网络异常: " + e.getMessage(), e);
    } catch (ParseException e) {
        throw new ParseException("解析天气数据时异常: " + e.getMessage(), e);
    } catch (Exception e) {
        throw new CrawlException("爬取天气数据时发生未知异常: " + e.getMessage(), e);
    }
}
private String buildApiUrl(double latitude, double longitude) {
return CrawlerConstants.URL_WEATHER_API + "?latitude=" + latitude +
"&longitude=" + longitude +
"&current_weather=true" +
"&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" +
"&forecast_days=1" +
"&timezone=Asia/Shanghai";
}
private Weather parseWeatherData(String cityName, String jsonData) throws ParseException {
try {
JsonObject obj = JsonParser.parseString(jsonData).getAsJsonObject();
Weather weather = new Weather();
weather.setCityName(cityName);
JsonObject currentWeather = obj.getAsJsonObject("current_weather");
if (currentWeather != null) {
weather.setTemperature(cleanTemperature(getJsonDouble(currentWeather, "temperature", 0)));
weather.setWindSpeed(cleanWindSpeed(getJsonDouble(currentWeather, "windspeed", 0)));
weather.setWeatherCode(String.valueOf(getJsonInt(currentWeather, "weathercode", -1)));
}
JsonObject hourly = obj.getAsJsonObject("hourly");
if (hourly != null) {
JsonArray times = hourly.getAsJsonArray("time");
JsonArray temps = hourly.getAsJsonArray("temperature_2m");
JsonArray humidities = hourly.getAsJsonArray("relative_humidity_2m");
JsonArray windSpeeds = hourly.getAsJsonArray("wind_speed_10m");
if (times != null && temps != null) {
int count = Math.min(times.size(), 24);
for (int i = 0; i < count; i++) {
weather.getHourlyTimes().add(cleanTimeString(getJsonString(times, i, "")));
weather.getHourlyTemperatures().add(cleanTemperature(getJsonDouble(temps, i, 0)));
}
}
if (humidities != null) {
int count = Math.min(humidities.size(), 24);
for (int i = 0; i < count; i++) {
weather.getHourlyHumidities().add(cleanHumidity(getJsonInt(humidities, i, 50)));
}
}
if (windSpeeds != null) {
int count = Math.min(windSpeeds.size(), 24);
for (int i = 0; i < count; i++) {
weather.getHourlyWindSpeeds().add(cleanWindSpeed(getJsonDouble(windSpeeds, i, 0)));
}
}
if (!weather.getHourlyHumidities().isEmpty()) {
weather.setHumidity(weather.getHourlyHumidities().get(0));
}
}
return weather;
} catch (Exception e) {
throw new ParseException("解析天气JSON数据失败: " + e.getMessage(), e);
}
}
private String getJsonString(JsonArray arr, int index, String defaultValue) {
if (arr == null || index >= arr.size()) return defaultValue;
JsonElement element = arr.get(index);
return element.isJsonNull() ? defaultValue : element.getAsString();
}
private double getJsonDouble(JsonObject obj, String key, double defaultValue) {
JsonElement element = obj.get(key);
if (element == null || element.isJsonNull()) return defaultValue;
return element.getAsDouble();
}
private int getJsonInt(JsonObject obj, String key, int defaultValue) {
JsonElement element = obj.get(key);
if (element == null || element.isJsonNull()) return defaultValue;
return element.getAsInt();
}
private double getJsonDouble(JsonArray arr, int index, double defaultValue) {
if (arr == null || index >= arr.size()) return defaultValue;
JsonElement element = arr.get(index);
if (element == null || element.isJsonNull()) return defaultValue;
return element.getAsDouble();
}
private int getJsonInt(JsonArray arr, int index, int defaultValue) {
if (arr == null || index >= arr.size()) return defaultValue;
JsonElement element = arr.get(index);
if (element == null || element.isJsonNull()) return defaultValue;
return element.getAsInt();
}
private double cleanTemperature(double temp) {
return Math.round(temp * 10.0) / 10.0;
}
private double cleanWindSpeed(double speed) {
return Math.round(speed * 10.0) / 10.0;
}
private int cleanHumidity(int humidity) {
if (humidity < 0) return 50;
if (humidity > 100) return 100;
return humidity;
}
private String cleanTimeString(String time) {
if (time == null || time.isEmpty()) return "";
if (time.contains("T")) {
return time.substring(time.indexOf("T") + 1, time.indexOf("T") + 6);
}
return time;
}
@Override
public String getDataSourceName() {
return "Open-Meteo 实时天气";
}
}

122
project/src/main/java/com/example/crawler/util/DataCleaner.java

@ -1,122 +0,0 @@
package com.example.crawler.util;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Data-cleaning utility class.
 * Provides cleaning helpers for the various kinds of crawled data
 * (prices, ratings, timestamps, titles, scores and free text).
 */
public class DataCleaner {
    /** Stop words excluded from word-frequency statistics (each word maps to itself). */
    private static final Map<String, String> STOP_WORDS = new HashMap<>();

    /** Matches every character that is neither a digit nor a dot. */
    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");

    /** Matches a run of one or more whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Matches every character that is not a CJK ideograph in U+4E00..U+9FA5, an ASCII letter or a digit. */
    private static final Pattern NON_WORD = Pattern.compile("[^\u4e00-\u9fa5a-zA-Z0-9]");

    /** Timestamp formats accepted by {@link #cleanNewsTime(String)}, tried in order. */
    private static final DateTimeFormatter[] NEWS_TIME_FORMATS = {
        DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"),
        DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH:mm")
    };

    static {
        String[] stopWords = {
            "的", "了", "是", "在", "和", "与", "对", "为", "有", "我",
            "你", "他", "她", "它", "这", "那", "就", "也", "都", "要",
            "会", "能", "可", "以", "说", "到", "来", "去", "着", "过"
        };
        for (String stopWord : stopWords) {
            STOP_WORDS.put(stopWord, stopWord);
        }
    }

    /**
     * Extracts the numeric value of a price string such as "£51.77".
     *
     * @param price raw price text, may be null
     * @return the parsed value, or 0.0 when the input is null, empty or not numeric
     */
    public static double cleanPrice(String price) {
        return parseNumber(price);
    }

    /**
     * Maps a rating CSS class containing "One".."Five" to the numbers 1..5.
     *
     * @param ratingClass class attribute text, may be null
     * @return 1-5, or 0 when no rating word is present
     */
    public static int cleanRating(String ratingClass) {
        if (ratingClass == null) return 0;
        if (ratingClass.contains("Five")) return 5;
        if (ratingClass.contains("Four")) return 4;
        if (ratingClass.contains("Three")) return 3;
        if (ratingClass.contains("Two")) return 2;
        if (ratingClass.contains("One")) return 1;
        return 0;
    }

    /**
     * Parses a news timestamp in either "yyyy-MM-dd HH:mm:ss" or "yyyy年MM月dd日 HH:mm" form.
     *
     * @param timeStr raw timestamp text, may be null
     * @return the parsed time, or the current time when the input is null, empty or
     *         matches neither format
     */
    public static LocalDateTime cleanNewsTime(String timeStr) {
        if (timeStr == null || timeStr.isEmpty()) return LocalDateTime.now();
        for (DateTimeFormatter format : NEWS_TIME_FORMATS) {
            try {
                return LocalDateTime.parse(timeStr, format);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not in this format; fall through to the next candidate.
            }
        }
        return LocalDateTime.now();
    }

    /**
     * Trims a title and collapses every whitespace run into a single space.
     *
     * @param title raw title, may be null
     * @return the normalized title, or "" for null input
     */
    public static String cleanTitle(String title) {
        if (title == null) return "";
        return WHITESPACE_RUN.matcher(title.trim()).replaceAll(" ");
    }

    /**
     * Extracts the numeric value of a score string.
     *
     * @param score raw score text, may be null
     * @return the parsed value, or 0.0 when the input is null, empty or not numeric
     */
    public static double cleanScore(String score) {
        return parseNumber(score);
    }

    /**
     * Splits text into tokens of consecutive CJK ideographs, ASCII letters and digits;
     * every other character acts as a separator.
     *
     * <p>Leading separators are stripped before splitting so the result never starts
     * with an empty token (a plain {@code split} would otherwise emit one).
     *
     * @param text raw text, may be null
     * @return the tokens in order of appearance; an empty array when there are none
     */
    public static String[] extractWords(String text) {
        if (text == null || text.isEmpty()) return new String[0];
        // After the replacement the only whitespace left is ' ', so trim() is sufficient.
        String cleaned = NON_WORD.matcher(text).replaceAll(" ").trim();
        if (cleaned.isEmpty()) {
            return new String[0];
        }
        return WHITESPACE_RUN.split(cleaned);
    }

    /**
     * Tells whether a word should be ignored by the frequency statistics.
     *
     * @param word candidate word, may be null
     * @return true for null, for words shorter than two characters, and for listed stop words
     */
    public static boolean isStopWord(String word) {
        return word == null || word.length() < 2 || STOP_WORDS.containsKey(word);
    }

    /**
     * Counts how often each non-stop word occurs.
     *
     * @param words tokens to count; null is treated as no tokens
     * @return a mutable map from word to occurrence count
     */
    public static Map<String, Integer> countWordFrequency(String[] words) {
        Map<String, Integer> frequency = new HashMap<>();
        if (words == null) {
            return frequency;
        }
        for (String word : words) {
            if (isStopWord(word)) continue;
            frequency.merge(word, 1, Integer::sum);
        }
        return frequency;
    }

    /**
     * Returns the hour-of-day (0-23) of the given time.
     *
     * @param dateTime time to inspect, must not be null
     * @return the hour component
     */
    public static int extractHour(LocalDateTime dateTime) {
        return dateTime.getHour();
    }

    /** Strips everything but digits and dots, then parses; yields 0.0 on any failure. */
    private static double parseNumber(String raw) {
        if (raw == null || raw.isEmpty()) return 0.0;
        String cleaned = NON_NUMERIC.matcher(raw).replaceAll("");
        try {
            return Double.parseDouble(cleaned);
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }
}

126
project/src/main/java/com/example/crawler/util/HttpUtil.java

@ -1,126 +0,0 @@
package com.example.crawler.util;
import com.example.crawler.exception.NetworkException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Map;
/**
 * HTTP utility class.
 * Wraps HTTP request handling on top of the HttpClient built into Java 11.
 */
public class HttpUtil {
    private static final HttpClient httpClient = HttpClient.newBuilder()
            .connectTimeout(Duration.ofSeconds(30))
            .followRedirects(HttpClient.Redirect.NORMAL)
            .build();
    private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

    /** Per-request timeout applied to every GET and POST. */
    private static final Duration REQUEST_TIMEOUT = Duration.ofSeconds(30);

    /**
     * Sends a GET request without custom headers.
     *
     * @param url request URL
     * @return response body
     * @throws NetworkException if the request fails or the status code is not 200
     */
    public static String get(String url) throws NetworkException {
        return get(url, Map.of());
    }

    /**
     * Sends a GET request with custom headers.
     *
     * @param url     request URL
     * @param headers request headers; a null map is treated as empty
     * @return response body
     * @throws NetworkException if the request fails or the status code is not 200
     */
    public static String get(String url, Map<String, String> headers) throws NetworkException {
        try {
            HttpRequest.Builder requestBuilder = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .timeout(REQUEST_TIMEOUT)
                    .GET();
            return send(requestBuilder, headers);
        } catch (Exception e) {
            throw toNetworkException(e);
        }
    }

    /**
     * Sends a POST request. The Content-Type defaults to application/json and can be
     * overridden through {@code headers}.
     *
     * @param url     request URL
     * @param body    request body
     * @param headers request headers; a null map is treated as empty
     * @return response body
     * @throws NetworkException if the request fails or the status code is not 200
     */
    public static String post(String url, String body, Map<String, String> headers) throws NetworkException {
        try {
            HttpRequest.Builder requestBuilder = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .timeout(REQUEST_TIMEOUT)
                    .header("Content-Type", "application/json")
                    .POST(HttpRequest.BodyPublishers.ofString(body));
            return send(requestBuilder, headers);
        } catch (Exception e) {
            throw toNetworkException(e);
        }
    }

    /**
     * Pauses between requests to avoid putting pressure on the server.
     * If the thread is interrupted while sleeping, the interrupt flag is restored
     * and the method returns early.
     *
     * @param seconds pause length in seconds
     */
    public static void sleep(int seconds) {
        try {
            Thread.sleep(seconds * 1000L);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * Applies the headers, executes the request and returns the body of a 200 response.
     *
     * <p>Headers are applied with {@code setHeader} so that a caller-supplied value
     * replaces a default (User-Agent, Content-Type) instead of being sent as a
     * second, duplicate header.
     */
    private static String send(HttpRequest.Builder requestBuilder, Map<String, String> headers)
            throws NetworkException, java.io.IOException, InterruptedException {
        Map<String, String> customHeaders = headers == null ? Map.<String, String>of() : headers;
        // Default User-Agent first; a custom one (in any letter case) overrides it below.
        if (!customHeaders.containsKey("User-Agent")) {
            requestBuilder.setHeader("User-Agent", DEFAULT_USER_AGENT);
        }
        customHeaders.forEach(requestBuilder::setHeader);
        HttpResponse<String> response =
                httpClient.send(requestBuilder.build(), HttpResponse.BodyHandlers.ofString());
        if (response.statusCode() != 200) {
            throw new NetworkException("HTTP请求失败,状态码: " + response.statusCode());
        }
        return response.body();
    }

    /**
     * Converts any failure into a {@code NetworkException}: an existing one is passed
     * through unchanged, everything else is wrapped with its cause preserved.
     * An {@code InterruptedException} additionally restores the thread's interrupt
     * flag so callers further up can still observe the interruption.
     */
    private static NetworkException toNetworkException(Exception e) {
        if (e instanceof NetworkException) {
            return (NetworkException) e;
        }
        if (e instanceof InterruptedException) {
            Thread.currentThread().interrupt();
        }
        return new NetworkException("网络请求失败: " + e.getMessage(), e);
    }
}

95
project/src/main/java/com/example/crawler/util/JsonUtil.java

@ -1,95 +0,0 @@
package com.example.crawler.util;
import com.example.crawler.exception.DataSaveException;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
/**
 * JSON utility class.
 * Wraps JSON serialization and writing JSON to files.
 */
public class JsonUtil {
    private static final Gson gson = new GsonBuilder()
            .setPrettyPrinting()
            .disableHtmlEscaping()
            .create();

    /**
     * Serializes an object to a JSON string.
     *
     * @param obj object to serialize
     * @return JSON string
     */
    public static String toJson(Object obj) {
        return gson.toJson(obj);
    }

    /**
     * Deserializes a JSON string into an object.
     *
     * @param json     JSON string
     * @param classOfT target class
     * @param <T>      target type
     * @return the deserialized object
     */
    public static <T> T fromJson(String json, Class<T> classOfT) {
        return gson.fromJson(json, classOfT);
    }

    /**
     * Saves an object as a JSON file, creating the parent directory if needed and
     * replacing any existing file.
     *
     * <p>The object is serialized first and then written in UTF-8. This keeps
     * non-ASCII text intact regardless of the platform default charset, surfaces
     * every write failure as an {@code IOException}, and leaves an existing file
     * untouched if serialization itself fails.
     *
     * @param obj      object to save
     * @param filePath destination file path
     * @throws DataSaveException if the directory cannot be created or the file cannot be written
     */
    public static void saveToJsonFile(Object obj, String filePath) throws DataSaveException {
        try {
            // Make sure the target directory exists.
            Path path = Paths.get(filePath);
            Path parentDir = path.getParent();
            if (parentDir != null && !Files.exists(parentDir)) {
                Files.createDirectories(parentDir);
            }
            String json = gson.toJson(obj);
            // Files.writeString encodes as UTF-8 and truncates an existing file.
            Files.writeString(path, json);
        } catch (IOException e) {
            throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e);
        }
    }

    /**
     * Saves a list as a JSON file; behaves exactly like
     * {@link #saveToJsonFile(Object, String)} applied to the list.
     *
     * @param list     list to save
     * @param filePath destination file path
     * @param <T>      element type
     * @throws DataSaveException if the directory cannot be created or the file cannot be written
     */
    public static <T> void saveListToJsonFile(List<T> list, String filePath) throws DataSaveException {
        saveToJsonFile(list, filePath);
    }
}

72
project/src/main/java/com/example/crawler/view/CrawlerView.java

@ -1,72 +0,0 @@
package com.example.crawler.view;
import java.util.Scanner;
/**
 * Crawler view class.
 * MVC pattern: the View layer, responsible for the CLI display and user interaction.
 */
public class CrawlerView {

    /**
     * Prints the main menu followed by the selection prompt (no trailing newline
     * after the prompt).
     */
    public void showMenu() {
        String[] menuLines = {
            "\n=== 数据爬取与分析系统 ===",
            "1. 爬取书籍信息(toscrape.com)",
            "2. 爬取新浪国内新闻",
            "3. 爬取软科中国大学排名",
            "4. 爬取Open-Meteo实时天气",
            "5. 爬取全部数据并保存",
            "6. 保存当前数据到文件",
            "7. 生成所有数据源的分析报告与图表",
            "8. 爬取并分析所有数据(一键完成)",
            "9. 退出"
        };
        for (String menuLine : menuLines) {
            System.out.println(menuLine);
        }
        System.out.print("请选择操作:");
    }

    /**
     * Reads one line from the scanner and interprets it as a menu choice.
     *
     * @param scanner input scanner
     * @return the number the user typed, or -1 when the trimmed line is not an integer
     */
    public int getInput(Scanner scanner) {
        String typed = scanner.nextLine().trim();
        int choice;
        try {
            choice = Integer.parseInt(typed);
        } catch (NumberFormatException notANumber) {
            // Signal an invalid selection to the caller.
            choice = -1;
        }
        return choice;
    }

    /**
     * Prints an error message to standard error.
     *
     * @param message error text
     */
    public void showError(String message) {
        String line = "错误: " + message;
        System.err.println(line);
    }

    /**
     * Prints a success message to standard output.
     *
     * @param message success text
     */
    public void showSuccess(String message) {
        String line = "成功: " + message;
        System.out.println(line);
    }

    /**
     * Waits for the user to press Enter, then emits the ANSI sequence that moves the
     * cursor home and clears the screen.
     *
     * @param scanner input scanner
     */
    public void pause(Scanner scanner) {
        final String clearScreen = "\033[H\033[2J";
        System.out.print("\n按回车键继续...");
        scanner.nextLine();
        System.out.print(clearScreen);
        System.out.flush();
    }
}

BIN
project/target/classes/com/example/crawler/Main.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/chart/ChartGenerator.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/BaseCrawlCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/BookCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/Command.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlAllCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlAndAnalyzeAllCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlRankingCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/ExitCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/GenerateAllAnalysisCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/NewsCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/SaveCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/WeatherCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/constant/CrawlerConstants.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/controller/CrawlerController.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/exception/CrawlException.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/exception/DataSaveException.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/exception/NetworkException.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/exception/ParseException.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/model/Book.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/model/News.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/model/UniversityRank.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/model/Weather.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/repository/DataRepository.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/service/BookAnalysisService.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/service/NewsAnalysisService.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/service/RankingAnalysisService.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/service/WeatherAnalysisService.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/BookCrawlStrategy.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/CrawlStrategy.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/NewsCrawlStrategy.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/StrategyFactory.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/UniversityRankCrawlStrategy.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/strategy/WeatherCrawlStrategy.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/util/DataCleaner.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/util/HttpUtil.class

Binary file not shown.

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save