Browse Source

期末实验项目

main
jingjiaying 3 weeks ago
parent
commit
c2c18f627a
  1. 4
      java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
  2. 10
      project/.idea/.gitignore
  3. 13
      project/.idea/compiler.xml
  4. 7
      project/.idea/encodings.xml
  5. 20
      project/.idea/jarRepositories.xml
  6. 12
      project/.idea/misc.xml
  7. 6
      project/.idea/vcs.xml
  8. BIN
      project/202506050211-靖佳颖-期末实验报告.docx
  9. BIN
      project/charts/news_time_trend.png
  10. BIN
      project/charts/news_top_words.png
  11. BIN
      project/charts/price_histogram.png
  12. BIN
      project/charts/province_bar.png
  13. BIN
      project/charts/province_distribution_2022.png
  14. BIN
      project/charts/province_distribution_2023.png
  15. BIN
      project/charts/province_distribution_2024.png
  16. BIN
      project/charts/rank_trend_上海交通大学.png
  17. BIN
      project/charts/rank_trend_北京大学.png
  18. BIN
      project/charts/rank_trend_复旦大学.png
  19. BIN
      project/charts/rank_trend_浙江大学.png
  20. BIN
      project/charts/rank_trend_清华大学.png
  21. BIN
      project/charts/rating_pie.png
  22. BIN
      project/charts/temperature_comparison.png
  23. BIN
      project/charts/temperature_上海.png
  24. BIN
      project/charts/temperature_北京.png
  25. BIN
      project/charts/temperature_广州.png
  26. BIN
      project/charts/top10_2022.png
  27. BIN
      project/charts/top10_2023.png
  28. BIN
      project/charts/top10_2024.png
  29. 21
      project/data/university_rank_2022.csv
  30. 21
      project/data/university_rank_2023.csv
  31. 21
      project/data/university_rank_2024.csv
  32. 45
      project/dependency-reduced-pom.xml
  33. 3602
      project/output/books_20260530_190333.json
  34. 82
      project/output/news_20260530_190333.json
  35. 212
      project/output/university_ranking_20260530_190333.json
  36. 335
      project/output/weather_20260530_190333.json
  37. 74
      project/pom.xml
  38. 14
      project/reports/book_analysis_report.txt
  39. 31
      project/reports/news_analysis_report.txt
  40. 17
      project/reports/ranking_analysis_report.txt
  41. 29
      project/reports/weather_analysis_report.txt
  42. 15
      project/src/main/java/com/example/crawler/Main.java
  43. 229
      project/src/main/java/com/example/crawler/chart/ChartGenerator.java
  44. 60
      project/src/main/java/com/example/crawler/command/BaseCrawlCommand.java
  45. 32
      project/src/main/java/com/example/crawler/command/BookCommand.java
  46. 20
      project/src/main/java/com/example/crawler/command/Command.java
  47. 45
      project/src/main/java/com/example/crawler/command/CrawlAllCommand.java
  48. 104
      project/src/main/java/com/example/crawler/command/CrawlAndAnalyzeAllCommand.java
  49. 32
      project/src/main/java/com/example/crawler/command/CrawlRankingCommand.java
  50. 19
      project/src/main/java/com/example/crawler/command/ExitCommand.java
  51. 77
      project/src/main/java/com/example/crawler/command/GenerateAllAnalysisCommand.java
  52. 32
      project/src/main/java/com/example/crawler/command/NewsCommand.java
  53. 74
      project/src/main/java/com/example/crawler/command/SaveCommand.java
  54. 32
      project/src/main/java/com/example/crawler/command/WeatherCommand.java
  55. 31
      project/src/main/java/com/example/crawler/constant/CrawlerConstants.java
  56. 90
      project/src/main/java/com/example/crawler/controller/CrawlerController.java
  57. 16
      project/src/main/java/com/example/crawler/exception/CrawlException.java
  58. 16
      project/src/main/java/com/example/crawler/exception/DataSaveException.java
  59. 16
      project/src/main/java/com/example/crawler/exception/NetworkException.java
  60. 16
      project/src/main/java/com/example/crawler/exception/ParseException.java
  61. 65
      project/src/main/java/com/example/crawler/model/Book.java
  62. 54
      project/src/main/java/com/example/crawler/model/News.java
  63. 76
      project/src/main/java/com/example/crawler/model/UniversityRank.java
  64. 140
      project/src/main/java/com/example/crawler/model/Weather.java
  65. 75
      project/src/main/java/com/example/crawler/repository/DataRepository.java
  66. 171
      project/src/main/java/com/example/crawler/service/BookAnalysisService.java
  67. 138
      project/src/main/java/com/example/crawler/service/NewsAnalysisService.java
  68. 189
      project/src/main/java/com/example/crawler/service/RankingAnalysisService.java
  69. 163
      project/src/main/java/com/example/crawler/service/WeatherAnalysisService.java
  70. 127
      project/src/main/java/com/example/crawler/strategy/BookCrawlStrategy.java
  71. 27
      project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java
  72. 151
      project/src/main/java/com/example/crawler/strategy/NewsCrawlStrategy.java
  73. 24
      project/src/main/java/com/example/crawler/strategy/StrategyFactory.java
  74. 148
      project/src/main/java/com/example/crawler/strategy/UniversityRankCrawlStrategy.java
  75. 177
      project/src/main/java/com/example/crawler/strategy/WeatherCrawlStrategy.java
  76. 122
      project/src/main/java/com/example/crawler/util/DataCleaner.java
  77. 126
      project/src/main/java/com/example/crawler/util/HttpUtil.java
  78. 95
      project/src/main/java/com/example/crawler/util/JsonUtil.java
  79. 72
      project/src/main/java/com/example/crawler/view/CrawlerView.java
  80. 359
      project/src/main/java/com/university/Main.java
  81. 250
      project/src/main/java/com/university/analysis/RankAnalyzer.java
  82. 153
      project/src/main/java/com/university/crawler/UniversityRankCrawler.java
  83. 145
      project/src/main/java/com/university/model/RankChange.java
  84. 120
      project/src/main/java/com/university/model/University.java
  85. 171
      project/src/main/java/com/university/model/UniversityComparison.java
  86. 202
      project/src/main/java/com/university/storage/DataStorage.java
  87. 299
      project/src/main/java/com/university/visualization/ChartGenerator.java
  88. 241
      project/src/main/java/com/university/visualization/ConsoleReporter.java
  89. BIN
      project/target/classes/com/example/crawler/Main.class
  90. BIN
      project/target/classes/com/example/crawler/chart/ChartGenerator.class
  91. BIN
      project/target/classes/com/example/crawler/command/BaseCrawlCommand.class
  92. BIN
      project/target/classes/com/example/crawler/command/BookCommand.class
  93. BIN
      project/target/classes/com/example/crawler/command/Command.class
  94. BIN
      project/target/classes/com/example/crawler/command/CrawlAllCommand.class
  95. BIN
      project/target/classes/com/example/crawler/command/CrawlAndAnalyzeAllCommand.class
  96. BIN
      project/target/classes/com/example/crawler/command/CrawlRankingCommand.class
  97. BIN
      project/target/classes/com/example/crawler/command/ExitCommand.class
  98. BIN
      project/target/classes/com/example/crawler/command/GenerateAllAnalysisCommand.class
  99. BIN
      project/target/classes/com/example/crawler/command/NewsCommand.class
  100. BIN
      project/target/classes/com/example/crawler/command/SaveCommand.class

4
java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java

@ -0,0 +1,4 @@
package com.example.datacollect.command;
/**
 * Empty placeholder class in the {@code com.example.datacollect.command} package.
 *
 * <p>It currently declares no fields, constructors, or methods.
 * NOTE(review): presumably meant to become a "history" command — confirm the
 * intended behavior and implement it, or remove the class if it is unused.
 */
public class HistoryCommand {
}

10
project/.idea/.gitignore

@ -0,0 +1,10 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 已忽略包含查询文件的默认文件夹
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/

13
project/.idea/compiler.xml

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="crawler-project" />
</profile>
</annotationProcessing>
</component>
</project>

7
project/.idea/encodings.xml

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="Encoding">
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" />
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" />
</component>
</project>

20
project/.idea/jarRepositories.xml

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>

12
project/.idea/misc.xml

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_21" default="true" project-jdk-name="21" project-jdk-type="JavaSDK" />
</project>

6
project/.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

BIN
project/202506050211-靖佳颖-期末实验报告.docx

Binary file not shown.

BIN
project/charts/news_time_trend.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

BIN
project/charts/news_top_words.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 35 KiB

BIN
project/charts/price_histogram.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 26 KiB

BIN
project/charts/province_bar.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

BIN
project/charts/province_distribution_2022.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 54 KiB

BIN
project/charts/province_distribution_2023.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

BIN
project/charts/province_distribution_2024.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 55 KiB

BIN
project/charts/rank_trend_上海交通大学.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 16 KiB

BIN
project/charts/rank_trend_北京大学.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 15 KiB

BIN
project/charts/rank_trend_复旦大学.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

BIN
project/charts/rank_trend_浙江大学.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 14 KiB

BIN
project/charts/rank_trend_清华大学.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 17 KiB

BIN
project/charts/rating_pie.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

BIN
project/charts/temperature_comparison.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 44 KiB

BIN
project/charts/temperature_上海.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

BIN
project/charts/temperature_北京.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

BIN
project/charts/temperature_广州.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 30 KiB

BIN
project/charts/top10_2022.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

BIN
project/charts/top10_2023.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

BIN
project/charts/top10_2024.png

Binary file not shown.

Before

Width:  |  Height:  |  Size: 31 KiB

21
project/data/university_rank_2022.csv

@ -1,21 +0,0 @@
"排名","学校名称","省份","总分","年份"
"1","清华大学","北京","852.5","2022"
"2","北京大学","北京","848.2","2022"
"3","浙江大学","浙江","822.5","2022"
"4","上海交通大学","上海","815.3","2022"
"5","复旦大学","上海","805.1","2022"
"6","南京大学","江苏","785.6","2022"
"7","中国科学技术大学","安徽","782.4","2022"
"8","华中科技大学","湖北","765.8","2022"
"9","武汉大学","湖北","758.2","2022"
"10","西安交通大学","陕西","752.6","2022"
"11","中山大学","广东","745.3","2022"
"12","四川大学","四川","738.9","2022"
"13","哈尔滨工业大学","黑龙江","732.5","2022"
"14","北京航空航天大学","北京","725.8","2022"
"15","东南大学","江苏","718.4","2022"
"16","北京理工大学","北京","712.6","2022"
"17","同济大学","上海","705.3","2022"
"18","中国人民大学","北京","698.5","2022"
"19","北京师范大学","北京","692.1","2022"
"20","南开大学","天津","685.7","2022"
1 排名 学校名称 省份 总分 年份
2 1 清华大学 北京 852.5 2022
3 2 北京大学 北京 848.2 2022
4 3 浙江大学 浙江 822.5 2022
5 4 上海交通大学 上海 815.3 2022
6 5 复旦大学 上海 805.1 2022
7 6 南京大学 江苏 785.6 2022
8 7 中国科学技术大学 安徽 782.4 2022
9 8 华中科技大学 湖北 765.8 2022
10 9 武汉大学 湖北 758.2 2022
11 10 西安交通大学 陕西 752.6 2022
12 11 中山大学 广东 745.3 2022
13 12 四川大学 四川 738.9 2022
14 13 哈尔滨工业大学 黑龙江 732.5 2022
15 14 北京航空航天大学 北京 725.8 2022
16 15 东南大学 江苏 718.4 2022
17 16 北京理工大学 北京 712.6 2022
18 17 同济大学 上海 705.3 2022
19 18 中国人民大学 北京 698.5 2022
20 19 北京师范大学 北京 692.1 2022
21 20 南开大学 天津 685.7 2022

21
project/data/university_rank_2023.csv

@ -1,21 +0,0 @@
"排名","学校名称","省份","总分","年份"
"1","清华大学","北京","853.0","2023"
"2","北京大学","北京","848.7","2023"
"3","浙江大学","浙江","823.0","2023"
"4","上海交通大学","上海","815.8","2023"
"5","复旦大学","上海","805.6","2023"
"6","南京大学","江苏","786.1","2023"
"7","中国科学技术大学","安徽","782.9","2023"
"8","华中科技大学","湖北","766.3","2023"
"9","武汉大学","湖北","758.7","2023"
"10","西安交通大学","陕西","753.1","2023"
"11","中山大学","广东","745.8","2023"
"12","四川大学","四川","739.4","2023"
"13","哈尔滨工业大学","黑龙江","733.0","2023"
"14","北京航空航天大学","北京","726.3","2023"
"15","东南大学","江苏","718.9","2023"
"16","北京理工大学","北京","713.1","2023"
"17","同济大学","上海","705.8","2023"
"18","中国人民大学","北京","699.0","2023"
"19","北京师范大学","北京","692.6","2023"
"20","南开大学","天津","686.2","2023"
1 排名 学校名称 省份 总分 年份
2 1 清华大学 北京 853.0 2023
3 2 北京大学 北京 848.7 2023
4 3 浙江大学 浙江 823.0 2023
5 4 上海交通大学 上海 815.8 2023
6 5 复旦大学 上海 805.6 2023
7 6 南京大学 江苏 786.1 2023
8 7 中国科学技术大学 安徽 782.9 2023
9 8 华中科技大学 湖北 766.3 2023
10 9 武汉大学 湖北 758.7 2023
11 10 西安交通大学 陕西 753.1 2023
12 11 中山大学 广东 745.8 2023
13 12 四川大学 四川 739.4 2023
14 13 哈尔滨工业大学 黑龙江 733.0 2023
15 14 北京航空航天大学 北京 726.3 2023
16 15 东南大学 江苏 718.9 2023
17 16 北京理工大学 北京 713.1 2023
18 17 同济大学 上海 705.8 2023
19 18 中国人民大学 北京 699.0 2023
20 19 北京师范大学 北京 692.6 2023
21 20 南开大学 天津 686.2 2023

21
project/data/university_rank_2024.csv

@ -1,21 +0,0 @@
"排名","学校名称","省份","总分","年份"
"1","清华大学","北京","853.5","2024"
"2","北京大学","北京","849.2","2024"
"3","浙江大学","浙江","823.5","2024"
"4","上海交通大学","上海","816.3","2024"
"5","复旦大学","上海","806.1","2024"
"6","南京大学","江苏","786.6","2024"
"7","中国科学技术大学","安徽","783.4","2024"
"8","华中科技大学","湖北","766.8","2024"
"9","武汉大学","湖北","759.2","2024"
"10","西安交通大学","陕西","753.6","2024"
"11","中山大学","广东","746.3","2024"
"12","四川大学","四川","739.9","2024"
"13","哈尔滨工业大学","黑龙江","733.5","2024"
"14","北京航空航天大学","北京","726.8","2024"
"15","东南大学","江苏","719.4","2024"
"16","北京理工大学","北京","713.6","2024"
"17","同济大学","上海","706.3","2024"
"18","中国人民大学","北京","699.5","2024"
"19","北京师范大学","北京","693.1","2024"
"20","南开大学","天津","686.7","2024"
1 排名 学校名称 省份 总分 年份
2 1 清华大学 北京 853.5 2024
3 2 北京大学 北京 849.2 2024
4 3 浙江大学 浙江 823.5 2024
5 4 上海交通大学 上海 816.3 2024
6 5 复旦大学 上海 806.1 2024
7 6 南京大学 江苏 786.6 2024
8 7 中国科学技术大学 安徽 783.4 2024
9 8 华中科技大学 湖北 766.8 2024
10 9 武汉大学 湖北 759.2 2024
11 10 西安交通大学 陕西 753.6 2024
12 11 中山大学 广东 746.3 2024
13 12 四川大学 四川 739.9 2024
14 13 哈尔滨工业大学 黑龙江 733.5 2024
15 14 北京航空航天大学 北京 726.8 2024
16 15 东南大学 江苏 719.4 2024
17 16 北京理工大学 北京 713.6 2024
18 17 同济大学 上海 706.3 2024
19 18 中国人民大学 北京 699.5 2024
20 19 北京师范大学 北京 693.1 2024
21 20 南开大学 天津 686.7 2024

45
project/dependency-reduced-pom.xml

@ -1,22 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<groupId>com.university</groupId> <groupId>com.example</groupId>
<artifactId>university-rank-crawler</artifactId> <artifactId>crawler-project</artifactId>
<version>1.0-SNAPSHOT</version> <name>crawler-project</name>
<version>1.0.0</version>
<description>Java爬虫项目 - MVC + Command + Strategy模式</description>
<build> <build>
<plugins> <plugins>
<plugin> <plugin>
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version> <version>3.11.0</version>
<configuration> <configuration>
<source>11</source> <source>${java.version}</source>
<target>11</target> <target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration> </configuration>
</plugin> </plugin>
<plugin> <plugin>
<artifactId>maven-shade-plugin</artifactId> <artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version> <version>3.5.0</version>
<executions> <executions>
<execution> <execution>
<phase>package</phase> <phase>package</phase>
@ -26,26 +29,36 @@
<configuration> <configuration>
<transformers> <transformers>
<transformer> <transformer>
<mainClass>com.university.Main</mainClass> <mainClass>com.example.crawler.Main</mainClass>
</transformer> </transformer>
</transformers> </transformers>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.1</version>
<configuration>
<mainClass>com.university.Main</mainClass>
</configuration>
</plugin>
</plugins> </plugins>
</build> </build>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hamcrest-core</artifactId>
<groupId>org.hamcrest</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<properties> <properties>
<maven.compiler.target>11</maven.compiler.target> <java.version>11</java.version>
<maven.compiler.source>11</maven.compiler.source> <maven.compiler.source>11</maven.compiler.source>
<jsoup.version>1.17.2</jsoup.version>
<jfreechart.version>1.5.3</jfreechart.version>
<maven.compiler.target>11</maven.compiler.target>
<gson.version>2.10.1</gson.version>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties> </properties>
</project> </project>

3602
project/output/books_20260530_190333.json

File diff suppressed because it is too large

82
project/output/news_20260530_190333.json

@ -0,0 +1,82 @@
[
{
"title": "专栏",
"publishTime": "",
"url": "http://zhuanlan.sina.com.cn/"
},
{
"title": "导航",
"publishTime": "",
"url": "http://news.sina.com.cn/guide/"
},
{
"title": "新浪财经",
"publishTime": "",
"url": "https://finance.sina.com.cn/mobile/comfinanceweb.shtml"
},
{
"title": "新浪博客",
"publishTime": "",
"url": "https://blog.sina.com.cn/lm/z/app/"
},
{
"title": "我的收藏",
"publishTime": "",
"url": "http://my.sina.com.cn/#location=fav"
},
{
"title": "注册",
"publishTime": "",
"url": "https://login.sina.com.cn/signup/signup?entry=news"
},
{
"title": "新闻中心",
"publishTime": "",
"url": "http://news.sina.com.cn/"
},
{
"title": "新闻排行",
"publishTime": "",
"url": "http://news.sina.com.cn/hotnews/"
},
{
"title": "联系我们",
"publishTime": "",
"url": "http://www.sina.com.cn/contactus.html"
},
{
"title": "广告服务",
"publishTime": "",
"url": "http://emarketing.sina.com.cn/"
},
{
"title": "通行证注册",
"publishTime": "",
"url": "http://login.sina.com.cn/signup/signup"
},
{
"title": "产品答疑",
"publishTime": "",
"url": "http://help.sina.com.cn/"
},
{
"title": "招聘信息",
"publishTime": "",
"url": "http://career.sina.com.cn/"
},
{
"title": "网站律师",
"publishTime": "",
"url": "http://corp.sina.com.cn/lawfirm/sina.htm"
},
{
"title": "版权所有",
"publishTime": "",
"url": "https://corp.sina.com.cn/chn/copyright.html"
},
{
"title": "意见反馈",
"publishTime": "",
"url": "http://news.sina.com.cn/feedback/post.html"
}
]

212
project/output/university_ranking_20260530_190333.json

@ -0,0 +1,212 @@
[
{
"rank": 1,
"universityName": "清华大学 Tsinghua University 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 2,
"universityName": "北京大学 Peking University 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 3,
"universityName": "浙江大学 Zhejiang University 双一流/985/211",
"totalScore": "综合",
"province": "浙江",
"category": ""
},
{
"rank": 4,
"universityName": "上海交通大学 Shanghai Jiao Tong University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 5,
"universityName": "复旦大学 Fudan University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 6,
"universityName": "南京大学 Nanjing University 双一流/985/211",
"totalScore": "综合",
"province": "江苏",
"category": ""
},
{
"rank": 7,
"universityName": "中国科学技术大学 University of Science and Technology of China 双一流/985/211",
"totalScore": "理工",
"province": "安徽",
"category": ""
},
{
"rank": 8,
"universityName": "武汉大学 Wuhan University 双一流/985/211",
"totalScore": "综合",
"province": "湖北",
"category": ""
},
{
"rank": 9,
"universityName": "华中科技大学 Huazhong University of Science and Technology 双一流/985/211",
"totalScore": "综合",
"province": "湖北",
"category": ""
},
{
"rank": 10,
"universityName": "西安交通大学 Xi'an Jiaotong University 双一流/985/211",
"totalScore": "综合",
"province": "陕西",
"category": ""
},
{
"rank": 11,
"universityName": "北京航空航天大学 Beihang University 双一流/985/211",
"totalScore": "理工",
"province": "北京",
"category": ""
},
{
"rank": 12,
"universityName": "中山大学 Sun Yat-sen University 双一流/985/211",
"totalScore": "综合",
"province": "广东",
"category": ""
},
{
"rank": 13,
"universityName": "北京理工大学 Beijing Institute of Technology 双一流/985/211",
"totalScore": "理工",
"province": "北京",
"category": ""
},
{
"rank": 14,
"universityName": "哈尔滨工业大学 Harbin Institute of Technology 双一流/985/211",
"totalScore": "理工",
"province": "黑龙江",
"category": ""
},
{
"rank": 15,
"universityName": "四川大学 Sichuan University 双一流/985/211",
"totalScore": "综合",
"province": "四川",
"category": ""
},
{
"rank": 16,
"universityName": "东南大学 Southeast University 双一流/985/211",
"totalScore": "综合",
"province": "江苏",
"category": ""
},
{
"rank": 17,
"universityName": "中国人民大学 Renmin University of China 双一流/985/211",
"totalScore": "综合",
"province": "北京",
"category": ""
},
{
"rank": 18,
"universityName": "同济大学 Tongji University 双一流/985/211",
"totalScore": "综合",
"province": "上海",
"category": ""
},
{
"rank": 19,
"universityName": "北京师范大学 Beijing Normal University 双一流/985/211",
"totalScore": "师范",
"province": "北京",
"category": ""
},
{
"rank": 20,
"universityName": "天津大学 Tianjin University 双一流/985/211",
"totalScore": "理工",
"province": "天津",
"category": ""
},
{
"rank": 21,
"universityName": "西北工业大学 Northwestern Polytechnical University 双一流/985/211",
"totalScore": "理工",
"province": "陕西",
"category": ""
},
{
"rank": 22,
"universityName": "山东大学 Shandong University 双一流/985/211",
"totalScore": "综合",
"province": "山东",
"category": ""
},
{
"rank": 23,
"universityName": "南开大学 Nankai University 双一流/985/211",
"totalScore": "综合",
"province": "天津",
"category": ""
},
{
"rank": 24,
"universityName": "厦门大学 Xiamen University 双一流/985/211",
"totalScore": "综合",
"province": "福建",
"category": ""
},
{
"rank": 25,
"universityName": "中国农业大学 China Agricultural University 双一流/985/211",
"totalScore": "农业",
"province": "北京",
"category": ""
},
{
"rank": 26,
"universityName": "吉林大学 Jilin University 双一流/985/211",
"totalScore": "综合",
"province": "吉林",
"category": ""
},
{
"rank": 27,
"universityName": "中南大学 Central South University 双一流/985/211",
"totalScore": "综合",
"province": "湖南",
"category": ""
},
{
"rank": 28,
"universityName": "大连理工大学 Dalian University of Technology 双一流/985/211",
"totalScore": "理工",
"province": "辽宁",
"category": ""
},
{
"rank": 29,
"universityName": "湖南大学 Hunan University 双一流/985/211",
"totalScore": "综合",
"province": "湖南",
"category": ""
},
{
"rank": 30,
"universityName": "华东师范大学 East China Normal University 双一流/985/211",
"totalScore": "师范",
"province": "上海",
"category": ""
}
]

335
project/output/weather_20260530_190333.json

@ -0,0 +1,335 @@
[
{
"cityName": "上海",
"temperature": 22.7,
"humidity": 83.0,
"windSpeed": 7.8,
"weatherCode": "3",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
19.2,
19.0,
18.9,
18.3,
18.1,
17.8,
18.7,
20.9,
23.5,
24.9,
26.2,
27.0,
27.5,
28.1,
28.2,
27.4,
26.7,
25.0,
23.8,
22.7,
22.0,
20.6,
19.9,
19.4
],
"hourlyHumidities": [
83,
84,
85,
87,
89,
92,
90,
79,
55,
43,
38,
34,
33,
31,
30,
32,
35,
45,
54,
63,
67,
73,
76,
78
],
"hourlyWindSpeeds": [
3.8,
3.3,
2.6,
1.9,
1.0,
0.6,
2.3,
0.6,
1.8,
2.7,
3.0,
3.5,
5.4,
5.4,
6.0,
7.8,
9.2,
9.0,
8.1,
7.8,
7.2,
7.1,
7.1,
7.1
]
},
{
"cityName": "广州",
"temperature": 25.9,
"humidity": 85.0,
"windSpeed": 5.3,
"weatherCode": "81",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
27.7,
27.2,
26.0,
25.5,
25.4,
25.0,
25.0,
26.0,
28.1,
29.3,
30.6,
31.9,
33.0,
33.8,
33.9,
33.6,
34.2,
30.5,
29.4,
25.9,
26.4,
26.5,
26.3,
26.2
],
"hourlyHumidities": [
85,
87,
82,
84,
85,
90,
92,
87,
76,
70,
63,
57,
54,
53,
53,
54,
51,
69,
72,
95,
97,
96,
98,
98
],
"hourlyWindSpeeds": [
5.8,
4.9,
4.4,
3.3,
3.4,
3.8,
4.1,
5.6,
4.0,
3.8,
4.0,
2.8,
1.3,
3.3,
5.1,
5.2,
5.1,
12.3,
3.1,
5.3,
3.6,
1.7,
2.0,
1.4
]
},
{
"cityName": "北京",
"temperature": 32.3,
"humidity": 56.0,
"windSpeed": 17.1,
"weatherCode": "0",
"hourlyTimes": [
"00:00",
"01:00",
"02:00",
"03:00",
"04:00",
"05:00",
"06:00",
"07:00",
"08:00",
"09:00",
"10:00",
"11:00",
"12:00",
"13:00",
"14:00",
"15:00",
"16:00",
"17:00",
"18:00",
"19:00",
"20:00",
"21:00",
"22:00",
"23:00"
],
"hourlyTemperatures": [
22.8,
21.9,
21.2,
20.1,
19.6,
18.8,
19.2,
20.7,
23.7,
27.0,
29.9,
32.5,
34.5,
35.8,
36.3,
36.6,
36.2,
35.7,
34.2,
32.3,
30.9,
29.9,
29.1,
28.6
],
"hourlyHumidities": [
56,
60,
63,
69,
71,
75,
74,
67,
57,
45,
37,
28,
21,
18,
20,
21,
26,
26,
30,
33,
35,
36,
35,
34
],
"hourlyWindSpeeds": [
11.6,
10.6,
7.6,
4.5,
3.9,
2.3,
2.3,
0.6,
0.8,
2.2,
2.4,
4.9,
7.6,
10.4,
12.2,
13.4,
14.7,
15.1,
14.5,
17.1,
16.9,
18.1,
19.7,
20.1
]
}
]

74
project/pom.xml

@ -1,72 +1,80 @@
<?xml version="1.0" encoding="UTF-8"?> <?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" <project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion> <modelVersion>4.0.0</modelVersion>
<!-- 项目基本信息 --> <groupId>com.example</groupId>
<groupId>com.university</groupId> <artifactId>crawler-project</artifactId>
<artifactId>university-rank-crawler</artifactId> <version>1.0.0</version>
<version>1.0-SNAPSHOT</version> <name>crawler-project</name>
<packaging>jar</packaging> <description>Java爬虫项目 - MVC + Command + Strategy模式</description>
<properties> <properties>
<!-- 设置Java版本为11 --> <java.version>11</java.version>
<jsoup.version>1.17.2</jsoup.version>
<gson.version>2.10.1</gson.version>
<jfreechart.version>1.5.3</jfreechart.version>
<logback.version>1.4.14</logback.version>
<maven.compiler.source>11</maven.compiler.source> <maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target> <maven.compiler.target>11</maven.compiler.target>
<!-- 设置编码 -->
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties> </properties>
<dependencies> <dependencies>
<!-- Jsoup: 用于发送HTTP请求和解析HTML --> <!-- Jsoup HTML解析 -->
<dependency> <dependency>
<groupId>org.jsoup</groupId> <groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId> <artifactId>jsoup</artifactId>
<version>1.16.2</version> <version>${jsoup.version}</version>
</dependency> </dependency>
<!-- JFreeChart: 用于生成图表 --> <!-- Gson JSON处理 -->
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>${gson.version}</version>
</dependency>
<!-- JFreeChart 图表生成 -->
<dependency> <dependency>
<groupId>org.jfree</groupId> <groupId>org.jfree</groupId>
<artifactId>jfreechart</artifactId> <artifactId>jfreechart</artifactId>
<version>1.5.3</version> <version>${jfreechart.version}</version>
</dependency> </dependency>
<!-- OpenCSV: 用于读写CSV文件 --> <!-- Logback 日志框架 -->
<dependency> <dependency>
<groupId>com.opencsv</groupId> <groupId>ch.qos.logback</groupId>
<artifactId>opencsv</artifactId> <artifactId>logback-classic</artifactId>
<version>5.8</version> <version>${logback.version}</version>
</dependency> </dependency>
<!-- SLF4J: 日志接口 --> <!-- 测试依赖 -->
<dependency> <dependency>
<groupId>org.slf4j</groupId> <groupId>junit</groupId>
<artifactId>slf4j-simple</artifactId> <artifactId>junit</artifactId>
<version>2.0.9</version> <version>4.13.2</version>
<scope>test</scope>
</dependency> </dependency>
</dependencies> </dependencies>
<build> <build>
<plugins> <plugins>
<!-- Maven编译插件 -->
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId> <artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version> <version>3.11.0</version>
<configuration> <configuration>
<source>11</source> <source>${java.version}</source>
<target>11</target> <target>${java.version}</target>
<encoding>${project.build.sourceEncoding}</encoding>
</configuration> </configuration>
</plugin> </plugin>
<!-- Maven打包插件,包含依赖 -->
<plugin> <plugin>
<groupId>org.apache.maven.plugins</groupId> <groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId> <artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version> <version>3.5.0</version>
<executions> <executions>
<execution> <execution>
<phase>package</phase> <phase>package</phase>
@ -76,23 +84,13 @@
<configuration> <configuration>
<transformers> <transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer"> <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.university.Main</mainClass> <mainClass>com.example.crawler.Main</mainClass>
</transformer> </transformer>
</transformers> </transformers>
</configuration> </configuration>
</execution> </execution>
</executions> </executions>
</plugin> </plugin>
<!-- Exec插件,用于运行程序 -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.1</version>
<configuration>
<mainClass>com.university.Main</mainClass>
</configuration>
</plugin>
</plugins> </plugins>
</build> </build>
</project> </project>

14
project/reports/book_analysis_report.txt

@ -0,0 +1,14 @@
========== 书籍数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.026682900
分析书籍总数: 600
【价格统计】
最高价: £59.92
最低价: £10.01
平均价: £35.29
【库存统计】
有库存: 600 本
缺货: 0 本
报告生成完成

31
project/reports/news_analysis_report.txt

@ -0,0 +1,31 @@
========== 新闻数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.145591
分析新闻总数: 16
【发布时间分布】
00:00 - 01:00: 0 条
01:00 - 02:00: 0 条
02:00 - 03:00: 0 条
03:00 - 04:00: 0 条
04:00 - 05:00: 0 条
05:00 - 06:00: 0 条
06:00 - 07:00: 0 条
07:00 - 08:00: 0 条
08:00 - 09:00: 0 条
09:00 - 10:00: 0 条
10:00 - 11:00: 0 条
11:00 - 12:00: 0 条
12:00 - 13:00: 0 条
13:00 - 14:00: 0 条
14:00 - 15:00: 0 条
15:00 - 16:00: 0 条
16:00 - 17:00: 0 条
17:00 - 18:00: 16 条
18:00 - 19:00: 0 条
19:00 - 20:00: 0 条
20:00 - 21:00: 0 条
21:00 - 22:00: 0 条
22:00 - 23:00: 0 条
23:00 - 00:00: 0 条
报告生成完成

17
project/reports/ranking_analysis_report.txt

@ -0,0 +1,17 @@
========== 大学排名数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.272388
分析大学总数: 30
【省份排行榜 TOP 10】
北京: 7 所大学
上海: 4 所大学
湖北: 2 所大学
湖南: 2 所大学
天津: 2 所大学
陕西: 2 所大学
江苏: 2 所大学
山东: 1 所大学
福建: 1 所大学
吉林: 1 所大学
报告生成完成

29
project/reports/weather_analysis_report.txt

@ -0,0 +1,29 @@
========== 天气数据分析报告 ==========
生成时间: 2026-05-30T17:47:42.585539200
分析城市数量: 3
数据来源: Open-Meteo API (CC BY 4.0)
【多城市天气对比】
城市: 上海
当前温度: 24.0°C
当前湿度: 83%
风速: 8.3 km/h
天气: 多云
24小时平均温度: 22.7°C
城市: 广州
当前温度: 29.8°C
当前湿度: 85%
风速: 2.4 km/h
天气: 小毛毛雨
24小时平均温度: 28.6°C
城市: 北京
当前温度: 34.6°C
当前湿度: 56%
风速: 14.4 km/h
天气: 晴
24小时平均温度: 28.2°C
报告生成完成

15
project/src/main/java/com/example/crawler/Main.java

@ -0,0 +1,15 @@
package com.example.crawler;
import com.example.crawler.controller.CrawlerController;
/**
* 爬虫项目主入口类
*/
public class Main {
public static void main(String[] args) {
// 创建控制器并启动CLI界面
CrawlerController controller = new CrawlerController();
controller.start();
}
}

229
project/src/main/java/com/example/crawler/chart/ChartGenerator.java

@ -0,0 +1,229 @@
package com.example.crawler.chart;
import java.awt.Color;
import java.awt.Font;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.constant.CrawlerConstants;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.NumberAxis;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PiePlot;
import org.jfree.chart.plot.XYPlot;
import org.jfree.chart.renderer.category.BarRenderer;
import org.jfree.chart.renderer.category.LineAndShapeRenderer;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
import org.jfree.data.xy.XYDataset;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
/**
 * Renders analysis results as PNG charts with JFreeChart.
 *
 * <p>All charts are written into {@link CrawlerConstants#CHARTS_DIR}; that directory is created
 * (if missing) when this class is first loaded.
 */
public class ChartGenerator {

    /** Font family used for every label so that Chinese text can be rendered. */
    private static final String FONT_NAME = "Microsoft YaHei";
    private static final Font TITLE_FONT = new Font(FONT_NAME, Font.BOLD, 16);
    private static final Font LABEL_FONT = new Font(FONT_NAME, Font.PLAIN, 12);
    private static final Font TICK_FONT = new Font(FONT_NAME, Font.PLAIN, 10);

    /** Paint applied to the first series of bar and line charts. */
    private static final Color PRIMARY_SERIES_COLOR = new Color(79, 129, 189);

    /** Number of entries kept by the "TOP 10" charts. */
    private static final int TOP_N = 10;

    /** Pixel size of every exported image. */
    private static final int CHART_WIDTH = 800;
    private static final int CHART_HEIGHT = 500;

    static {
        File dir = new File(CrawlerConstants.CHARTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Draws a bar chart of book counts per price range.
     *
     * @param priceDistribution price-range label mapped to the number of books in it
     * @param fileName          name of the PNG file created inside the charts directory
     */
    public static void generatePriceHistogram(Map<String, Integer> priceDistribution, String fileName) {
        DefaultCategoryDataset dataset = createCategoryDataset(priceDistribution);
        JFreeChart chart = ChartFactory.createBarChart(
                "书籍价格分布",
                "价格区间(£)",
                "书籍数量",
                dataset
        );
        customizeBarChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws a pie chart of book counts per rating.
     *
     * @param ratingDistribution rating label mapped to the number of books with it
     * @param fileName           name of the PNG file created inside the charts directory
     */
    public static void generateRatingPieChart(Map<String, Integer> ratingDistribution, String fileName) {
        DefaultPieDataset<String> dataset = new DefaultPieDataset<>();
        for (Map.Entry<String, Integer> entry : ratingDistribution.entrySet()) {
            dataset.setValue(entry.getKey(), entry.getValue());
        }
        JFreeChart chart = ChartFactory.createPieChart(
                "书籍评分分布",
                dataset,
                true,
                true,
                false
        );
        customizePieChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws a line chart of news counts for each hour of the day (0-23).
     * Hours missing from the map are plotted as zero.
     *
     * @param hourDistribution hour of day mapped to the number of news items
     * @param fileName         name of the PNG file created inside the charts directory
     */
    public static void generateNewsTimeTrend(Map<Integer, Integer> hourDistribution, String fileName) {
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        for (int i = 0; i < 24; i++) {
            int count = hourDistribution.getOrDefault(i, 0);
            dataset.addValue(count, "新闻数量", String.format("%02d:00", i));
        }
        JFreeChart chart = ChartFactory.createLineChart(
                "新闻发布时间分布",
                "小时",
                "新闻数量",
                dataset
        );
        customizeLineChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws a bar chart of the ten most frequent words, most frequent first.
     *
     * @param wordFrequency word mapped to its number of occurrences
     * @param fileName      name of the PNG file created inside the charts directory
     */
    public static void generateWordFrequencyBarChart(Map<String, Integer> wordFrequency, String fileName) {
        DefaultCategoryDataset dataset = createTopDataset(wordFrequency, "词频");
        JFreeChart chart = ChartFactory.createBarChart(
                "新闻高频词 TOP 10",
                "关键词",
                "出现次数",
                dataset
        );
        customizeBarChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws a bar chart of the ten provinces with the most ranked universities, largest first.
     *
     * @param provinceDistribution province mapped to its number of universities
     * @param fileName             name of the PNG file created inside the charts directory
     */
    public static void generateProvinceBarChart(Map<String, Integer> provinceDistribution, String fileName) {
        DefaultCategoryDataset dataset = createTopDataset(provinceDistribution, "数值");
        JFreeChart chart = ChartFactory.createBarChart(
                "各省上榜大学数量 TOP 10",
                "省份",
                "大学数量",
                dataset
        );
        customizeBarChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws a bar chart of university counts per total-score range.
     *
     * @param scoreDistribution score-range label mapped to the number of universities in it
     * @param fileName          name of the PNG file created inside the charts directory
     */
    public static void generateScoreHistogram(Map<String, Integer> scoreDistribution, String fileName) {
        DefaultCategoryDataset dataset = createCategoryDataset(scoreDistribution);
        JFreeChart chart = ChartFactory.createBarChart(
                "大学总分分布",
                "分数区间",
                "大学数量",
                dataset
        );
        customizeBarChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws the temperature curve of a single city.
     *
     * <p>The x value of each point is the sample index; {@code times} only limits how many
     * samples are plotted (the shorter of the two lists wins).
     *
     * @param times        time labels of the samples
     * @param temperatures temperature of each sample
     * @param cityName     city name, used as series name and in the chart title
     * @param fileName     name of the PNG file created inside the charts directory
     */
    public static void generateTemperatureTrend(List<String> times, List<Double> temperatures, String cityName, String fileName) {
        XYSeries series = new XYSeries(cityName);
        int sampleCount = Math.min(times.size(), temperatures.size());
        for (int i = 0; i < sampleCount; i++) {
            series.add(i, temperatures.get(i));
        }
        XYDataset dataset = new XYSeriesCollection(series);
        JFreeChart chart = ChartFactory.createXYLineChart(
                cityName + " 未来24小时温度变化",
                "小时",
                "温度(°C)",
                dataset
        );
        customizeXYLineChart(chart);
        saveChart(chart, fileName);
    }

    /**
     * Draws one temperature curve per city on a shared chart, using at most the first
     * 24 samples of each city.
     *
     * @param cityTemperatures city name mapped to its temperature samples
     * @param fileName         name of the PNG file created inside the charts directory
     */
    public static void generateMultiCityTemperatureComparison(Map<String, List<Double>> cityTemperatures, String fileName) {
        XYSeriesCollection dataset = new XYSeriesCollection();
        for (Map.Entry<String, List<Double>> entry : cityTemperatures.entrySet()) {
            XYSeries series = new XYSeries(entry.getKey());
            List<Double> temps = entry.getValue();
            int sampleCount = Math.min(temps.size(), 24);
            for (int i = 0; i < sampleCount; i++) {
                series.add(i, temps.get(i));
            }
            dataset.addSeries(series);
        }
        JFreeChart chart = ChartFactory.createXYLineChart(
                "多城市未来24小时温度对比",
                "小时",
                "温度(°C)",
                dataset
        );
        customizeXYLineChart(chart);
        saveChart(chart, fileName);
    }

    /** Copies every map entry into a single-series category dataset, in the map's iteration order. */
    private static DefaultCategoryDataset createCategoryDataset(Map<String, Integer> data) {
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        for (Map.Entry<String, Integer> entry : data.entrySet()) {
            dataset.addValue(entry.getValue(), "数值", entry.getKey());
        }
        return dataset;
    }

    /**
     * Builds a single-series dataset holding the {@link #TOP_N} largest entries, largest first.
     *
     * <p>Entries are added straight from the sorted stream. Collecting them into a map first
     * (as {@code Collectors.toMap} does by default) would lose the ranking order, and the
     * bars would be drawn in arbitrary order.
     */
    private static DefaultCategoryDataset createTopDataset(Map<String, Integer> data, String seriesKey) {
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        data.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(TOP_N)
                .forEachOrdered(entry -> dataset.addValue(entry.getValue(), seriesKey, entry.getKey()));
        return dataset;
    }

    /** Applies the shared fonts and the primary series colour to a bar chart. */
    private static void customizeBarChart(JFreeChart chart) {
        chart.getTitle().setFont(TITLE_FONT);
        chart.getLegend().setItemFont(LABEL_FONT);
        CategoryPlot plot = chart.getCategoryPlot();
        CategoryAxis domainAxis = plot.getDomainAxis();
        domainAxis.setLabelFont(LABEL_FONT);
        domainAxis.setTickLabelFont(TICK_FONT);
        NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis();
        rangeAxis.setLabelFont(LABEL_FONT);
        BarRenderer renderer = (BarRenderer) plot.getRenderer();
        renderer.setSeriesPaint(0, PRIMARY_SERIES_COLOR);
    }

    /** Applies the shared fonts to a pie chart. */
    private static void customizePieChart(JFreeChart chart) {
        chart.getTitle().setFont(TITLE_FONT);
        chart.getLegend().setItemFont(LABEL_FONT);
        PiePlot plot = (PiePlot) chart.getPlot();
        plot.setLabelFont(LABEL_FONT);
    }

    /** Applies the shared fonts and the primary series colour to a category line chart. */
    private static void customizeLineChart(JFreeChart chart) {
        chart.getTitle().setFont(TITLE_FONT);
        chart.getLegend().setItemFont(LABEL_FONT);
        CategoryPlot plot = chart.getCategoryPlot();
        LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer();
        renderer.setSeriesPaint(0, PRIMARY_SERIES_COLOR);
    }

    /** Applies the shared fonts to an XY line chart. */
    private static void customizeXYLineChart(JFreeChart chart) {
        chart.getTitle().setFont(TITLE_FONT);
        chart.getLegend().setItemFont(LABEL_FONT);
        XYPlot plot = chart.getXYPlot();
        NumberAxis xAxis = (NumberAxis) plot.getDomainAxis();
        xAxis.setLabelFont(LABEL_FONT);
        NumberAxis yAxis = (NumberAxis) plot.getRangeAxis();
        yAxis.setLabelFont(LABEL_FONT);
    }

    /**
     * Writes the chart as a PNG into the charts directory.
     * An I/O failure is reported on stderr and does not propagate (best effort).
     */
    private static void saveChart(JFreeChart chart, String fileName) {
        try {
            File file = new File(CrawlerConstants.CHARTS_DIR, fileName);
            ChartUtils.saveChartAsPNG(file, chart, CHART_WIDTH, CHART_HEIGHT);
            System.out.println("图表已保存: " + file.getAbsolutePath());
        } catch (IOException e) {
            System.err.println("保存图表失败: " + e.getMessage());
        }
    }
}

60
project/src/main/java/com/example/crawler/command/BaseCrawlCommand.java

@ -0,0 +1,60 @@
package com.example.crawler.command;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Template for crawl commands: obtains a strategy, runs it with retries on network
 * failures, and passes the result to the subclass for storage.
 */
public abstract class BaseCrawlCommand implements Command {

    protected static final Logger logger = LoggerFactory.getLogger(BaseCrawlCommand.class);

    protected DataRepository repository;
    /** Maximum number of crawl attempts before giving up. */
    protected int maxRetries;
    /** Pause between two attempts, in milliseconds. */
    protected long retryDelayMs;

    /**
     * @param repository repository that subclasses store the crawled data in
     */
    public BaseCrawlCommand(DataRepository repository) {
        this.repository = repository;
        this.maxRetries = CrawlerConstants.MAX_RETRIES;
        this.retryDelayMs = 2000;
    }

    /**
     * @return the strategy used for one crawl attempt; requested anew for every attempt
     */
    protected abstract CrawlStrategy<?> getStrategy();

    /**
     * Stores the crawl result.
     *
     * @param data the value returned by the strategy's {@code crawl()} call
     */
    protected abstract void saveToRepository(Object data);

    /**
     * Crawls (with retries) and stores the result. Failures are logged and reported on
     * stderr instead of being thrown.
     */
    @Override
    public void execute() {
        try {
            Object data = crawlWithRetry();
            saveToRepository(data);
            logger.info("Crawling completed and saved to repository");
        } catch (InterruptedException e) {
            // The back-off sleep was interrupted: restore the flag so that the caller can
            // still observe the interruption instead of having it silently swallowed here.
            Thread.currentThread().interrupt();
            logger.error("Crawling failed", e);
            System.err.println("爬取失败: " + e.getMessage());
        } catch (Exception e) {
            logger.error("Crawling failed", e);
            System.err.println("爬取失败: " + e.getMessage());
        }
    }

    /**
     * Runs the strategy until it succeeds or {@link #maxRetries} attempts have failed with a
     * {@link NetworkException}. Any other exception is propagated immediately, without retry.
     *
     * @return the crawl result
     * @throws Exception the last {@link NetworkException} once the attempts are used up, an
     *                   {@link InterruptedException} if the wait between attempts is
     *                   interrupted, a {@link CrawlException} when no attempt was made
     *                   (non-positive {@code maxRetries}), or whatever else the strategy throws
     */
    protected Object crawlWithRetry() throws Exception {
        int attempts = 0;
        while (attempts < maxRetries) {
            try {
                CrawlStrategy<?> strategy = getStrategy();
                return strategy.crawl();
            } catch (NetworkException e) {
                attempts++;
                if (attempts < maxRetries) {
                    logger.warn("Network error, retrying in {}ms (attempt {}/{})", retryDelayMs, attempts, maxRetries);
                    Thread.sleep(retryDelayMs);
                } else {
                    logger.error("Max retries reached, giving up");
                    throw e;
                }
            }
        }
        throw new CrawlException("Max retries exceeded");
    }
}

32
project/src/main/java/com/example/crawler/command/BookCommand.java

@ -0,0 +1,32 @@
package com.example.crawler.command;
import com.example.crawler.model.Book;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.BookCrawlStrategy;
import com.example.crawler.strategy.CrawlStrategy;
import java.util.List;
/**
 * Crawl command for book data, backed by {@link BookCrawlStrategy}.
 */
public class BookCommand extends BaseCrawlCommand {

    /**
     * @param repository repository that receives the crawled books
     */
    public BookCommand(DataRepository repository) {
        super(repository);
    }

    /** {@inheritDoc} */
    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new BookCrawlStrategy();
    }

    /**
     * Stores the crawled books and prints how many were fetched.
     *
     * @param data crawl result, cast to {@code List<Book>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<Book> crawledBooks = (List<Book>) data;
        repository.saveBooks(crawledBooks);
        System.out.println("成功爬取 " + crawledBooks.size() + " 本书籍信息");
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取书籍信息";
    }
}

20
project/src/main/java/com/example/crawler/command/Command.java

@ -0,0 +1,20 @@
package com.example.crawler.command;
/**
 * Command interface.
 * Declares the standard operations of an executable command (Command pattern).
 */
public interface Command {
/**
 * Executes the command.
 */
void execute();
/**
 * Returns the name of the command.
 *
 * @return the command name
 */
String getName();
}

45
project/src/main/java/com/example/crawler/command/CrawlAllCommand.java

@ -0,0 +1,45 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
/**
 * Runs every crawl command in sequence and then saves the collected data.
 */
public class CrawlAllCommand implements Command {

    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller controller supplying the shared repository; also passed on to the
     *                   save command
     */
    public CrawlAllCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    /**
     * Executes the book, news, ranking and weather crawls in that order, pausing after
     * each one, and finally triggers a {@link SaveCommand}.
     */
    @Override
    public void execute() {
        System.out.println("\n=== 开始爬取全部数据源 ===");

        Command[] crawlSteps = new Command[] {
            new BookCommand(repository),
            new NewsCommand(repository),
            new CrawlRankingCommand(repository),
            new WeatherCommand(repository)
        };

        for (int step = 0; step < crawlSteps.length; step++) {
            crawlSteps[step].execute();
            pauseAfterStep();
        }

        new SaveCommand(controller).execute();
        System.out.println("\n=== 全部数据爬取完成 ===");
    }

    /** Sleeps two seconds; an interruption only restores the thread's interrupt flag. */
    private static void pauseAfterStep() {
        try {
            Thread.sleep(2000);
        } catch (InterruptedException interrupted) {
            Thread.currentThread().interrupt();
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取全部数据并保存";
    }
}

104
project/src/main/java/com/example/crawler/command/CrawlAndAnalyzeAllCommand.java

@ -0,0 +1,104 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.service.BookAnalysisService;
import com.example.crawler.service.NewsAnalysisService;
import com.example.crawler.service.RankingAnalysisService;
import com.example.crawler.service.WeatherAnalysisService;
/**
 * Crawls all four data sources and then runs every analysis service on the data held
 * by the repository.
 */
public class CrawlAndAnalyzeAllCommand implements Command {

    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller controller supplying the shared repository
     */
    public CrawlAndAnalyzeAllCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    /**
     * Runs the four crawl steps, then the four analyses. Each step is isolated: a failure
     * is printed to stderr and the remaining steps still run.
     */
    @Override
    public void execute() {
        System.out.println("\n========== 爬取全部数据并生成分析 ==========\n");

        System.out.println("第1步:爬取书籍信息...");
        runCrawl(new BookCommand(repository), "书籍爬取失败: ");

        System.out.println("\n第2步:爬取新闻信息...");
        runCrawl(new NewsCommand(repository), "新闻爬取失败: ");

        System.out.println("\n第3步:爬取大学排名...");
        runCrawl(new CrawlRankingCommand(repository), "大学排名爬取失败: ");

        System.out.println("\n第4步:爬取天气数据...");
        runCrawl(new WeatherCommand(repository), "天气数据爬取失败: ");

        System.out.println("\n========== 数据爬取完成,开始分析 ==========\n");

        analyzeBooks();
        analyzeNews();
        analyzeRankings();
        analyzeWeather();

        System.out.println("\n========== 全部完成 ==========");
        System.out.println("原始数据已保存到 output/ 目录");
        System.out.println("分析报告已保存到 reports/ 目录");
        System.out.println("图表已保存到 charts/ 目录");
    }

    /** Executes one crawl command, reporting any exception with the given message prefix. */
    private void runCrawl(Command crawl, String failurePrefix) {
        try {
            crawl.execute();
        } catch (Exception ex) {
            System.err.println(failurePrefix + ex.getMessage());
        }
    }

    /** Analyzes the stored books; skipped silently when there are none. */
    private void analyzeBooks() {
        try {
            BookAnalysisService service = new BookAnalysisService();
            boolean hasData = !repository.getBooks().isEmpty();
            if (hasData) {
                service.analyze(repository.getBooks());
            }
        } catch (Exception ex) {
            System.err.println("书籍分析失败: " + ex.getMessage());
        }
    }

    /** Analyzes the stored news; skipped silently when there are none. */
    private void analyzeNews() {
        try {
            NewsAnalysisService service = new NewsAnalysisService();
            boolean hasData = !repository.getNewsList().isEmpty();
            if (hasData) {
                service.analyze(repository.getNewsList());
            }
        } catch (Exception ex) {
            System.err.println("新闻分析失败: " + ex.getMessage());
        }
    }

    /** Analyzes the stored university rankings; skipped silently when there are none. */
    private void analyzeRankings() {
        try {
            RankingAnalysisService service = new RankingAnalysisService();
            boolean hasData = !repository.getRankings().isEmpty();
            if (hasData) {
                service.analyze(repository.getRankings());
            }
        } catch (Exception ex) {
            System.err.println("大学排名分析失败: " + ex.getMessage());
        }
    }

    /** Analyzes the stored weather data; skipped silently when there is none. */
    private void analyzeWeather() {
        try {
            WeatherAnalysisService service = new WeatherAnalysisService();
            boolean hasData = !repository.getWeatherList().isEmpty();
            if (hasData) {
                service.analyze(repository.getWeatherList());
            }
        } catch (Exception ex) {
            System.err.println("天气分析失败: " + ex.getMessage());
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取并分析全部数据";
    }
}

32
project/src/main/java/com/example/crawler/command/CrawlRankingCommand.java

@ -0,0 +1,32 @@
package com.example.crawler.command;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.UniversityRankCrawlStrategy;
import java.util.List;
/**
 * Crawl command for university ranking data, backed by {@link UniversityRankCrawlStrategy}.
 */
public class CrawlRankingCommand extends BaseCrawlCommand {

    /**
     * @param repository repository that receives the crawled rankings
     */
    public CrawlRankingCommand(DataRepository repository) {
        super(repository);
    }

    /** {@inheritDoc} */
    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new UniversityRankCrawlStrategy();
    }

    /**
     * Stores the crawled rankings and prints how many were fetched.
     *
     * @param data crawl result, cast to {@code List<UniversityRank>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<UniversityRank> crawledRankings = (List<UniversityRank>) data;
        repository.saveRankings(crawledRankings);
        System.out.println("成功爬取 " + crawledRankings.size() + " 条大学排名数据");
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取软科中国大学排名";
    }
}

19
project/src/main/java/com/example/crawler/command/ExitCommand.java

@ -0,0 +1,19 @@
package com.example.crawler.command;
/**
 * Exit command (Command pattern): prints a farewell line and terminates the JVM.
 */
public class ExitCommand implements Command {

    /** Process exit status passed to {@link System#exit(int)}. */
    private static final int EXIT_STATUS = 0;

    /** Prints the farewell banner, then stops the JVM; this method does not return. */
    @Override
    public void execute() {
        System.out.println("\n=== 感谢使用数据爬取系统 ===");
        System.exit(EXIT_STATUS);
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "退出";
    }
}

77
project/src/main/java/com/example/crawler/command/GenerateAllAnalysisCommand.java

@ -0,0 +1,77 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.service.BookAnalysisService;
import com.example.crawler.service.NewsAnalysisService;
import com.example.crawler.service.RankingAnalysisService;
import com.example.crawler.service.WeatherAnalysisService;
/**
 * Runs every analysis service on the data currently held by the repository, without
 * crawling anything first.
 */
public class GenerateAllAnalysisCommand implements Command {

    private final DataRepository repository;
    private final CrawlerController controller;

    /**
     * @param controller controller supplying the shared repository
     */
    public GenerateAllAnalysisCommand(CrawlerController controller) {
        this.controller = controller;
        this.repository = controller.getRepository();
    }

    /**
     * Analyzes books, news, rankings and weather in that order. A data set that is empty
     * is skipped with a notice; a failing analysis is reported on stderr and the rest
     * still run.
     */
    @Override
    public void execute() {
        System.out.println("\n========== 生成所有数据源分析报告 ==========\n");

        analyzeBooks();
        analyzeNews();
        analyzeRankings();
        analyzeWeather();

        System.out.println("\n========== 分析完成 ==========");
        System.out.println("报告已保存到 reports/ 目录");
        System.out.println("图表已保存到 charts/ 目录");
    }

    /** Book analysis step. */
    private void analyzeBooks() {
        try {
            BookAnalysisService service = new BookAnalysisService();
            if (repository.getBooks().isEmpty()) {
                System.out.println("没有书籍数据,跳过书籍分析");
            } else {
                service.analyze(repository.getBooks());
            }
        } catch (Exception ex) {
            System.err.println("书籍分析失败: " + ex.getMessage());
        }
    }

    /** News analysis step. */
    private void analyzeNews() {
        try {
            NewsAnalysisService service = new NewsAnalysisService();
            if (repository.getNewsList().isEmpty()) {
                System.out.println("没有新闻数据,跳过新闻分析");
            } else {
                service.analyze(repository.getNewsList());
            }
        } catch (Exception ex) {
            System.err.println("新闻分析失败: " + ex.getMessage());
        }
    }

    /** University ranking analysis step. */
    private void analyzeRankings() {
        try {
            RankingAnalysisService service = new RankingAnalysisService();
            if (repository.getRankings().isEmpty()) {
                System.out.println("没有大学排名数据,跳过排名分析");
            } else {
                service.analyze(repository.getRankings());
            }
        } catch (Exception ex) {
            System.err.println("大学排名分析失败: " + ex.getMessage());
        }
    }

    /** Weather analysis step. */
    private void analyzeWeather() {
        try {
            WeatherAnalysisService service = new WeatherAnalysisService();
            if (repository.getWeatherList().isEmpty()) {
                System.out.println("没有天气数据,跳过天气分析");
            } else {
                service.analyze(repository.getWeatherList());
            }
        } catch (Exception ex) {
            System.err.println("天气分析失败: " + ex.getMessage());
        }
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "生成所有分析报告";
    }
}

32
project/src/main/java/com/example/crawler/command/NewsCommand.java

@ -0,0 +1,32 @@
package com.example.crawler.command;
import com.example.crawler.model.News;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.NewsCrawlStrategy;
import java.util.List;
/**
 * Crawl command for news data, backed by {@link NewsCrawlStrategy}.
 */
public class NewsCommand extends BaseCrawlCommand {

    /**
     * @param repository repository that receives the crawled news
     */
    public NewsCommand(DataRepository repository) {
        super(repository);
    }

    /** {@inheritDoc} */
    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new NewsCrawlStrategy();
    }

    /**
     * Stores the crawled news and prints how many items were fetched.
     *
     * @param data crawl result, cast to {@code List<News>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<News> crawledNews = (List<News>) data;
        repository.saveNewsList(crawledNews);
        System.out.println("成功爬取 " + crawledNews.size() + " 条新闻");
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取新浪国内新闻";
    }
}

74
project/src/main/java/com/example/crawler/command/SaveCommand.java

@ -0,0 +1,74 @@
package com.example.crawler.command;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import com.example.crawler.util.JsonUtil;
/**
 * Writes every non-empty data set held by the controller to a timestamped JSON file
 * inside {@link CrawlerConstants#OUTPUT_DIR}.
 */
public class SaveCommand implements Command {

    private final CrawlerController controller;

    /**
     * @param controller controller whose data sets are saved
     */
    public SaveCommand(CrawlerController controller) {
        this.controller = controller;
    }

    /**
     * Saves books, news, university rankings and weather data in that order, all sharing
     * one timestamp. The first failure aborts the remaining saves and is reported on stderr.
     */
    @Override
    public void execute() {
        System.out.println("\n=== 开始保存数据 ===");
        try {
            DateTimeFormatter stampFormat = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");
            String timestamp = LocalDateTime.now().format(stampFormat);

            // Book data
            saveIfPresent(controller.getBooks(), "/books_", timestamp, "书籍数据已保存到: ");
            // News data
            saveIfPresent(controller.getNewsList(), "/news_", timestamp, "新闻数据已保存到: ");
            // University ranking data
            saveIfPresent(controller.getUniversityRankList(), "/university_ranking_", timestamp, "大学排名数据已保存到: ");
            // Weather data
            saveIfPresent(controller.getWeatherList(), "/weather_", timestamp, "天气数据已保存到: ");

            System.out.println("\n=== 数据保存完成 ===");
        } catch (Exception e) {
            System.err.println("保存数据失败: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Writes one data set to {@code OUTPUT_DIR + filePrefix + timestamp + ".json"} and
     * prints a confirmation; a {@code null} or empty list is skipped.
     *
     * @param items         data set to save
     * @param filePrefix    path fragment placed between the output directory and the timestamp
     * @param timestamp     timestamp shared by all files of one save run
     * @param savedMessage  text printed in front of the file name after a successful save
     * @throws Exception whatever the JSON utility throws
     */
    private <T> void saveIfPresent(List<T> items, String filePrefix, String timestamp, String savedMessage)
            throws Exception {
        if (items == null || items.isEmpty()) {
            return;
        }
        String targetFile = CrawlerConstants.OUTPUT_DIR + filePrefix + timestamp + ".json";
        JsonUtil.saveListToJsonFile(items, targetFile);
        System.out.println(savedMessage + targetFile);
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "保存当前数据到文件";
    }
}

32
project/src/main/java/com/example/crawler/command/WeatherCommand.java

@ -0,0 +1,32 @@
package com.example.crawler.command;
import com.example.crawler.model.Weather;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.WeatherCrawlStrategy;
import java.util.List;
/**
 * Crawl command for weather data, backed by {@link WeatherCrawlStrategy}.
 */
public class WeatherCommand extends BaseCrawlCommand {

    /**
     * @param repository repository that receives the crawled weather records
     */
    public WeatherCommand(DataRepository repository) {
        super(repository);
    }

    /** {@inheritDoc} */
    @Override
    protected CrawlStrategy<?> getStrategy() {
        return new WeatherCrawlStrategy();
    }

    /**
     * Stores the crawled weather records and prints how many cities were fetched.
     *
     * @param data crawl result, cast to {@code List<Weather>}
     */
    @Override
    @SuppressWarnings("unchecked")
    protected void saveToRepository(Object data) {
        List<Weather> crawledWeather = (List<Weather>) data;
        repository.saveWeatherList(crawledWeather);
        System.out.println("成功爬取 " + crawledWeather.size() + " 个城市的天气信息");
    }

    /** {@inheritDoc} */
    @Override
    public String getName() {
        return "爬取天气数据";
    }
}

31
project/src/main/java/com/example/crawler/constant/CrawlerConstants.java

@ -0,0 +1,31 @@
package com.example.crawler.constant;
import java.util.HashMap;
import java.util.Map;
/**
 * Shared configuration values of the crawler: HTTP settings, source URLs, output
 * directories and the coordinates of the cities whose weather is fetched.
 *
 * <p>This is a constants holder and cannot be instantiated.
 */
public final class CrawlerConstants {

    /** {@code User-Agent} header value sent with HTTP requests. */
    public static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/125.0.0.0 Safari/537.36";
    /** {@code Referer} header value sent with HTTP requests. */
    public static final String REFERER = "https://www.baidu.com";
    /** Request timeout in milliseconds. */
    public static final int TIMEOUT_MS = 10000;
    /** Maximum number of attempts for one crawl. */
    public static final int MAX_RETRIES = 3;
    /** Delay in milliseconds. */
    public static final long DELAY_MS = 3000;

    public static final String URL_BOOKS = "https://books.toscrape.com/";
    public static final String URL_NEWS = "https://news.sina.com.cn/china/";
    public static final String URL_RANKING = "https://www.shanghairanking.cn/rankings/bcur/202310";
    public static final String URL_WEATHER_API = "https://api.open-meteo.com/v1/forecast";

    /** Directory for raw JSON output. */
    public static final String OUTPUT_DIR = "output";
    /** Directory for text reports. */
    public static final String REPORTS_DIR = "reports";
    /** Directory for chart images. */
    public static final String CHARTS_DIR = "charts";

    /**
     * City name mapped to its {@code {latitude, longitude}} pair.
     *
     * <p>The map is an unmodifiable view, so this shared configuration cannot be changed
     * by accident at runtime. Iteration order is that of the backing {@link HashMap}.
     */
    public static final Map<String, double[]> CITY_COORDINATES;

    static {
        Map<String, double[]> coordinates = new HashMap<>();
        coordinates.put("北京", new double[]{39.9042, 116.4074});
        coordinates.put("上海", new double[]{31.2304, 121.4737});
        coordinates.put("广州", new double[]{23.1291, 113.2644});
        CITY_COORDINATES = java.util.Collections.unmodifiableMap(coordinates);
    }

    private CrawlerConstants() {
        // constants holder, not meant to be instantiated
    }
}

90
project/src/main/java/com/example/crawler/controller/CrawlerController.java

@ -0,0 +1,90 @@
package com.example.crawler.controller;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import com.example.crawler.command.BookCommand;
import com.example.crawler.command.Command;
import com.example.crawler.command.CrawlAllCommand;
import com.example.crawler.command.CrawlAndAnalyzeAllCommand;
import com.example.crawler.command.CrawlRankingCommand;
import com.example.crawler.command.ExitCommand;
import com.example.crawler.command.GenerateAllAnalysisCommand;
import com.example.crawler.command.NewsCommand;
import com.example.crawler.command.SaveCommand;
import com.example.crawler.command.WeatherCommand;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import com.example.crawler.repository.DataRepository;
import com.example.crawler.view.CrawlerView;
/**
 * Controller of the command-line interface: shows the menu, maps the user's numeric
 * choice to a {@link Command} and executes it.
 */
public class CrawlerController {

    private final CrawlerView view;
    private final Map<Integer, Command> commandMap;
    private final DataRepository repository;

    /** Creates the view, obtains the shared repository and registers all menu commands. */
    public CrawlerController() {
        this.view = new CrawlerView();
        this.repository = DataRepository.getInstance();
        this.commandMap = new HashMap<>();
        initCommands();
    }

    /** Registers the commands under their menu numbers 1-9. */
    private void initCommands() {
        register(1, new BookCommand(repository));
        register(2, new NewsCommand(repository));
        register(3, new CrawlRankingCommand(repository));
        register(4, new WeatherCommand(repository));
        register(5, new CrawlAllCommand(this));
        register(6, new SaveCommand(this));
        register(7, new GenerateAllAnalysisCommand(this));
        register(8, new CrawlAndAnalyzeAllCommand(this));
        register(9, new ExitCommand());
    }

    /** Binds one menu number to its command. */
    private void register(int menuNumber, Command command) {
        commandMap.put(menuNumber, command);
    }

    /**
     * Runs the interactive loop: show menu, read a choice, execute the matching command
     * (or report an invalid choice), then wait for the user unless the choice was 9.
     * The loop itself never ends.
     */
    public void start() {
        Scanner input = new Scanner(System.in);
        for (;;) {
            view.showMenu();
            int selection = view.getInput(input);

            Command selected = commandMap.get(selection);
            if (selected == null) {
                view.showError("无效的选择,请输入1-9之间的数字");
            } else {
                selected.execute();
            }

            boolean exitChosen = (selection == 9);
            if (!exitChosen) {
                view.pause(input);
            }
        }
    }

    /** @return the books held by the repository */
    public List<Book> getBooks() {
        return repository.getBooks();
    }

    /** @return the news items held by the repository */
    public List<News> getNewsList() {
        return repository.getNewsList();
    }

    /** @return the university rankings held by the repository */
    public List<UniversityRank> getUniversityRankList() {
        return repository.getRankings();
    }

    /** @return the weather records held by the repository */
    public List<Weather> getWeatherList() {
        return repository.getWeatherList();
    }

    /** @return the repository used by this controller */
    public DataRepository getRepository() {
        return repository;
    }
}

16
project/src/main/java/com/example/crawler/exception/CrawlException.java

@ -0,0 +1,16 @@
package com.example.crawler.exception;
/**
 * Base class of the crawler's exceptions.
 * All crawler-related exceptions extend this class.
 */
public class CrawlException extends Exception {
/**
 * @param message description of the failure
 */
public CrawlException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   underlying exception that triggered this one
 */
public CrawlException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/DataSaveException.java

@ -0,0 +1,16 @@
package com.example.crawler.exception;
/**
 * Data-saving exception.
 * Used for errors related to saving data, such as file write failures and JSON
 * serialization failures.
 */
public class DataSaveException extends CrawlException {
/**
 * @param message description of the failure
 */
public DataSaveException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   underlying exception that triggered this one
 */
public DataSaveException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/NetworkException.java

@ -0,0 +1,16 @@
package com.example.crawler.exception;
/**
 * Network exception.
 * Used for network-related errors, such as failed HTTP requests and connection timeouts.
 */
public class NetworkException extends CrawlException {
/**
 * @param message description of the failure
 */
public NetworkException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   underlying exception that triggered this one
 */
public NetworkException(String message, Throwable cause) {
super(message, cause);
}
}

16
project/src/main/java/com/example/crawler/exception/ParseException.java

@ -0,0 +1,16 @@
package com.example.crawler.exception;
/**
 * Parsing exception.
 * Used for errors related to parsing data, such as HTML parsing failures and JSON
 * parsing failures.
 */
public class ParseException extends CrawlException {
/**
 * @param message description of the failure
 */
public ParseException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   underlying exception that triggered this one
 */
public ParseException(String message, Throwable cause) {
super(message, cause);
}
}

65
project/src/main/java/com/example/crawler/model/Book.java

@ -0,0 +1,65 @@
package com.example.crawler.model;
/**
 * Book data model.
 * Holds the information of one book from the toscrape.com site; all values are kept
 * as plain strings.
 */
public class Book {

    private String title;
    private String price;
    private String availability;
    private String rating;

    /** Creates a book with every field unset ({@code null}). */
    public Book() {
    }

    /**
     * @param title        book title
     * @param price        price text
     * @param availability availability text
     * @param rating       rating text
     */
    public Book(String title, String price, String availability, String rating) {
        this.title = title;
        this.price = price;
        this.availability = availability;
        this.rating = rating;
    }

    /** @return the book title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the book title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the price text */
    public String getPrice() {
        return this.price;
    }

    /** @param price the price text */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @return the availability text */
    public String getAvailability() {
        return this.availability;
    }

    /** @param availability the availability text */
    public void setAvailability(String availability) {
        this.availability = availability;
    }

    /** @return the rating text */
    public String getRating() {
        return this.rating;
    }

    /** @param rating the rating text */
    public void setRating(String rating) {
        this.rating = rating;
    }

    /** @return all fields in the form {@code Book{title='...', price='...', ...}} */
    @Override
    public String toString() {
        return String.format(
                "Book{title='%s', price='%s', availability='%s', rating='%s'}",
                title, price, availability, rating);
    }
}

54
project/src/main/java/com/example/crawler/model/News.java

@ -0,0 +1,54 @@
package com.example.crawler.model;
/**
 * News data model.
 * Holds the information of one domestic news item from Sina News.
 */
public class News {

    private String title;
    private String publishTime;
    private String url;

    /** Creates a news item with every field unset ({@code null}). */
    public News() {
    }

    /**
     * @param title       headline
     * @param publishTime publication time text
     * @param url         link to the article
     */
    public News(String title, String publishTime, String url) {
        this.title = title;
        this.publishTime = publishTime;
        this.url = url;
    }

    /** @return the headline */
    public String getTitle() {
        return this.title;
    }

    /** @param title the headline */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the publication time text */
    public String getPublishTime() {
        return this.publishTime;
    }

    /** @param publishTime the publication time text */
    public void setPublishTime(String publishTime) {
        this.publishTime = publishTime;
    }

    /** @return the link to the article */
    public String getUrl() {
        return this.url;
    }

    /** @param url the link to the article */
    public void setUrl(String url) {
        this.url = url;
    }

    /** @return all fields in the form {@code News{title='...', publishTime='...', url='...'}} */
    @Override
    public String toString() {
        return String.format(
                "News{title='%s', publishTime='%s', url='%s'}",
                title, publishTime, url);
    }
}

76
project/src/main/java/com/example/crawler/model/UniversityRank.java

@ -0,0 +1,76 @@
package com.example.crawler.model;
/**
 * University ranking data model.
 * Holds one entry of the ShanghaiRanking (Ruanke) Chinese university ranking.
 */
public class UniversityRank {

    private Integer rank;
    private String universityName;
    private String totalScore;
    private String province;
    private String category;

    /** Creates an entry with every field unset ({@code null}). */
    public UniversityRank() {
    }

    /**
     * @param rank           position in the ranking
     * @param universityName name of the university
     * @param totalScore     total score text
     * @param province       province of the university
     * @param category       category of the university
     */
    public UniversityRank(Integer rank, String universityName, String totalScore, String province, String category) {
        this.rank = rank;
        this.universityName = universityName;
        this.totalScore = totalScore;
        this.province = province;
        this.category = category;
    }

    /** @return the position in the ranking */
    public Integer getRank() {
        return this.rank;
    }

    /** @param rank the position in the ranking */
    public void setRank(Integer rank) {
        this.rank = rank;
    }

    /** @return the name of the university */
    public String getUniversityName() {
        return this.universityName;
    }

    /** @param universityName the name of the university */
    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    /** @return the total score text */
    public String getTotalScore() {
        return this.totalScore;
    }

    /** @param totalScore the total score text */
    public void setTotalScore(String totalScore) {
        this.totalScore = totalScore;
    }

    /** @return the province of the university */
    public String getProvince() {
        return this.province;
    }

    /** @param province the province of the university */
    public void setProvince(String province) {
        this.province = province;
    }

    /** @return the category of the university */
    public String getCategory() {
        return this.category;
    }

    /** @param category the category of the university */
    public void setCategory(String category) {
        this.category = category;
    }

    /** @return all fields in the form {@code UniversityRank{rank=..., universityName='...', ...}} */
    @Override
    public String toString() {
        return String.format(
                "UniversityRank{rank=%s, universityName='%s', totalScore='%s', province='%s', category='%s'}",
                rank, universityName, totalScore, province, category);
    }
}

140
project/src/main/java/com/example/crawler/model/Weather.java

@ -0,0 +1,140 @@
package com.example.crawler.model;
import java.util.ArrayList;
import java.util.List;
/**
 * Weather data model for one city, filled from the Open-Meteo API.
 * Data source: Open-Meteo (CC BY 4.0).
 *
 * <p>Holds the current readings plus parallel hourly series. The hourly lists are never
 * {@code null} when accessed through this class's constructors and setters: a {@code null}
 * passed to a list setter is stored as an empty list.
 */
public class Weather {
    private String cityName;
    private double temperature;
    private double humidity;
    private double windSpeed;
    private String weatherCode;
    private List<String> hourlyTimes;
    private List<Double> hourlyTemperatures;
    private List<Integer> hourlyHumidities;
    private List<Double> hourlyWindSpeeds;

    /** Creates an empty record with empty hourly series. */
    public Weather() {
        this.hourlyTimes = new ArrayList<>();
        this.hourlyTemperatures = new ArrayList<>();
        this.hourlyHumidities = new ArrayList<>();
        this.hourlyWindSpeeds = new ArrayList<>();
    }

    /**
     * Creates a record with the current readings and empty hourly series.
     *
     * @param cityName    display name of the city
     * @param temperature current temperature
     * @param humidity    current relative humidity
     * @param windSpeed   current wind speed
     * @param weatherCode weather code as text (see {@link #getWeatherDescription()})
     */
    public Weather(String cityName, double temperature, double humidity, double windSpeed, String weatherCode) {
        this();
        this.cityName = cityName;
        this.temperature = temperature;
        this.humidity = humidity;
        this.windSpeed = windSpeed;
        this.weatherCode = weatherCode;
    }

    public String getCityName() {
        return cityName;
    }

    public void setCityName(String cityName) {
        this.cityName = cityName;
    }

    public double getTemperature() {
        return temperature;
    }

    public void setTemperature(double temperature) {
        this.temperature = temperature;
    }

    public double getHumidity() {
        return humidity;
    }

    public void setHumidity(double humidity) {
        this.humidity = humidity;
    }

    public double getWindSpeed() {
        return windSpeed;
    }

    public void setWindSpeed(double windSpeed) {
        this.windSpeed = windSpeed;
    }

    public String getWeatherCode() {
        return weatherCode;
    }

    public void setWeatherCode(String weatherCode) {
        this.weatherCode = weatherCode;
    }

    public List<String> getHourlyTimes() {
        return hourlyTimes;
    }

    /** @param hourlyTimes hourly timestamps; {@code null} is stored as an empty list */
    public void setHourlyTimes(List<String> hourlyTimes) {
        this.hourlyTimes = hourlyTimes != null ? hourlyTimes : new ArrayList<>();
    }

    public List<Double> getHourlyTemperatures() {
        return hourlyTemperatures;
    }

    /** @param hourlyTemperatures hourly temperatures; {@code null} is stored as an empty list */
    public void setHourlyTemperatures(List<Double> hourlyTemperatures) {
        this.hourlyTemperatures = hourlyTemperatures != null ? hourlyTemperatures : new ArrayList<>();
    }

    public List<Integer> getHourlyHumidities() {
        return hourlyHumidities;
    }

    /** @param hourlyHumidities hourly humidities; {@code null} is stored as an empty list */
    public void setHourlyHumidities(List<Integer> hourlyHumidities) {
        this.hourlyHumidities = hourlyHumidities != null ? hourlyHumidities : new ArrayList<>();
    }

    public List<Double> getHourlyWindSpeeds() {
        return hourlyWindSpeeds;
    }

    /** @param hourlyWindSpeeds hourly wind speeds; {@code null} is stored as an empty list */
    public void setHourlyWindSpeeds(List<Double> hourlyWindSpeeds) {
        this.hourlyWindSpeeds = hourlyWindSpeeds != null ? hourlyWindSpeeds : new ArrayList<>();
    }

    /**
     * Translates the weather code into a short Chinese description.
     *
     * <p>The mapping follows the WMO weather interpretation codes listed in the Open-Meteo
     * documentation. Intensity levels are kept distinct (e.g. 61/63/65 are slight/moderate/heavy
     * rain) and the snow and freezing-precipitation codes are covered.
     *
     * @return the description, or {@code "未知"} when the code is {@code null} or not recognised
     */
    public String getWeatherDescription() {
        if (weatherCode == null) {
            return "未知";
        }
        switch (weatherCode) {
            case "0": return "晴";
            case "1": case "2": case "3": return "多云";
            case "45": case "48": return "雾";
            case "51": return "小毛毛雨";
            case "53": return "毛毛雨";
            case "55": return "浓毛毛雨";
            case "56": case "57": return "冻毛毛雨";
            case "61": return "小雨";
            case "63": return "中雨";
            case "65": return "大雨";
            case "66": case "67": return "冻雨";
            case "71": return "小雪";
            case "73": return "中雪";
            case "75": return "大雪";
            case "77": return "雪粒";
            case "80": case "81": case "82": return "阵雨";
            case "85": case "86": return "阵雪";
            case "95": return "雷暴";
            case "96": case "99": return "雷暴加冰雹";
            default: return "未知";
        }
    }

    /** Renders the current readings (not the hourly series) for debugging. */
    @Override
    public String toString() {
        return "Weather{" +
                "cityName='" + cityName + '\'' +
                ", temperature=" + temperature +
                ", humidity=" + humidity +
                ", windSpeed=" + windSpeed +
                ", weatherCode='" + weatherCode + '\'' +
                ", weather='" + getWeatherDescription() + '\'' +
                '}';
    }
}

75
project/src/main/java/com/example/crawler/repository/DataRepository.java

@ -0,0 +1,75 @@
package com.example.crawler.repository;
import com.example.crawler.model.Book;
import com.example.crawler.model.News;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.model.Weather;
import java.util.ArrayList;
import java.util.List;
/**
 * In-memory store for the crawled data sets, exposed as a lazily created singleton.
 *
 * <p>Getters return a fresh copy of the stored list, so callers cannot modify the repository
 * through the returned list. Each {@code saveX} call replaces the previously stored list.
 * Only {@link #getInstance()} is synchronized; the data accessors themselves are not.
 */
public class DataRepository {
    private static DataRepository instance;

    private final List<Book> books;
    private final List<News> newsList;
    private final List<UniversityRank> rankings;
    private final List<Weather> weatherList;

    private DataRepository() {
        this.books = new ArrayList<>();
        this.newsList = new ArrayList<>();
        this.rankings = new ArrayList<>();
        this.weatherList = new ArrayList<>();
    }

    /** @return the single shared repository, created on first use */
    public static synchronized DataRepository getInstance() {
        if (instance == null) {
            instance = new DataRepository();
        }
        return instance;
    }

    /** @return a copy of the stored books */
    public List<Book> getBooks() {
        return new ArrayList<>(books);
    }

    /** @param books replacement book list; {@code null} is treated as empty */
    public void saveBooks(List<Book> books) {
        replaceContents(this.books, books);
    }

    /** @return a copy of the stored news items */
    public List<News> getNewsList() {
        return new ArrayList<>(newsList);
    }

    /** @param newsList replacement news list; {@code null} is treated as empty */
    public void saveNewsList(List<News> newsList) {
        replaceContents(this.newsList, newsList);
    }

    /** @return a copy of the stored ranking entries */
    public List<UniversityRank> getRankings() {
        return new ArrayList<>(rankings);
    }

    /** @param rankings replacement ranking list; {@code null} is treated as empty */
    public void saveRankings(List<UniversityRank> rankings) {
        replaceContents(this.rankings, rankings);
    }

    /** @return a copy of the stored weather records */
    public List<Weather> getWeatherList() {
        return new ArrayList<>(weatherList);
    }

    /** @param weatherList replacement weather list; {@code null} is treated as empty */
    public void saveWeatherList(List<Weather> weatherList) {
        replaceContents(this.weatherList, weatherList);
    }

    /** Removes every stored data set. */
    public void clearAll() {
        books.clear();
        newsList.clear();
        rankings.clear();
        weatherList.clear();
    }

    /**
     * Replaces everything in {@code target} with the elements of {@code source}.
     * The source is snapshotted before the target is cleared, so a {@code null} source
     * can no longer wipe the stored data and then fail halfway with a NullPointerException.
     */
    private static <T> void replaceContents(List<T> target, List<T> source) {
        List<T> snapshot = source == null ? new ArrayList<>() : new ArrayList<>(source);
        target.clear();
        target.addAll(snapshot);
    }
}

171
project/src/main/java/com/example/crawler/service/BookAnalysisService.java

@ -0,0 +1,171 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.Book;
import com.example.crawler.util.DataCleaner;
/**
 * Analyses crawled book data: prints price, rating and stock statistics to the console,
 * hands the price and rating distributions to {@link ChartGenerator}, and writes a
 * UTF-8 text report into {@code CrawlerConstants.REPORTS_DIR}.
 */
public class BookAnalysisService {
    /** Price bucket labels in ascending order; this is also the print and chart order. */
    private static final String[] PRICE_RANGES = {"0-10", "10-20", "20-30", "30-40", "40-50", "50+"};
    /** Rating labels in display order. */
    private static final String[] RATING_LABELS = {"5星", "4星", "3星", "2星", "1星", "未知"};

    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every analysis step and writes the report.
     *
     * @param books books to analyse; {@code null} or empty prints a notice and returns
     */
    public void analyze(List<Book> books) {
        if (books == null || books.isEmpty()) {
            System.out.println("没有书籍数据可分析");
            return;
        }
        System.out.println("\n========== 书籍数据分析 ==========");
        System.out.println("共分析 " + books.size() + " 本书\n");
        analyzePriceDistribution(books);
        analyzeRatingDistribution(books);
        analyzeStockStatus(books);
        generateReport(books);
    }

    /** Prints max/min/average price and the per-bucket counts, then draws the histogram. */
    private void analyzePriceDistribution(List<Book> books) {
        System.out.println("【价格分析】");
        List<Double> prices = collectValidPrices(books);
        if (prices.isEmpty()) {
            System.out.println("无法获取有效价格数据");
            return;
        }
        java.util.DoubleSummaryStatistics stats =
                prices.stream().mapToDouble(Double::doubleValue).summaryStatistics();
        System.out.println("最高价: £" + String.format("%.2f", stats.getMax()));
        System.out.println("最低价: £" + String.format("%.2f", stats.getMin()));
        System.out.println("平均价: £" + String.format("%.2f", stats.getAverage()));

        // LinkedHashMap keeps the buckets in ascending order for both the console and the chart.
        Map<String, Integer> priceRanges = new java.util.LinkedHashMap<>();
        for (String range : PRICE_RANGES) {
            priceRanges.put(range, 0);
        }
        for (double price : prices) {
            priceRanges.merge(priceBucket(price), 1, Integer::sum);
        }
        System.out.println("\n价格区间分布:");
        for (Map.Entry<String, Integer> entry : priceRanges.entrySet()) {
            System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本");
        }
        ChartGenerator.generatePriceHistogram(priceRanges, "price_histogram.png");
    }

    /** Prints the count and percentage of books per star rating, then draws the pie chart. */
    private void analyzeRatingDistribution(List<Book> books) {
        System.out.println("\n【评分分析】");
        // LinkedHashMap keeps the ratings in the 5-star .. 1-star .. unknown order.
        Map<String, Integer> ratingCounts = new java.util.LinkedHashMap<>();
        for (String label : RATING_LABELS) {
            ratingCounts.put(label, 0);
        }
        for (Book book : books) {
            int rating = DataCleaner.cleanRating(book.getRating());
            // Anything outside 1..5 is counted as unknown.
            String label = (rating >= 1 && rating <= 5) ? rating + "星" : "未知";
            ratingCounts.merge(label, 1, Integer::sum);
        }
        int total = books.size();
        System.out.println("评分分布:");
        for (Map.Entry<String, Integer> entry : ratingCounts.entrySet()) {
            double percentage = (entry.getValue() * 100.0) / total;
            System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 本 (" + String.format("%.1f", percentage) + "%)");
        }
        ChartGenerator.generateRatingPieChart(ratingCounts, "rating_pie.png");
    }

    /** Prints how many books are in stock; every other book is counted as out of stock. */
    private void analyzeStockStatus(List<Book> books) {
        System.out.println("\n【库存分析】");
        long inStock = countInStock(books);
        System.out.println("有库存: " + inStock + " 本");
        System.out.println("缺货: " + (books.size() - inStock) + " 本");
    }

    /** Writes the text report; the file is encoded as UTF-8 regardless of the platform default. */
    private void generateReport(List<Book> books) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/book_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 书籍数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析书籍总数: " + books.size());
            writer.println();
            List<Double> prices = collectValidPrices(books);
            if (!prices.isEmpty()) {
                java.util.DoubleSummaryStatistics stats =
                        prices.stream().mapToDouble(Double::doubleValue).summaryStatistics();
                writer.println("【价格统计】");
                writer.println("最高价: £" + String.format("%.2f", stats.getMax()));
                writer.println("最低价: £" + String.format("%.2f", stats.getMin()));
                writer.println("平均价: £" + String.format("%.2f", stats.getAverage()));
                writer.println();
            }
            writer.println("【库存统计】");
            long inStock = countInStock(books);
            writer.println("有库存: " + inStock + " 本");
            writer.println("缺货: " + (books.size() - inStock) + " 本");
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }

    /** Returns the cleaned prices of all books, keeping only values greater than zero. */
    private static List<Double> collectValidPrices(List<Book> books) {
        List<Double> prices = new ArrayList<>();
        for (Book book : books) {
            double price = DataCleaner.cleanPrice(book.getPrice());
            if (price > 0) {
                prices.add(price);
            }
        }
        return prices;
    }

    /** Maps a positive price to its bucket label: 10-wide buckets below 50, otherwise "50+". */
    private static String priceBucket(double price) {
        if (price >= 50) {
            return "50+";
        }
        int lower = ((int) (price / 10)) * 10;
        return lower + "-" + (lower + 10);
    }

    /** Counts the books whose availability text contains "in stock" (case-insensitive). */
    private static long countInStock(List<Book> books) {
        long count = 0;
        for (Book book : books) {
            String availability = book.getAvailability();
            // Locale.ROOT keeps the lower-casing independent of the JVM default locale.
            if (availability != null
                    && availability.toLowerCase(java.util.Locale.ROOT).contains("in stock")) {
                count++;
            }
        }
        return count;
    }
}

138
project/src/main/java/com/example/crawler/service/NewsAnalysisService.java

@ -0,0 +1,138 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.News;
import com.example.crawler.util.DataCleaner;
/**
 * Analyses crawled news: prints the per-hour publishing distribution and the most frequent
 * title words, hands both to {@link ChartGenerator}, and writes a UTF-8 text report into
 * {@code CrawlerConstants.REPORTS_DIR}.
 */
public class NewsAnalysisService {
    private static final int HOURS_PER_DAY = 24;
    /** Number of top words printed and charted. */
    private static final int TOP_WORD_COUNT = 10;

    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every analysis step and writes the report.
     *
     * @param newsList news items to analyse; {@code null} or empty prints a notice and returns
     */
    public void analyze(List<News> newsList) {
        if (newsList == null || newsList.isEmpty()) {
            System.out.println("没有新闻数据可分析");
            return;
        }
        System.out.println("\n========== 新闻数据分析 ==========");
        System.out.println("共分析 " + newsList.size() + " 条新闻\n");
        analyzeTimeDistribution(newsList);
        analyzeKeywords(newsList);
        generateReport(newsList);
    }

    /** Prints a per-hour histogram and the peak hour, then draws the time-trend chart. */
    private void analyzeTimeDistribution(List<News> newsList) {
        System.out.println("【发布时间分布】");
        Map<Integer, Integer> hourDistribution = buildHourDistribution(newsList);
        System.out.println("\n按小时统计:");
        int peakHour = 0;
        int peakCount = 0;
        int counted = 0;
        for (int hour = 0; hour < HOURS_PER_DAY; hour++) {
            int count = hourDistribution.get(hour);
            counted += count;
            // One star per item, so an hour without news shows an empty bar.
            String bar = "*".repeat(count);
            System.out.printf(" %02d:00 - %02d:00: %3d %s%n", hour, (hour + 1) % HOURS_PER_DAY, count, bar);
            if (count > peakCount) {
                peakCount = count;
                peakHour = hour;
            }
        }
        int skipped = newsList.size() - counted;
        if (skipped > 0) {
            // Report the items that were left out instead of dropping them silently.
            System.out.println(" (" + skipped + " 条新闻的发布时间无法解析,已跳过)");
        }
        System.out.println("\n高峰时段: " + String.format("%02d:00", peakHour) + " (发布 " + peakCount + " 条新闻)");
        ChartGenerator.generateNewsTimeTrend(hourDistribution, "news_time_trend.png");
    }

    /** Prints the most frequent title words and draws the word-frequency bar chart. */
    private void analyzeKeywords(List<News> newsList) {
        System.out.println("\n【关键词分析】");
        Map<String, Integer> allWords = new HashMap<>();
        for (News news : newsList) {
            String title = DataCleaner.cleanTitle(news.getTitle());
            String[] words = DataCleaner.extractWords(title);
            Map<String, Integer> wordFreq = DataCleaner.countWordFrequency(words);
            for (Map.Entry<String, Integer> entry : wordFreq.entrySet()) {
                allWords.merge(entry.getKey(), entry.getValue(), Integer::sum);
            }
        }
        // Highest count first; equal counts are ordered by word so the output is deterministic.
        List<Map.Entry<String, Integer>> topWords = allWords.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed()
                        .thenComparing(Map.Entry.<String, Integer>comparingByKey()))
                .limit(TOP_WORD_COUNT)
                .collect(Collectors.toList());
        System.out.println("\n高频词 TOP 10:");
        // LinkedHashMap keeps the ranking order when the data is passed on to the chart.
        Map<String, Integer> top10 = new java.util.LinkedHashMap<>();
        for (int i = 0; i < topWords.size(); i++) {
            Map.Entry<String, Integer> entry = topWords.get(i);
            System.out.printf(" %2d. %s: %d%n", i + 1, entry.getKey(), entry.getValue());
            top10.put(entry.getKey(), entry.getValue());
        }
        ChartGenerator.generateWordFrequencyBarChart(top10, "news_top_words.png");
    }

    /** Writes the text report; the file is encoded as UTF-8 regardless of the platform default. */
    private void generateReport(List<News> newsList) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/news_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 新闻数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析新闻总数: " + newsList.size());
            writer.println();
            Map<Integer, Integer> hourDistribution = buildHourDistribution(newsList);
            writer.println("【发布时间分布】");
            for (int hour = 0; hour < HOURS_PER_DAY; hour++) {
                writer.println(String.format(" %02d:00 - %02d:00: %d 条", hour, (hour + 1) % HOURS_PER_DAY, hourDistribution.get(hour)));
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }

    /**
     * Counts news items per publishing hour.
     *
     * @return a map holding every hour 0..23 in ascending order, each with its item count;
     *         items whose time cannot be turned into an hour in that range are not counted
     */
    private static Map<Integer, Integer> buildHourDistribution(List<News> newsList) {
        Map<Integer, Integer> distribution = new java.util.LinkedHashMap<>();
        for (int hour = 0; hour < HOURS_PER_DAY; hour++) {
            distribution.put(hour, 0);
        }
        for (News news : newsList) {
            try {
                int hour = DataCleaner.extractHour(DataCleaner.cleanNewsTime(news.getPublishTime()));
                if (distribution.containsKey(hour)) {
                    distribution.merge(hour, 1, Integer::sum);
                }
            } catch (Exception ignored) {
                // Best effort: an item whose publish time cannot be parsed is left out of the
                // distribution; the console analysis reports how many were skipped.
            }
        }
        return distribution;
    }
}

189
project/src/main/java/com/example/crawler/service/RankingAnalysisService.java

@ -0,0 +1,189 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.util.DataCleaner;
/**
 * Analyses university ranking data: prints province, score and category statistics, hands the
 * province and score distributions to {@link ChartGenerator}, and writes a UTF-8 text report
 * into {@code CrawlerConstants.REPORTS_DIR}.
 */
public class RankingAnalysisService {
    /** Score bucket labels in ascending order; this is also the print and chart order. */
    private static final String[] SCORE_RANGES = {"0-20", "20-40", "40-60", "60-80", "80-100"};
    /** Number of provinces shown in the province leaderboard. */
    private static final int TOP_PROVINCE_COUNT = 10;

    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every analysis step and writes the report.
     *
     * @param ranks ranking entries to analyse; {@code null} or empty prints a notice and returns
     */
    public void analyze(List<UniversityRank> ranks) {
        if (ranks == null || ranks.isEmpty()) {
            System.out.println("没有大学排名数据可分析");
            return;
        }
        System.out.println("\n========== 大学排名数据分析 ==========");
        System.out.println("共分析 " + ranks.size() + " 所大学\n");
        analyzeProvinceDistribution(ranks);
        analyzeScoreDistribution(ranks);
        analyzeCategoryDistribution(ranks);
        generateReport(ranks);
    }

    /** Prints the provinces with the most ranked universities and draws the bar chart. */
    private void analyzeProvinceDistribution(List<UniversityRank> ranks) {
        System.out.println("【各省份上榜大学数量】");
        List<Map.Entry<String, Integer>> leaders = topProvinces(ranks, TOP_PROVINCE_COUNT);
        System.out.println("\n省份排行榜 TOP 10:");
        // LinkedHashMap keeps the leaderboard order when the data is passed on to the chart.
        Map<String, Integer> top10 = new java.util.LinkedHashMap<>();
        int rankNum = 1;
        for (Map.Entry<String, Integer> entry : leaders) {
            System.out.printf(" %2d. %s: %d 所大学%n", rankNum++, entry.getKey(), entry.getValue());
            top10.put(entry.getKey(), entry.getValue());
        }
        ChartGenerator.generateProvinceBarChart(top10, "province_bar.png");
    }

    /** Prints max/min/average/median score and the per-bucket counts, then draws the chart. */
    private void analyzeScoreDistribution(List<UniversityRank> ranks) {
        System.out.println("\n【总分分析】");
        List<Double> scores = collectValidScores(ranks);
        if (scores.isEmpty()) {
            System.out.println("无法获取有效分数数据");
            return;
        }
        java.util.DoubleSummaryStatistics stats =
                scores.stream().mapToDouble(Double::doubleValue).summaryStatistics();
        System.out.println("最高分: " + String.format("%.2f", stats.getMax()));
        System.out.println("最低分: " + String.format("%.2f", stats.getMin()));
        System.out.println("平均分: " + String.format("%.2f", stats.getAverage()));
        System.out.println("中位数: " + String.format("%.2f", median(scores)));

        // LinkedHashMap keeps the buckets in ascending order for both the console and the chart.
        Map<String, Integer> scoreRanges = new java.util.LinkedHashMap<>();
        for (String range : SCORE_RANGES) {
            scoreRanges.put(range, 0);
        }
        for (double score : scores) {
            scoreRanges.merge(scoreBucket(score), 1, Integer::sum);
        }
        System.out.println("\n分数区间分布:");
        for (Map.Entry<String, Integer> entry : scoreRanges.entrySet()) {
            System.out.println(" " + entry.getKey() + ": " + entry.getValue() + " 所");
        }
        // NOTE(review): the output file is named "boxplot" although a histogram is generated;
        // the name is kept because other artefacts may refer to it.
        ChartGenerator.generateScoreHistogram(scoreRanges, "score_boxplot.png");
    }

    /** Prints how many universities fall into each category, most common first. */
    private void analyzeCategoryDistribution(List<UniversityRank> ranks) {
        System.out.println("\n【办学层次统计】");
        Map<String, Integer> categoryCounts = new HashMap<>();
        for (UniversityRank rank : ranks) {
            String category = rank.getCategory();
            if (category != null && !category.isEmpty()) {
                categoryCounts.merge(category, 1, Integer::sum);
            }
        }
        if (categoryCounts.isEmpty()) {
            System.out.println("没有办学层次数据");
            return;
        }
        List<Map.Entry<String, Integer>> sorted = categoryCounts.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed()
                        .thenComparing(Map.Entry.<String, Integer>comparingByKey()))
                .collect(Collectors.toList());
        System.out.println("\n办学层次分布:");
        for (Map.Entry<String, Integer> entry : sorted) {
            System.out.printf(" %s: %d 所%n", entry.getKey(), entry.getValue());
        }
    }

    /** Writes the text report; the file is encoded as UTF-8 regardless of the platform default. */
    private void generateReport(List<UniversityRank> ranks) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/ranking_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 大学排名数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析大学总数: " + ranks.size());
            writer.println();
            writer.println("【省份排行榜 TOP 10】");
            for (Map.Entry<String, Integer> entry : topProvinces(ranks, TOP_PROVINCE_COUNT)) {
                writer.println(" " + entry.getKey() + ": " + entry.getValue() + " 所大学");
            }
            List<Double> scores = collectValidScores(ranks);
            if (!scores.isEmpty()) {
                java.util.DoubleSummaryStatistics stats =
                        scores.stream().mapToDouble(Double::doubleValue).summaryStatistics();
                writer.println();
                writer.println("【分数统计】");
                writer.println("最高分: " + String.format("%.2f", stats.getMax()));
                writer.println("最低分: " + String.format("%.2f", stats.getMin()));
                writer.println("平均分: " + String.format("%.2f", stats.getAverage()));
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }

    /**
     * Counts universities per province and returns the {@code limit} largest entries,
     * highest count first; equal counts are ordered by province name so the result is
     * deterministic. Entries with a {@code null} or empty province are ignored.
     */
    private static List<Map.Entry<String, Integer>> topProvinces(List<UniversityRank> ranks, int limit) {
        Map<String, Integer> counts = new HashMap<>();
        for (UniversityRank rank : ranks) {
            String province = rank.getProvince();
            if (province != null && !province.isEmpty()) {
                counts.merge(province, 1, Integer::sum);
            }
        }
        return counts.entrySet().stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed()
                        .thenComparing(Map.Entry.<String, Integer>comparingByKey()))
                .limit(limit)
                .collect(Collectors.toList());
    }

    /** Returns the cleaned total scores of all entries, keeping only values greater than zero. */
    private static List<Double> collectValidScores(List<UniversityRank> ranks) {
        List<Double> scores = new ArrayList<>();
        for (UniversityRank rank : ranks) {
            double score = DataCleaner.cleanScore(rank.getTotalScore());
            if (score > 0) {
                scores.add(score);
            }
        }
        return scores;
    }

    /**
     * Returns the median of a non-empty list: the middle value for an odd count, the mean of
     * the two middle values for an even count. The input list is not modified.
     */
    private static double median(List<Double> values) {
        List<Double> sorted = new ArrayList<>(values);
        java.util.Collections.sort(sorted);
        int mid = sorted.size() / 2;
        if (sorted.size() % 2 == 1) {
            return sorted.get(mid);
        }
        return (sorted.get(mid - 1) + sorted.get(mid)) / 2.0;
    }

    /** Maps a positive score to its 20-wide bucket label; 80 and above go to "80-100". */
    private static String scoreBucket(double score) {
        if (score < 20) {
            return "0-20";
        }
        if (score < 40) {
            return "20-40";
        }
        if (score < 60) {
            return "40-60";
        }
        if (score < 80) {
            return "60-80";
        }
        return "80-100";
    }
}

163
project/src/main/java/com/example/crawler/service/WeatherAnalysisService.java

@ -0,0 +1,163 @@
package com.example.crawler.service;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.example.crawler.chart.ChartGenerator;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.model.Weather;
/**
 * Analyses per-city weather data: prints a comparison table, hourly temperature and humidity
 * summaries and a comfort index, hands the temperature series to {@link ChartGenerator}, and
 * writes a UTF-8 text report into {@code CrawlerConstants.REPORTS_DIR}.
 */
public class WeatherAnalysisService {
    static {
        // Make sure the report directory exists before any report is written.
        File dir = new File(CrawlerConstants.REPORTS_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    /**
     * Runs every analysis step and writes the report.
     *
     * @param weatherList weather records to analyse; {@code null} or empty prints a notice and returns
     */
    public void analyze(List<Weather> weatherList) {
        if (weatherList == null || weatherList.isEmpty()) {
            System.out.println("没有天气数据可分析");
            return;
        }
        System.out.println("\n========== 天气数据分析 ==========");
        System.out.println("共分析 " + weatherList.size() + " 个城市\n");
        analyzeCurrentWeather(weatherList);
        analyzeTemperatureTrend(weatherList);
        analyzeHumidityTrend(weatherList);
        analyzeComfortIndex(weatherList);
        generateReport(weatherList);
    }

    /** Prints one table row per city with the current readings and the comfort label. */
    private void analyzeCurrentWeather(List<Weather> weatherList) {
        System.out.println("【当前天气对比】");
        System.out.println("┌──────────┬──────────┬──────────┬──────────┬──────────┬──────────┐");
        System.out.println("│ 城市名称 │ 温度(°C)│ 湿度(%) │ 风速(km/h)│ 天气状况 │ 舒适度 │");
        System.out.println("├──────────┼──────────┼──────────┼──────────┼──────────┼──────────┤");
        for (Weather weather : weatherList) {
            double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity());
            String comfortDesc = getComfortDescription(comfort);
            System.out.printf("│ %-8s │ %8.1f │ %8.0f │ %8.1f │ %-8s │ %-8s │%n",
                    weather.getCityName(),
                    weather.getTemperature(),
                    weather.getHumidity(),
                    weather.getWindSpeed(),
                    weather.getWeatherDescription(),
                    comfortDesc);
        }
        System.out.println("└──────────┴──────────┴──────────┴──────────┴──────────┴──────────┘");
    }

    /**
     * Prints each city's hourly max/min/average temperature with the matching time labels,
     * draws one trend chart per city and one chart comparing all cities.
     */
    private void analyzeTemperatureTrend(List<Weather> weatherList) {
        System.out.println("\n【未来24小时温度分析】");
        // LinkedHashMap keeps the cities in input order for the comparison chart.
        Map<String, List<Double>> cityTemperatures = new java.util.LinkedHashMap<>();
        for (Weather weather : weatherList) {
            // A record may carry null hourly lists; treat those as "no data".
            List<Double> temps = orEmpty(weather.getHourlyTemperatures());
            List<String> times = orEmpty(weather.getHourlyTimes());
            cityTemperatures.put(weather.getCityName(), temps);
            if (!temps.isEmpty()) {
                java.util.DoubleSummaryStatistics stats =
                        temps.stream().mapToDouble(Double::doubleValue).summaryStatistics();
                double maxTemp = stats.getMax();
                double minTemp = stats.getMin();
                int maxIndex = temps.indexOf(maxTemp);
                int minIndex = temps.indexOf(minTemp);
                // The time list may be shorter than the temperature list; fall back to no label.
                String maxTime = maxIndex >= 0 && maxIndex < times.size() ? times.get(maxIndex) : "";
                String minTime = minIndex >= 0 && minIndex < times.size() ? times.get(minIndex) : "";
                System.out.printf(" %s: 最高 %.1f°C(%s) 最低 %.1f°C(%s) 平均 %.1f°C%n",
                        weather.getCityName(), maxTemp, maxTime, minTemp, minTime, stats.getAverage());
            }
            ChartGenerator.generateTemperatureTrend(
                    times,
                    temps,
                    weather.getCityName(),
                    "temperature_" + weather.getCityName() + ".png"
            );
        }
        ChartGenerator.generateMultiCityTemperatureComparison(cityTemperatures, "temperature_comparison.png");
    }

    /** Prints each city's average hourly humidity; cities without hourly data are skipped. */
    private void analyzeHumidityTrend(List<Weather> weatherList) {
        System.out.println("\n【未来24小时湿度分析】");
        for (Weather weather : weatherList) {
            List<Integer> humidities = orEmpty(weather.getHourlyHumidities());
            if (!humidities.isEmpty()) {
                double avgHumidity = humidities.stream().mapToInt(Integer::intValue).average().orElse(0);
                System.out.printf(" %s: 平均湿度 %.0f%%%n", weather.getCityName(), avgHumidity);
            }
        }
    }

    /** Prints the comfort score and label of every city. */
    private void analyzeComfortIndex(List<Weather> weatherList) {
        System.out.println("\n【舒适度指数分析】");
        System.out.println("(基于温度和湿度的体感舒适度计算,0-100分制)");
        for (Weather weather : weatherList) {
            double comfort = calculateComfortIndex(weather.getTemperature(), weather.getHumidity());
            String description = getComfortDescription(comfort);
            System.out.printf(" %s: %.1f分 (%s)%n", weather.getCityName(), comfort, description);
        }
    }

    /**
     * Computes a comfort score: 100 minus 3 points per degree away from 22 and 0.5 points per
     * humidity point away from 50, clamped to the range 0..100.
     */
    private double calculateComfortIndex(double temperature, double humidity) {
        double tempDiff = Math.abs(temperature - 22);
        double humDiff = Math.abs(humidity - 50);
        double comfort = 100 - (tempDiff * 3 + humDiff * 0.5);
        return Math.max(0, Math.min(100, comfort));
    }

    /** Maps a comfort score to its label using the thresholds 80 / 60 / 40 / 20. */
    private String getComfortDescription(double comfort) {
        if (comfort >= 80) return "非常舒适";
        if (comfort >= 60) return "舒适";
        if (comfort >= 40) return "一般";
        if (comfort >= 20) return "不舒适";
        return "极不舒适";
    }

    /** Writes the text report; the file is encoded as UTF-8 regardless of the platform default. */
    private void generateReport(List<Weather> weatherList) {
        String fileName = CrawlerConstants.REPORTS_DIR + "/weather_analysis_report.txt";
        try (PrintWriter writer = new PrintWriter(
                new FileWriter(fileName, java.nio.charset.StandardCharsets.UTF_8))) {
            writer.println("========== 天气数据分析报告 ==========");
            writer.println("生成时间: " + java.time.LocalDateTime.now());
            writer.println("分析城市数量: " + weatherList.size());
            writer.println("数据来源: Open-Meteo API (CC BY 4.0)");
            writer.println();
            writer.println("【多城市天气对比】");
            for (Weather weather : weatherList) {
                writer.println("\n城市: " + weather.getCityName());
                writer.println(" 当前温度: " + String.format("%.1f°C", weather.getTemperature()));
                writer.println(" 当前湿度: " + String.format("%.0f%%", weather.getHumidity()));
                writer.println(" 风速: " + String.format("%.1f km/h", weather.getWindSpeed()));
                writer.println(" 天气: " + weather.getWeatherDescription());
                List<Double> temps = orEmpty(weather.getHourlyTemperatures());
                if (!temps.isEmpty()) {
                    writer.println(" 24小时平均温度: " + String.format("%.1f°C", temps.stream().mapToDouble(Double::doubleValue).average().orElse(0)));
                }
            }
            writer.println("\n报告生成完成");
            System.out.println("\n报告已保存: " + fileName);
        } catch (IOException e) {
            System.err.println("生成报告失败: " + e.getMessage());
        }
    }

    /** Returns the given list, or a new empty list when it is {@code null}. */
    private static <T> List<T> orEmpty(List<T> list) {
        return list != null ? list : new ArrayList<>();
    }
}

127
project/src/main/java/com/example/crawler/strategy/BookCrawlStrategy.java

@ -0,0 +1,127 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.Book;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for book listings on books.toscrape.com (strategy pattern implementation
 * of {@link CrawlStrategy}).
 *
 * <p>Pages are fetched one after another until a page has no book entries, a 404 is reported
 * after at least one book was collected, or {@code MAX_PAGES} pages have been read.
 */
public class BookCrawlStrategy implements CrawlStrategy<Book> {
    private static final String BASE_URL = "https://books.toscrape.com/";
    private static final String PAGE_URL_FORMAT = "https://books.toscrape.com/catalogue/page-%d.html";
    private static final int MAX_PAGES = 30; // upper bound on the number of pages crawled
    /** Request headers sent with every page request; built once instead of once per page. */
    private static final Map<String, String> REQUEST_HEADERS = Map.of(
            "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
    );
    /** Words in the star-rating class attribute; index + 1 is the number of stars. */
    private static final String[] RATING_WORDS = {"One", "Two", "Three", "Four", "Five"};

    /**
     * Crawls the catalogue pages in order and collects every book found.
     *
     * @return the books collected so far when crawling stops normally
     * @throws CrawlException on network, parse or unexpected failures; a network failure whose
     *         message mentions 404 is treated as "end of catalogue" once some books were read
     */
    @Override
    public List<Book> crawl() throws CrawlException {
        List<Book> books = new ArrayList<>();
        // Declared outside the try so the catch block can report the failing page.
        int pageNum = 1;
        try {
            for (; pageNum <= MAX_PAGES; pageNum++) {
                String url = pageNum == 1 ? BASE_URL : String.format(PAGE_URL_FORMAT, pageNum);
                String html = HttpUtil.get(url, REQUEST_HEADERS);
                Document doc = Jsoup.parse(html);
                Elements bookElements = doc.select(".product_pod");
                // A page without book entries means the catalogue has ended.
                if (bookElements.isEmpty()) {
                    System.out.println("第 " + pageNum + " 页没有书籍数据,停止爬取");
                    return books;
                }
                for (Element bookElement : bookElements) {
                    books.add(parseBook(bookElement));
                }
                System.out.println("已爬取第 " + pageNum + " 页,共 " + books.size() + " 本书");
                // Pause between requests.
                HttpUtil.sleep(1);
            }
            // The loop only ends on its own when the page limit was reached.
            System.out.println("已达到最大爬取页数限制(" + MAX_PAGES + "页),停止爬取");
            return books;
        } catch (NetworkException e) {
            // A 404 after some pages were read means we ran past the last page: keep the data.
            // The message can be null, so check it before searching it.
            String message = e.getMessage();
            if (message != null && message.contains("404") && !books.isEmpty()) {
                System.out.println("第 " + pageNum + " 页不存在(404),返回已爬取的 " + books.size() + " 本书");
                return books;
            }
            throw new NetworkException("爬取书籍信息时网络异常: " + e.getMessage(), e);
        } catch (ParseException e) {
            throw new ParseException("解析书籍信息时异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取书籍信息时发生未知异常: " + e.getMessage(), e);
        }
    }

    /**
     * Builds a {@link Book} from one {@code .product_pod} element. Missing parts are replaced
     * with placeholder text instead of failing.
     *
     * @throws ParseException when reading the element fails unexpectedly
     */
    private Book parseBook(Element bookElement) throws ParseException {
        try {
            // Title: taken from the title attribute of the heading link.
            Element titleElement = bookElement.selectFirst("h3 a");
            String title = titleElement != null ? titleElement.attr("title") : "未知书名";
            // Price text.
            Element priceElement = bookElement.selectFirst(".price_color");
            String price = priceElement != null ? priceElement.text() : "未知价格";
            // Stock status text.
            Element availabilityElement = bookElement.selectFirst(".instock.availability");
            String availability = availabilityElement != null ? availabilityElement.text().trim() : "未知库存";
            // Star rating: encoded as a word in the element's class attribute.
            Element ratingElement = bookElement.selectFirst(".star-rating");
            String rating = "未知";
            if (ratingElement != null) {
                String classAttr = ratingElement.attr("class");
                for (int i = 0; i < RATING_WORDS.length; i++) {
                    if (classAttr.contains(RATING_WORDS[i])) {
                        rating = (i + 1) + "星";
                        break;
                    }
                }
            }
            return new Book(title, price, availability, rating);
        } catch (Exception e) {
            throw new ParseException("解析书籍信息失败: " + e.getMessage(), e);
        }
    }

    /** @return the display name of this data source */
    @Override
    public String getDataSourceName() {
        return "toscrape.com书籍信息";
    }
}

27
project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java

@ -0,0 +1,27 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.CrawlException;
import java.util.List;
/**
 * Contract for a crawl strategy (strategy pattern): each implementation knows how to fetch
 * one kind of data from one data source.
 *
 * @param <T> type of the items produced by the crawl
 */
public interface CrawlStrategy<T> {

    /**
     * Names the data source this strategy reads from.
     *
     * @return the data source name
     */
    String getDataSourceName();

    /**
     * Performs the crawl.
     *
     * @return the list of items that were crawled
     * @throws CrawlException when the crawl fails
     */
    List<T> crawl() throws CrawlException;
}

151
project/src/main/java/com/example/crawler/strategy/NewsCrawlStrategy.java

@ -0,0 +1,151 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.News;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for the Sina domestic-news listing page.
 *
 * <p>Strategy pattern: this is the {@link CrawlStrategy} implementation for Sina news.
 */
public class NewsCrawlStrategy implements CrawlStrategy<News> {
    private static final String NEWS_URL = "https://news.sina.com.cn/china/";
    private static final int MAX_NEWS_COUNT = 20;

    /**
     * Downloads the listing page and extracts at most {@code MAX_NEWS_COUNT} distinct news items.
     *
     * <p>Extraction runs in two passes: first over list-item containers (falling back to every
     * link pointing at sina.com.cn when no container matches), then over heading/title links if
     * the first pass did not fill the quota. Items whose URL was already collected are skipped.
     *
     * @return the collected news items, possibly empty
     * @throws CrawlException a {@link NetworkException} when the download fails, or a generic
     *         {@link CrawlException} for any other failure
     */
    @Override
    public List<News> crawl() throws CrawlException {
        List<News> newsList = new ArrayList<>();
        // URLs collected so far; shared by both passes so the same article is never added twice.
        java.util.Set<String> seenUrls = new java.util.HashSet<>();
        try {
            // Browser-like request headers.
            Map<String, String> headers = Map.of(
                    "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    "Referer", "https://news.sina.com.cn/",
                    "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"
            );
            String html = HttpUtil.get(NEWS_URL, headers);
            // The base URI is required: without it "abs:href" resolves relative and
            // protocol-relative links to "" and those items would be dropped.
            Document doc = Jsoup.parse(html, NEWS_URL);

            // The page structure may change, so several container selectors are tried at once.
            Elements newsElements = doc.select(".news-item, .news-list li, .list-item, .feed-card-item");
            if (newsElements.isEmpty()) {
                // No known container matched: fall back to a generic link selector.
                newsElements = doc.select("a[href*=sina.com.cn]");
            }
            for (Element element : newsElements) {
                if (newsList.size() >= MAX_NEWS_COUNT) {
                    break;
                }
                try {
                    News news = parseNews(element, seenUrls);
                    if (hasTitle(news)) {
                        newsList.add(news);
                    }
                } catch (ParseException e) {
                    // Best effort: skip the item that failed to parse and keep going.
                    continue;
                }
            }

            // Second pass over heading/title links when the first pass came up short.
            if (newsList.size() < MAX_NEWS_COUNT) {
                Elements titleElements = doc.select("h2 a, h3 a, .title a, .news-title a");
                for (Element element : titleElements) {
                    if (newsList.size() >= MAX_NEWS_COUNT) {
                        break;
                    }
                    try {
                        News news = parseNewsFromTitleElement(element, seenUrls);
                        if (hasTitle(news)) {
                            newsList.add(news);
                        }
                    } catch (ParseException e) {
                        // Best effort, same as above.
                        continue;
                    }
                }
            }
            System.out.println("已爬取 " + newsList.size() + " 条新浪新闻");
            return newsList;
        } catch (NetworkException e) {
            throw new NetworkException("爬取新浪新闻时网络异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取新浪新闻时发生未知异常: " + e.getMessage(), e);
        }
    }

    /** Returns true when the item exists and carries a non-empty title. */
    private static boolean hasTitle(News news) {
        return news != null && news.getTitle() != null && !news.getTitle().isEmpty();
    }

    /**
     * Parses one list-item element: the first link supplies title and URL, an optional
     * time-like child supplies the publish time.
     *
     * @param element  the container (or link) element
     * @param seenUrls URLs already collected; the parsed URL is recorded here
     * @return the parsed item, or {@code null} when title/URL is missing or the URL is a duplicate
     * @throws ParseException if reading the element fails unexpectedly
     */
    private News parseNews(Element element, java.util.Set<String> seenUrls) throws ParseException {
        try {
            String title = "";
            String url = "";
            String publishTime = "";
            // Title and link.
            Element linkElement = element.selectFirst("a");
            if (linkElement != null) {
                title = linkElement.text().trim();
                url = linkElement.attr("abs:href");
            }
            // Publish time, when the markup exposes one.
            Element timeElement = element.selectFirst(".time, .pubtime, span[class*=time]");
            if (timeElement != null) {
                publishTime = timeElement.text().trim();
            }
            // add() is evaluated last so only usable items are remembered as seen.
            if (title.isEmpty() || url.isEmpty() || !seenUrls.add(url)) {
                return null;
            }
            return new News(title, publishTime, url);
        } catch (Exception e) {
            throw new ParseException("解析新闻信息失败: " + e.getMessage(), e);
        }
    }

    /**
     * Parses a heading/title link; the publish time is left empty.
     *
     * @param element  the link element
     * @param seenUrls URLs already collected; the parsed URL is recorded here
     * @return the parsed item, or {@code null} when title/URL is missing or the URL is a duplicate
     * @throws ParseException if reading the element fails unexpectedly
     */
    private News parseNewsFromTitleElement(Element element, java.util.Set<String> seenUrls) throws ParseException {
        try {
            String title = element.text().trim();
            String url = element.attr("abs:href");
            if (title.isEmpty() || url.isEmpty() || !seenUrls.add(url)) {
                return null;
            }
            return new News(title, "", url);
        } catch (Exception e) {
            throw new ParseException("解析新闻标题失败: " + e.getMessage(), e);
        }
    }

    /** Returns the display name of this data source. */
    @Override
    public String getDataSourceName() {
        return "新浪国内新闻";
    }
}

24
project/src/main/java/com/example/crawler/strategy/StrategyFactory.java

@ -0,0 +1,24 @@
package com.example.crawler.strategy;
import com.example.crawler.strategy.BookCrawlStrategy;
import com.example.crawler.strategy.NewsCrawlStrategy;
import com.example.crawler.strategy.UniversityRankCrawlStrategy;
import com.example.crawler.strategy.WeatherCrawlStrategy;
/**
 * Maps a numeric menu choice to the crawl strategy that serves it.
 */
public class StrategyFactory {
    /**
     * Creates a fresh strategy instance for the given choice.
     *
     * @param choice 1 = books, 2 = news, 3 = university ranking, 4 = weather
     * @return a newly constructed strategy
     * @throws IllegalArgumentException if {@code choice} is none of the values above
     */
    public static CrawlStrategy<?> getStrategy(int choice) {
        if (choice == 1) {
            return new BookCrawlStrategy();
        }
        if (choice == 2) {
            return new NewsCrawlStrategy();
        }
        if (choice == 3) {
            return new UniversityRankCrawlStrategy();
        }
        if (choice == 4) {
            return new WeatherCrawlStrategy();
        }
        String message = "Invalid choice: " + choice;
        throw new IllegalArgumentException(message);
    }
}

148
project/src/main/java/com/example/crawler/strategy/UniversityRankCrawlStrategy.java

@ -0,0 +1,148 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.UniversityRank;
import com.example.crawler.util.HttpUtil;
/**
 * Crawl strategy for the ShanghaiRanking (软科) Best Chinese Universities Ranking page.
 *
 * <p>Strategy pattern: this is the {@link CrawlStrategy} implementation for the ranking table.
 */
public class UniversityRankCrawlStrategy implements CrawlStrategy<UniversityRank> {
    private static final String RANKING_URL = "https://www.shanghairanking.cn/rankings/bcur/2025";
    /** Upper bound on the number of table rows collected per crawl. */
    private static final int MAX_ROWS = 200;
    /** Matches the first run of ASCII digits in a rank cell such as "1-3". */
    private static final java.util.regex.Pattern DIGIT_RUN = java.util.regex.Pattern.compile("[0-9]+");

    /**
     * Downloads the ranking page and converts its table rows into {@link UniversityRank} records.
     *
     * @return up to {@code MAX_ROWS} ranking records, possibly empty
     * @throws CrawlException a {@link NetworkException} when the download fails, or a generic
     *         {@link CrawlException} for any other failure
     */
    @Override
    public List<UniversityRank> crawl() throws CrawlException {
        List<UniversityRank> rankings = new ArrayList<>();
        try {
            // Browser-like request headers.
            Map<String, String> headers = Map.of(
                    "User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
                    "Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                    "Referer", "https://www.shanghairanking.cn/"
            );
            // Pause before the request to go easy on the server.
            HttpUtil.sleep(3);
            String html = HttpUtil.get(RANKING_URL, headers);
            Document doc = Jsoup.parse(html);

            // Try progressively more generic selectors until one yields rows.
            Elements rows = doc.select("table tbody tr");
            if (rows.isEmpty()) {
                rows = doc.select(".rk-table tbody tr");
            }
            if (rows.isEmpty()) {
                rows = doc.select("tr");
            }

            for (Element row : rows) {
                try {
                    UniversityRank ranking = parseRow(row);
                    if (ranking != null && ranking.getRank() != null) {
                        rankings.add(ranking);
                        if (rankings.size() >= MAX_ROWS) {
                            break;
                        }
                    }
                } catch (ParseException e) {
                    // Best effort: skip rows that cannot be parsed.
                    continue;
                }
            }
            System.out.println("已爬取 " + rankings.size() + " 条大学排名数据");
            return rankings;
        } catch (NetworkException e) {
            throw new NetworkException("爬取软科大学排名时网络异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取软科大学排名时发生未知异常: " + e.getMessage(), e);
        }
    }

    /**
     * Converts one table row into a record.
     *
     * <p>Column layout used: 1 = rank, 2 = university name, 3 = "province category"
     * (whitespace separated), 4 = total score.
     *
     * @param row a {@code <tr>} element
     * @return the parsed record, or {@code null} when the row has fewer than four cells or no
     *         usable rank
     * @throws ParseException if reading the row fails unexpectedly
     */
    private UniversityRank parseRow(Element row) throws ParseException {
        try {
            Elements cells = row.select("td");
            if (cells.size() < 4) {
                return null;
            }
            Integer rank = parseRank(cells.get(0).text().trim());
            if (rank == null) {
                return null;
            }
            String universityName = cells.get(1).text().trim();
            String totalScore = cells.get(3).text().trim();

            // The third column may hold the province followed by the institution category.
            String province = "";
            String category = "";
            String[] parts = cells.get(2).text().trim().split("\\s+");
            if (parts.length >= 1) {
                province = parts[0];
            }
            if (parts.length >= 2) {
                category = parts[1];
            }
            return new UniversityRank(rank, universityName, totalScore, province, category);
        } catch (Exception e) {
            throw new ParseException("解析大学排名行数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * Parses a rank cell. Plain integers are parsed directly; for anything else (for example a
     * range such as "1-3") the FIRST run of digits is used, so "1-3" yields 1 rather than the
     * concatenation 13.
     *
     * @param rankStr trimmed cell text
     * @return the rank, or {@code null} when the text contains no parsable number
     */
    private static Integer parseRank(String rankStr) {
        try {
            return Integer.parseInt(rankStr);
        } catch (NumberFormatException notPlainInteger) {
            java.util.regex.Matcher matcher = DIGIT_RUN.matcher(rankStr);
            if (!matcher.find()) {
                return null;
            }
            try {
                return Integer.parseInt(matcher.group());
            } catch (NumberFormatException tooLarge) {
                // A digit run that does not fit in an int is not a usable rank.
                return null;
            }
        }
    }

    /** Returns the display name of this data source. */
    @Override
    public String getDataSourceName() {
        return "软科中国大学排名";
    }
}

177
project/src/main/java/com/example/crawler/strategy/WeatherCrawlStrategy.java

@ -0,0 +1,177 @@
package com.example.crawler.strategy;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.example.crawler.constant.CrawlerConstants;
import com.example.crawler.exception.CrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.exception.ParseException;
import com.example.crawler.model.Weather;
import com.example.crawler.util.HttpUtil;
import com.google.gson.JsonArray;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParser;
/**
 * Crawl strategy that queries the weather API configured in {@link CrawlerConstants} once per
 * configured city and converts the JSON response into {@link Weather} records.
 */
public class WeatherCrawlStrategy implements CrawlStrategy<Weather> {
    /** Maximum number of hourly samples kept per series. */
    private static final int MAX_HOURLY_POINTS = 24;
    /** Length of the "yyyy-MM-ddTHH" prefix shared by timestamps within the same hour. */
    private static final int HOUR_PREFIX_LENGTH = 13;
    /** Length of the "HH:mm" clock part kept from a timestamp. */
    private static final int CLOCK_LENGTH = 5;

    /**
     * Fetches weather data for every entry of {@code CrawlerConstants.CITY_COORDINATES},
     * sleeping two seconds between requests.
     *
     * @return one record per city, in map iteration order
     * @throws CrawlException a {@link NetworkException} or {@link ParseException} when a request
     *         or response fails, or a generic {@link CrawlException} for anything else; the first
     *         failure aborts the whole crawl
     */
    @Override
    public List<Weather> crawl() throws CrawlException {
        List<Weather> weatherList = new ArrayList<>();
        try {
            // Same headers for every city, so build them once.
            Map<String, String> headers = Map.of(
                    "User-Agent", CrawlerConstants.USER_AGENT
            );
            for (Map.Entry<String, double[]> entry : CrawlerConstants.CITY_COORDINATES.entrySet()) {
                String cityName = entry.getKey();
                double[] coords = entry.getValue();
                String weatherUrl = buildApiUrl(coords[0], coords[1]);
                String response = HttpUtil.get(weatherUrl, headers);
                weatherList.add(parseWeatherData(cityName, response));
                System.out.println("已获取 " + cityName + " 的天气信息");
                HttpUtil.sleep(2);
            }
            return weatherList;
        } catch (NetworkException e) {
            throw new NetworkException("爬取天气数据时网络异常: " + e.getMessage(), e);
        } catch (ParseException e) {
            throw new ParseException("解析天气数据时异常: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new CrawlException("爬取天气数据时发生未知异常: " + e.getMessage(), e);
        }
    }

    /** Builds the request URL for one coordinate pair (current weather plus one day of hourly data). */
    private String buildApiUrl(double latitude, double longitude) {
        return CrawlerConstants.URL_WEATHER_API + "?latitude=" + latitude +
                "&longitude=" + longitude +
                "&current_weather=true" +
                "&hourly=temperature_2m,relative_humidity_2m,wind_speed_10m" +
                "&forecast_days=1" +
                "&timezone=Asia/Shanghai";
    }

    /**
     * Converts one API response into a {@link Weather} record.
     *
     * @param cityName city the response belongs to
     * @param jsonData raw JSON response body
     * @return the populated record
     * @throws ParseException if the JSON cannot be read
     */
    private Weather parseWeatherData(String cityName, String jsonData) throws ParseException {
        try {
            JsonObject obj = JsonParser.parseString(jsonData).getAsJsonObject();
            Weather weather = new Weather();
            weather.setCityName(cityName);

            JsonObject currentWeather = obj.getAsJsonObject("current_weather");
            if (currentWeather != null) {
                weather.setTemperature(cleanTemperature(getJsonDouble(currentWeather, "temperature", 0)));
                weather.setWindSpeed(cleanWindSpeed(getJsonDouble(currentWeather, "windspeed", 0)));
                weather.setWeatherCode(String.valueOf(getJsonInt(currentWeather, "weathercode", -1)));
            }

            JsonObject hourly = obj.getAsJsonObject("hourly");
            if (hourly != null) {
                JsonArray times = hourly.getAsJsonArray("time");
                JsonArray temps = hourly.getAsJsonArray("temperature_2m");
                JsonArray humidities = hourly.getAsJsonArray("relative_humidity_2m");
                JsonArray windSpeeds = hourly.getAsJsonArray("wind_speed_10m");
                if (times != null && temps != null) {
                    int count = Math.min(times.size(), MAX_HOURLY_POINTS);
                    for (int i = 0; i < count; i++) {
                        weather.getHourlyTimes().add(cleanTimeString(getJsonString(times, i, "")));
                        weather.getHourlyTemperatures().add(cleanTemperature(getJsonDouble(temps, i, 0)));
                    }
                }
                if (humidities != null) {
                    int count = Math.min(humidities.size(), MAX_HOURLY_POINTS);
                    for (int i = 0; i < count; i++) {
                        weather.getHourlyHumidities().add(cleanHumidity(getJsonInt(humidities, i, 50)));
                    }
                }
                if (windSpeeds != null) {
                    int count = Math.min(windSpeeds.size(), MAX_HOURLY_POINTS);
                    for (int i = 0; i < count; i++) {
                        weather.getHourlyWindSpeeds().add(cleanWindSpeed(getJsonDouble(windSpeeds, i, 0)));
                    }
                }
                if (!weather.getHourlyHumidities().isEmpty()) {
                    // current_weather carries no humidity, so take the hourly sample of the
                    // current hour; fall back to the first sample when it cannot be located.
                    int index = currentHourIndex(currentWeather, times);
                    if (index >= weather.getHourlyHumidities().size()) {
                        index = 0;
                    }
                    weather.setHumidity(weather.getHourlyHumidities().get(index));
                }
            }
            return weather;
        } catch (Exception e) {
            throw new ParseException("解析天气JSON数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * Finds the position in the hourly "time" series whose timestamp falls in the same hour as
     * {@code current_weather.time}.
     *
     * @return the matching index, or 0 when either input is missing or no entry matches
     */
    private int currentHourIndex(JsonObject currentWeather, JsonArray times) {
        if (currentWeather == null || times == null) {
            return 0;
        }
        JsonElement timeElement = currentWeather.get("time");
        if (timeElement == null || !timeElement.isJsonPrimitive()) {
            return 0;
        }
        String currentTime = timeElement.getAsString();
        if (currentTime.length() < HOUR_PREFIX_LENGTH) {
            return 0;
        }
        String hourPrefix = currentTime.substring(0, HOUR_PREFIX_LENGTH);
        for (int i = 0; i < times.size(); i++) {
            if (getJsonString(times, i, "").startsWith(hourPrefix)) {
                return i;
            }
        }
        return 0;
    }

    /** Reads a string array element, returning the default when out of range or JSON null. */
    private String getJsonString(JsonArray arr, int index, String defaultValue) {
        if (arr == null || index >= arr.size()) {
            return defaultValue;
        }
        JsonElement element = arr.get(index);
        return element.isJsonNull() ? defaultValue : element.getAsString();
    }

    /** Reads a double member, returning the default when absent or JSON null. */
    private double getJsonDouble(JsonObject obj, String key, double defaultValue) {
        JsonElement element = obj.get(key);
        if (element == null || element.isJsonNull()) {
            return defaultValue;
        }
        return element.getAsDouble();
    }

    /** Reads an int member, returning the default when absent or JSON null. */
    private int getJsonInt(JsonObject obj, String key, int defaultValue) {
        JsonElement element = obj.get(key);
        if (element == null || element.isJsonNull()) {
            return defaultValue;
        }
        return element.getAsInt();
    }

    /** Reads a double array element, returning the default when out of range or JSON null. */
    private double getJsonDouble(JsonArray arr, int index, double defaultValue) {
        if (arr == null || index >= arr.size()) {
            return defaultValue;
        }
        JsonElement element = arr.get(index);
        if (element == null || element.isJsonNull()) {
            return defaultValue;
        }
        return element.getAsDouble();
    }

    /** Reads an int array element, returning the default when out of range or JSON null. */
    private int getJsonInt(JsonArray arr, int index, int defaultValue) {
        if (arr == null || index >= arr.size()) {
            return defaultValue;
        }
        JsonElement element = arr.get(index);
        if (element == null || element.isJsonNull()) {
            return defaultValue;
        }
        return element.getAsInt();
    }

    /** Rounds a temperature to one decimal place. */
    private double cleanTemperature(double temp) {
        return Math.round(temp * 10.0) / 10.0;
    }

    /** Rounds a wind speed to one decimal place. */
    private double cleanWindSpeed(double speed) {
        return Math.round(speed * 10.0) / 10.0;
    }

    /** Maps negative humidity to 50 and caps values above 100 at 100. */
    private int cleanHumidity(int humidity) {
        if (humidity < 0) {
            return 50;
        }
        if (humidity > 100) {
            return 100;
        }
        return humidity;
    }

    /**
     * Reduces an ISO-like timestamp to its "HH:mm" part (the five characters after the 'T').
     * Strings without a 'T' are returned unchanged; a time part shorter than five characters is
     * returned as far as it goes instead of throwing.
     */
    private String cleanTimeString(String time) {
        if (time == null || time.isEmpty()) {
            return "";
        }
        int separator = time.indexOf("T");
        if (separator < 0) {
            return time;
        }
        int start = separator + 1;
        int end = Math.min(start + CLOCK_LENGTH, time.length());
        return time.substring(start, end);
    }

    /** Returns the display name of this data source. */
    @Override
    public String getDataSourceName() {
        return "Open-Meteo 实时天气";
    }
}

122
project/src/main/java/com/example/crawler/util/DataCleaner.java

@ -0,0 +1,122 @@
package com.example.crawler.util;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Data-cleaning helpers.
 * Static methods that normalise raw scraped text (prices, ratings, timestamps, titles, scores)
 * and tokenise text for word-frequency counting.
 */
public class DataCleaner {
    /** Single-character function words that are never counted as words. */
    private static final java.util.Set<String> STOP_WORDS = java.util.Set.of(
            "的", "了", "是", "在", "和", "与", "对", "为", "有", "我",
            "你", "他", "她", "它", "这", "那", "就", "也", "都", "要",
            "会", "能", "可", "以", "说", "到", "来", "去", "着", "过");

    /** Everything that is neither a CJK ideograph in the given range, an ASCII letter nor a digit. */
    private static final Pattern NON_WORD_CHARS = Pattern.compile("[^\u4e00-\u9fa5a-zA-Z0-9]");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /** Timestamp layouts accepted by {@link #cleanNewsTime(String)}, tried in order. */
    private static final DateTimeFormatter[] NEWS_TIME_FORMATS = {
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"),
            DateTimeFormatter.ofPattern("yyyy年MM月dd日 HH:mm")
    };

    /** Rating words in descending order; the word at index i stands for the value 5 - i. */
    private static final String[] RATING_WORDS = {"Five", "Four", "Three", "Two", "One"};

    /**
     * Extracts the numeric value from a price string such as a currency-prefixed amount.
     *
     * @param price raw price text, may be {@code null}
     * @return the parsed amount, or 0.0 when the text is null, empty or not a number
     */
    public static double cleanPrice(String price) {
        return parseDecimal(price);
    }

    /**
     * Converts a rating CSS class containing "One".."Five" to its numeric value.
     *
     * @param ratingClass raw class attribute, may be {@code null}
     * @return 1..5, or 0 when no rating word is present
     */
    public static int cleanRating(String ratingClass) {
        if (ratingClass == null) {
            return 0;
        }
        for (int i = 0; i < RATING_WORDS.length; i++) {
            if (ratingClass.contains(RATING_WORDS[i])) {
                return RATING_WORDS.length - i;
            }
        }
        return 0;
    }

    /**
     * Parses a news timestamp in either "yyyy-MM-dd HH:mm:ss" or "yyyy年MM月dd日 HH:mm" form.
     * Surrounding whitespace is ignored.
     *
     * @param timeStr raw timestamp text, may be {@code null}
     * @return the parsed time, or the current time when the text is missing or unparsable
     */
    public static LocalDateTime cleanNewsTime(String timeStr) {
        if (timeStr == null) {
            return LocalDateTime.now();
        }
        String text = timeStr.trim();
        if (text.isEmpty()) {
            return LocalDateTime.now();
        }
        for (DateTimeFormatter format : NEWS_TIME_FORMATS) {
            try {
                return LocalDateTime.parse(text, format);
            } catch (java.time.format.DateTimeParseException ignored) {
                // Not this layout; try the next one.
            }
        }
        return LocalDateTime.now();
    }

    /**
     * Trims a title and collapses every whitespace run to a single space.
     *
     * @param title raw title, may be {@code null}
     * @return the normalised title, or "" for {@code null}
     */
    public static String cleanTitle(String title) {
        if (title == null) {
            return "";
        }
        return WHITESPACE_RUN.matcher(title.trim()).replaceAll(" ");
    }

    /**
     * Extracts the numeric value from a score string.
     *
     * @param score raw score text, may be {@code null}
     * @return the parsed score, or 0.0 when the text is null, empty or not a number
     */
    public static double cleanScore(String score) {
        return parseDecimal(score);
    }

    /**
     * Splits text into tokens of consecutive CJK / ASCII-letter / digit characters.
     * The result never contains empty tokens, even when the text starts with punctuation.
     *
     * @param text raw text, may be {@code null}
     * @return the tokens in order of appearance; an empty array when there are none
     */
    public static String[] extractWords(String text) {
        if (text == null || text.isEmpty()) {
            return new String[0];
        }
        // Trim after replacing so a leading separator does not produce an empty first token.
        String cleaned = NON_WORD_CHARS.matcher(text).replaceAll(" ").trim();
        if (cleaned.isEmpty()) {
            return new String[0];
        }
        return WHITESPACE_RUN.split(cleaned);
    }

    /**
     * Tells whether a token should be ignored when counting words.
     *
     * @param word token to test, may be {@code null}
     * @return true for {@code null}, tokens shorter than two characters and listed stop words
     */
    public static boolean isStopWord(String word) {
        return word == null || word.length() < 2 || STOP_WORDS.contains(word);
    }

    /**
     * Counts how often each non-stop-word occurs.
     *
     * @param words tokens to count
     * @return a map from word to number of occurrences
     */
    public static Map<String, Integer> countWordFrequency(String[] words) {
        Map<String, Integer> frequency = new HashMap<>();
        for (String word : words) {
            if (!isStopWord(word)) {
                frequency.merge(word, 1, Integer::sum);
            }
        }
        return frequency;
    }

    /**
     * Returns the hour-of-day (0-23) of a timestamp.
     *
     * @param dateTime the timestamp
     * @return its hour field
     */
    public static int extractHour(LocalDateTime dateTime) {
        return dateTime.getHour();
    }

    /** Strips everything except digits and dots and parses the rest; 0.0 when that fails. */
    private static double parseDecimal(String raw) {
        if (raw == null || raw.isEmpty()) {
            return 0.0;
        }
        String digits = raw.replaceAll("[^0-9.]", "");
        try {
            return Double.parseDouble(digits);
        } catch (NumberFormatException e) {
            return 0.0;
        }
    }
}

126
project/src/main/java/com/example/crawler/util/HttpUtil.java

@ -0,0 +1,126 @@
package com.example.crawler.util;
import com.example.crawler.exception.NetworkException;
import java.net.URI;
import java.net.http.HttpClient;
import java.net.http.HttpRequest;
import java.net.http.HttpResponse;
import java.time.Duration;
import java.util.Map;
/**
 * HTTP helper.
 * Wraps GET/POST requests on top of the JDK's built-in {@link HttpClient} (Java 11+).
 */
public class HttpUtil {
    /** Applied both to connection establishment and to each request as a whole. */
    private static final Duration TIMEOUT = Duration.ofSeconds(30);
    private static final HttpClient httpClient = HttpClient.newBuilder()
            .connectTimeout(TIMEOUT)
            .followRedirects(HttpClient.Redirect.NORMAL)
            .build();
    private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";

    /**
     * Sends a GET request with default headers.
     *
     * @param url request URL
     * @return response body
     * @throws NetworkException if the request fails or the status is not 2xx
     */
    public static String get(String url) throws NetworkException {
        return get(url, Map.of());
    }

    /**
     * Sends a GET request with custom headers.
     *
     * @param url     request URL
     * @param headers extra request headers; they override the defaults, {@code null} means none
     * @return response body
     * @throws NetworkException if the request fails or the status is not 2xx
     */
    public static String get(String url, Map<String, String> headers) throws NetworkException {
        return send(url, false, null, headers);
    }

    /**
     * Sends a POST request. The Content-Type defaults to application/json and can be replaced
     * through {@code headers}.
     *
     * @param url     request URL
     * @param body    request body
     * @param headers extra request headers; they override the defaults, {@code null} means none
     * @return response body
     * @throws NetworkException if the request fails or the status is not 2xx
     */
    public static String post(String url, String body, Map<String, String> headers) throws NetworkException {
        return send(url, true, body, headers);
    }

    /**
     * Builds and executes one request; shared by {@code get} and {@code post}.
     * Every failure, including an invalid URL or header, surfaces as a {@link NetworkException}.
     */
    private static String send(String url, boolean isPost, String body, Map<String, String> headers)
            throws NetworkException {
        try {
            HttpRequest.Builder requestBuilder = HttpRequest.newBuilder()
                    .uri(URI.create(url))
                    .timeout(TIMEOUT);
            if (isPost) {
                requestBuilder.setHeader("Content-Type", "application/json")
                        .POST(HttpRequest.BodyPublishers.ofString(body));
            } else {
                requestBuilder.GET();
            }
            // Defaults first, then caller headers. setHeader replaces rather than appends, so a
            // caller-supplied User-Agent or Content-Type never ends up sent twice.
            requestBuilder.setHeader("User-Agent", DEFAULT_USER_AGENT);
            if (headers != null) {
                headers.forEach(requestBuilder::setHeader);
            }
            HttpResponse<String> response =
                    httpClient.send(requestBuilder.build(), HttpResponse.BodyHandlers.ofString());
            int status = response.statusCode();
            // Any 2xx is a success (for example 201/204 on POST), not only 200.
            if (status < 200 || status >= 300) {
                throw new NetworkException("HTTP请求失败,状态码: " + status);
            }
            return response.body();
        } catch (NetworkException e) {
            throw e;
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new NetworkException("网络请求失败: " + e.getMessage(), e);
        } catch (Exception e) {
            throw new NetworkException("网络请求失败: " + e.getMessage(), e);
        }
    }

    /**
     * Pauses between requests to avoid putting pressure on the server.
     * Zero or negative values return immediately; an interrupt ends the pause early and leaves
     * the thread's interrupt flag set.
     *
     * @param seconds pause length in seconds
     */
    public static void sleep(int seconds) {
        if (seconds <= 0) {
            return;
        }
        try {
            Thread.sleep(seconds * 1000L);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

95
project/src/main/java/com/example/crawler/util/JsonUtil.java

@ -0,0 +1,95 @@
package com.example.crawler.util;
import com.example.crawler.exception.DataSaveException;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
/**
* JSON工具类
* 封装JSON序列化和文件读写操作
*/
public class JsonUtil {
private static final Gson gson = new GsonBuilder()
.setPrettyPrinting()
.disableHtmlEscaping()
.create();
/**
* 将对象序列化为JSON字符串
*
* @param obj 对象
* @return JSON字符串
*/
public static String toJson(Object obj) {
return gson.toJson(obj);
}
/**
* 将JSON字符串反序列化为对象
*
* @param json JSON字符串
* @param classOfT 目标类
* @param <T> 泛型类型
* @return 反序列化后的对象
*/
public static <T> T fromJson(String json, Class<T> classOfT) {
return gson.fromJson(json, classOfT);
}
/**
* 将对象保存为JSON文件
*
* @param obj 对象
* @param filePath 文件路径
* @throws DataSaveException 数据保存异常
*/
public static void saveToJsonFile(Object obj, String filePath) throws DataSaveException {
try {
// 确保目录存在
Path path = Paths.get(filePath);
Path parentDir = path.getParent();
if (parentDir != null && !Files.exists(parentDir)) {
Files.createDirectories(parentDir);
}
try (FileWriter writer = new FileWriter(filePath)) {
gson.toJson(obj, writer);
}
} catch (IOException e) {
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e);
}
}
/**
* 将列表保存为JSON文件
*
* @param list 列表
* @param filePath 文件路径
* @param <T> 泛型类型
* @throws DataSaveException 数据保存异常
*/
public static <T> void saveListToJsonFile(List<T> list, String filePath) throws DataSaveException {
try {
// 确保目录存在
Path path = Paths.get(filePath);
Path parentDir = path.getParent();
if (parentDir != null && !Files.exists(parentDir)) {
Files.createDirectories(parentDir);
}
try (FileWriter writer = new FileWriter(filePath)) {
gson.toJson(list, writer);
}
} catch (IOException e) {
throw new DataSaveException("保存JSON文件失败: " + e.getMessage(), e);
}
}
}

72
project/src/main/java/com/example/crawler/view/CrawlerView.java

@ -0,0 +1,72 @@
package com.example.crawler.view;
import java.util.Scanner;
/**
 * Console view of the crawler.
 * MVC pattern: the View layer, responsible for CLI output and reading user input.
 */
public class CrawlerView {
    /**
     * Prints the main menu followed by the selection prompt (no trailing newline after the prompt).
     */
    public void showMenu() {
        String[] menuLines = {
            "\n=== 数据爬取与分析系统 ===",
            "1. 爬取书籍信息(toscrape.com)",
            "2. 爬取新浪国内新闻",
            "3. 爬取软科中国大学排名",
            "4. 爬取Open-Meteo实时天气",
            "5. 爬取全部数据并保存",
            "6. 保存当前数据到文件",
            "7. 生成所有数据源的分析报告与图表",
            "8. 爬取并分析所有数据(一键完成)",
            "9. 退出"
        };
        for (String line : menuLines) {
            System.out.println(line);
        }
        System.out.print("请选择操作:");
    }

    /**
     * Reads one line and interprets it as a menu number.
     *
     * @param scanner input source
     * @return the number entered, or -1 when the trimmed line is not an integer
     */
    public int getInput(Scanner scanner) {
        String raw = scanner.nextLine().trim();
        int selection;
        try {
            selection = Integer.parseInt(raw);
        } catch (NumberFormatException notANumber) {
            // Signal an invalid selection to the caller.
            selection = -1;
        }
        return selection;
    }

    /**
     * Prints an error message to standard error.
     *
     * @param message error text
     */
    public void showError(String message) {
        String line = "错误: " + message;
        System.err.println(line);
    }

    /**
     * Prints a success message to standard output.
     *
     * @param message success text
     */
    public void showSuccess(String message) {
        String line = "成功: " + message;
        System.out.println(line);
    }

    /**
     * Waits for the user to press Enter, then emits the ANSI sequence that homes the cursor and
     * clears the screen.
     *
     * @param scanner input source
     */
    public void pause(Scanner scanner) {
        System.out.print("\n按回车键继续...");
        scanner.nextLine();
        String clearScreen = "\033[H\033[2J";
        System.out.print(clearScreen);
        System.out.flush();
    }
}

359
project/src/main/java/com/university/Main.java

@ -1,359 +0,0 @@
package com.university;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Scanner;
import com.university.analysis.RankAnalyzer;
import com.university.crawler.UniversityRankCrawler;
import com.university.model.RankChange;
import com.university.model.University;
import com.university.model.UniversityComparison;
import com.university.storage.DataStorage;
import com.university.visualization.ChartGenerator;
import com.university.visualization.ConsoleReporter;
/**
 * Program entry point.
 * Wires the crawler, storage, analysis and reporting modules together and drives an
 * interactive console menu.
 */
public class Main {
    private static final String YEAR_PROMPT = "请输入要查看的年份(2022-2024): ";

    // Core components
    private final UniversityRankCrawler crawler;
    private final DataStorage storage;
    private final RankAnalyzer analyzer;
    private final ChartGenerator chartGenerator;
    private final ConsoleReporter reporter;
    // Ranking data per year, filled by initializeData()
    private final Map<Integer, List<University>> dataCache;
    private final Scanner scanner;

    public Main() {
        this.crawler = new UniversityRankCrawler();
        this.storage = new DataStorage();
        this.analyzer = new RankAnalyzer();
        this.chartGenerator = new ChartGenerator();
        this.reporter = new ConsoleReporter();
        this.dataCache = new HashMap<>();
        this.scanner = new Scanner(System.in);
    }

    public static void main(String[] args) {
        Main app = new Main();
        app.run();
    }

    /**
     * Runs the program: prints the welcome banner, loads the data, then serves menu choices
     * until the user selects "0" or standard input ends.
     */
    public void run() {
        reporter.printWelcome();
        initializeData();
        boolean running = true;
        while (running) {
            reporter.printMenu();
            if (!scanner.hasNextLine()) {
                // Input stream closed (for example piped input exhausted): stop instead of throwing.
                break;
            }
            String choice = scanner.nextLine().trim();
            switch (choice) {
                case "1":
                    showTopN();
                    break;
                case "2":
                    showByProvince();
                    break;
                case "3":
                    searchUniversity();
                    break;
                case "4":
                    showProvinceStatistics();
                    break;
                case "5":
                    showScoreStatistics();
                    break;
                case "6":
                    showRankChanges();
                    break;
                case "7":
                    compareUniversities();
                    break;
                case "8":
                    showYearlyTrend();
                    break;
                case "9":
                    generateAllCharts();
                    break;
                case "0":
                    running = false;
                    System.out.println("感谢使用,再见!");
                    break;
                default:
                    System.out.println("无效选择,请重新输入!");
            }
        }
        scanner.close();
    }

    /**
     * Loads the 2022-2024 rankings into the cache: from file when one exists, otherwise by
     * crawling and then saving the result.
     */
    private void initializeData() {
        System.out.println("正在初始化数据...");
        int[] years = {2022, 2023, 2024};
        for (int year : years) {
            List<University> data;
            if (storage.dataExists(year)) {
                System.out.println("从文件加载 " + year + " 年数据...");
                data = storage.readRawData(year);
            } else {
                System.out.println("爬取 " + year + " 年数据...");
                data = crawler.crawlRankings(year);
                storage.saveRawData(data, year);
            }
            dataCache.put(year, data);
        }
        System.out.println("数据初始化完成!\n");
    }

    /**
     * Prompts for an integer.
     *
     * @param prompt text shown before reading
     * @return the parsed number, or {@code null} (after telling the user) when the line is not an integer
     */
    private Integer readInt(String prompt) {
        String line = readLine(prompt);
        try {
            return Integer.valueOf(line);
        } catch (NumberFormatException e) {
            // Bad input must not crash the menu loop.
            System.out.println("输入无效,请输入整数!");
            return null;
        }
    }

    /** Prints the prompt and returns the next input line, trimmed. */
    private String readLine(String prompt) {
        System.out.print(prompt);
        return scanner.nextLine().trim();
    }

    /**
     * Looks up the cached data of one year.
     *
     * @return the data, or {@code null} (after telling the user) when the year is not cached
     */
    private List<University> lookupYear(int year) {
        List<University> data = dataCache.get(year);
        if (data == null) {
            System.out.println("该年份数据不存在!");
        }
        return data;
    }

    /** Shows the top N universities of a year and renders the matching bar chart. */
    private void showTopN() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        Integer countInput = readInt("请输入要查看的数量: ");
        if (countInput == null) {
            return;
        }
        int year = yearInput;
        int n = countInput;
        if (n < 0) {
            // A negative size would make the analyzer's Stream.limit throw.
            System.out.println("输入无效,请输入整数!");
            return;
        }
        List<University> data = lookupYear(year);
        if (data == null) {
            return;
        }
        List<University> topN = analyzer.getTopN(data, n);
        reporter.printUniversityList(topN, year + "年 Top " + n + " 高校");
        chartGenerator.generateTopNBarChart(data, year, n);
    }

    /** Lists the universities of one province for a year. */
    private void showByProvince() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        int year = yearInput;
        String province = readLine("请输入省份名称: ");
        List<University> data = lookupYear(year);
        if (data == null) {
            return;
        }
        List<University> result = analyzer.getByProvince(data, province);
        if (result.isEmpty()) {
            System.out.println("该省份没有高校数据!");
        } else {
            reporter.printUniversityList(result, year + "年 " + province + " 高校");
        }
    }

    /** Searches a year's universities by keyword. */
    private void searchUniversity() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        String keyword = readLine("请输入搜索关键词: ");
        List<University> data = lookupYear(yearInput);
        if (data == null) {
            return;
        }
        List<University> result = analyzer.searchUniversity(data, keyword);
        if (result.isEmpty()) {
            System.out.println("未找到匹配的高校!");
        } else {
            reporter.printUniversityList(result, "搜索结果");
        }
    }

    /** Shows the per-province counts of a year and renders the pie chart. */
    private void showProvinceStatistics() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        int year = yearInput;
        List<University> data = lookupYear(year);
        if (data == null) {
            return;
        }
        Map<String, Long> provinceCount = analyzer.countByProvince(data);
        reporter.printProvinceStatistics(provinceCount, year + "年 省份分布统计");
        chartGenerator.generateProvincePieChart(provinceCount, year);
    }

    /** Shows the score statistics of a year. */
    private void showScoreStatistics() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        int year = yearInput;
        List<University> data = lookupYear(year);
        if (data == null) {
            return;
        }
        RankAnalyzer.ScoreStatistics stats = analyzer.getScoreStatistics(data);
        reporter.printScoreStatistics(stats, year + "年 分数统计");
    }

    /** Shows the five fastest risers and fallers and renders their charts. */
    private void showRankChanges() {
        List<RankChange> changes = analyzer.calculateRankChanges(dataCache);
        List<RankChange> rising = analyzer.getFastestRising(changes, 5);
        reporter.printRankChanges(rising, "排名上升最快 Top 5");
        List<RankChange> falling = analyzer.getFastestFalling(changes, 5);
        reporter.printRankChanges(falling, "排名下降最快 Top 5");
        renderRankChangeCharts(rising, falling);
    }

    /** Renders the riser/faller charts, skipping whichever list is empty. */
    private void renderRankChangeCharts(List<RankChange> rising, List<RankChange> falling) {
        if (!rising.isEmpty()) {
            chartGenerator.generateRankChangeChart(rising, "排名上升最快", "rank_rising.png");
        }
        if (!falling.isEmpty()) {
            chartGenerator.generateRankChangeChart(falling, "排名下降最快", "rank_falling.png");
        }
    }

    /** Compares two universities (matched by exact name) within one year. */
    private void compareUniversities() {
        Integer yearInput = readInt(YEAR_PROMPT);
        if (yearInput == null) {
            return;
        }
        String name1 = readLine("请输入第一所高校名称: ");
        String name2 = readLine("请输入第二所高校名称: ");
        List<University> data = lookupYear(yearInput);
        if (data == null) {
            return;
        }
        Optional<University> u1 = data.stream()
                .filter(u -> u.getName().equals(name1))
                .findFirst();
        Optional<University> u2 = data.stream()
                .filter(u -> u.getName().equals(name2))
                .findFirst();
        if (u1.isPresent() && u2.isPresent()) {
            UniversityComparison comparison = analyzer.compareUniversities(u1.get(), u2.get());
            reporter.printComparison(comparison);
        } else {
            System.out.println("未找到指定的高校!");
        }
    }

    /** Shows one university's records across the cached years and renders the trend chart. */
    private void showYearlyTrend() {
        String name = readLine("请输入高校名称: ");
        List<University> history = analyzer.getUniversityHistory(dataCache, name);
        if (history.isEmpty()) {
            System.out.println("未找到该高校的数据!");
        } else {
            reporter.printYearlyTrend(history, name);
            chartGenerator.generateRankTrendLineChart(history, name);
        }
    }

    /**
     * Renders every chart: per-year top-10 and province charts, the rank-change charts, and a
     * trend chart for each of the top five universities of the most recent cached year.
     */
    private void generateAllCharts() {
        System.out.println("正在生成所有图表...");
        for (Map.Entry<Integer, List<University>> entry : dataCache.entrySet()) {
            List<University> data = entry.getValue();
            if (data == null) {
                // Nothing was loaded for this year; skip it rather than fail inside the analyzer.
                continue;
            }
            int year = entry.getKey();
            chartGenerator.generateTopNBarChart(data, year, 10);
            Map<String, Long> provinceCount = analyzer.countByProvince(data);
            chartGenerator.generateProvincePieChart(provinceCount, year);
        }

        List<RankChange> changes = analyzer.calculateRankChanges(dataCache);
        renderRankChangeCharts(analyzer.getFastestRising(changes, 10), analyzer.getFastestFalling(changes, 10));

        // Use the latest year that actually has data instead of a hard-coded 2024.
        Optional<Integer> latestYear = dataCache.entrySet().stream()
                .filter(entry -> entry.getValue() != null)
                .map(Map.Entry::getKey)
                .max(Integer::compare);
        if (latestYear.isPresent()) {
            List<University> topUniversities = analyzer.getTopN(dataCache.get(latestYear.get()), 5);
            for (University u : topUniversities) {
                List<University> history = analyzer.getUniversityHistory(dataCache, u.getName());
                if (!history.isEmpty()) {
                    chartGenerator.generateRankTrendLineChart(history, u.getName());
                }
            }
        }
        System.out.println("所有图表生成完成!\n");
    }
}

250
project/src/main/java/com/university/analysis/RankAnalyzer.java

@ -1,250 +0,0 @@
package com.university.analysis;
import com.university.model.RankChange;
import com.university.model.University;
import com.university.model.UniversityComparison;
import java.util.*;
import java.util.stream.Collectors;
/**
* 排名分析类
* 提供各种数据分析功能
*/
public class RankAnalyzer {
/**
 * Returns the N best-ranked universities.
 *
 * @param universities universities to pick from
 * @param n            maximum number of entries to return
 * @return at most {@code n} universities, ordered by ascending rank
 */
public List<University> getTopN(List<University> universities, int n) {
    Comparator<University> byRank = Comparator.comparingInt(u -> u.getRank());
    return universities.stream().sorted(byRank).limit(n).collect(Collectors.toList());
}
/**
 * Counts universities per province.
 *
 * @param universities universities to count
 * @return map from province to number of universities
 */
public Map<String, Long> countByProvince(List<University> universities) {
    Map<String, Long> countPerProvince = universities.stream()
            .collect(Collectors.groupingBy(u -> u.getProvince(), Collectors.counting()));
    return countPerProvince;
}
/**
 * Computes the average score per province.
 *
 * @param universities universities to aggregate
 * @return map from province to average score
 */
public Map<String, Double> averageScoreByProvince(List<University> universities) {
    Map<String, Double> averagePerProvince = universities.stream()
            .collect(Collectors.groupingBy(
                    u -> u.getProvince(),
                    Collectors.averagingDouble(u -> u.getScore())));
    return averagePerProvince;
}
/**
 * Returns the universities located in one province.
 *
 * @param universities universities to filter
 * @param province     province to match exactly
 * @return the matching universities, ordered by ascending rank
 */
public List<University> getByProvince(List<University> universities, String province) {
    List<University> inProvince = new ArrayList<>();
    for (University candidate : universities) {
        if (candidate.getProvince().equals(province)) {
            inProvince.add(candidate);
        }
    }
    inProvince.sort(Comparator.comparingInt(University::getRank));
    return inProvince;
}
/**
 * Finds universities whose name contains a keyword.
 *
 * @param universities universities to search
 * @param keyword      text that must occur in the name
 * @return the matching universities in their original order
 */
public List<University> searchUniversity(List<University> universities, String keyword) {
    List<University> matches = new ArrayList<>();
    for (University candidate : universities) {
        if (candidate.getName().contains(keyword)) {
            matches.add(candidate);
        }
    }
    return matches;
}
/**
 * Summarises the scores of a list of universities.
 *
 * @param universities universities to summarise
 * @return count, sum, average, maximum and minimum of the scores
 */
public ScoreStatistics getScoreStatistics(List<University> universities) {
    DoubleSummaryStatistics summary = new DoubleSummaryStatistics();
    for (University university : universities) {
        summary.accept(university.getScore());
    }
    long count = summary.getCount();
    double sum = summary.getSum();
    double average = summary.getAverage();
    double max = summary.getMax();
    double min = summary.getMin();
    return new ScoreStatistics(count, sum, average, max, min);
}
/**
 * Computes the rank change of every university between the earliest and the latest year.
 *
 * <p>Only universities present in both years are reported. When a name occurs more than once
 * within a year, its first occurrence is used. The result follows the order of the earliest
 * year's list.
 *
 * @param dataMap rankings per year (year -> universities)
 * @return one entry per university found in both years; empty when fewer than two years are
 *         given or either boundary year has no data
 */
public List<RankChange> calculateRankChanges(Map<Integer, List<University>> dataMap) {
    List<RankChange> changes = new ArrayList<>();
    if (dataMap == null || dataMap.size() < 2) {
        return changes;
    }
    int startYear = Collections.min(dataMap.keySet());
    int endYear = Collections.max(dataMap.keySet());
    List<University> startData = dataMap.get(startYear);
    List<University> endData = dataMap.get(endYear);
    if (startData == null || endData == null) {
        return changes;
    }

    // The merge function keeps the first entry; the two-argument toMap would throw on a
    // duplicate name.
    Map<String, University> endByName = endData.stream()
            .collect(Collectors.toMap(University::getName, u -> u, (first, duplicate) -> first));

    // Walk the start list itself (not a HashMap key set) so the output order is deterministic.
    Set<String> handled = new HashSet<>();
    for (University startUni : startData) {
        String name = startUni.getName();
        University endUni = endByName.get(name);
        if (endUni == null || !handled.add(name)) {
            continue;
        }
        changes.add(new RankChange(
                name,
                startYear,
                endYear,
                startUni.getRank(),
                endUni.getRank(),
                startUni.getScore(),
                endUni.getScore()
        ));
    }
    return changes;
}
/**
 * Picks the universities that climbed the most places.
 *
 * @param changes rank changes to choose from
 * @param n       maximum number of entries to return
 * @return up to {@code n} entries with a positive rank change, largest climb first
 */
public List<RankChange> getFastestRising(List<RankChange> changes, int n) {
    // Descending by rank change: the biggest improvement comes first.
    Comparator<RankChange> biggestGainFirst =
            (a, b) -> Integer.compare(b.getRankChange(), a.getRankChange());
    return changes.stream()
            .filter(change -> change.getRankChange() > 0)
            .sorted(biggestGainFirst)
            .limit(n)
            .collect(Collectors.toList());
}
/**
 * Picks the universities that dropped the most places.
 *
 * @param changes rank changes to choose from
 * @param n       maximum number of entries to return
 * @return up to {@code n} entries with a negative rank change, largest drop first
 */
public List<RankChange> getFastestFalling(List<RankChange> changes, int n) {
    // Ascending by rank change: the most negative value (largest drop) comes first.
    Comparator<RankChange> biggestDropFirst =
            (a, b) -> Integer.compare(a.getRankChange(), b.getRankChange());
    return changes.stream()
            .filter(change -> change.getRankChange() < 0)
            .sorted(biggestDropFirst)
            .limit(n)
            .collect(Collectors.toList());
}
/**
 * Builds a side-by-side comparison of two universities.
 *
 * @param u1 first university
 * @param u2 second university
 * @return comparison object constructed from the two records
 */
public UniversityComparison compareUniversities(University u1, University u2) {
    UniversityComparison comparison = new UniversityComparison(u1, u2);
    return comparison;
}
/**
 * Collects one university's record from every year in the data set.
 *
 * @param dataMap        mapping of year to that year's university list
 * @param universityName exact name of the university to look up
 * @return the first matching record of each year that has one, ordered by year
 */
public List<University> getUniversityHistory(Map<Integer, List<University>> dataMap,
                                             String universityName) {
    List<University> timeline = new ArrayList<>();
    for (List<University> oneYear : dataMap.values()) {
        for (University candidate : oneYear) {
            if (candidate.getName().equals(universityName)) {
                // Only the first match per year is kept.
                timeline.add(candidate);
                break;
            }
        }
    }
    // Order chronologically.
    Collections.sort(timeline, Comparator.comparingInt(University::getYear));
    return timeline;
}
/**
 * Immutable holder for score summary values: count, sum, average, maximum and minimum.
 */
public static class ScoreStatistics {
    private final long count;
    private final double sum;
    private final double average;
    private final double max;
    private final double min;

    /**
     * Stores the given summary values as-is; nothing is derived or validated.
     */
    public ScoreStatistics(long count, double sum, double average, double max, double min) {
        this.count = count;
        this.sum = sum;
        this.average = average;
        this.max = max;
        this.min = min;
    }

    /** @return number of scores summarised */
    public long getCount() {
        return this.count;
    }

    /** @return sum of all scores */
    public double getSum() {
        return this.sum;
    }

    /** @return average score */
    public double getAverage() {
        return this.average;
    }

    /** @return highest score */
    public double getMax() {
        return this.max;
    }

    /** @return lowest score */
    public double getMin() {
        return this.min;
    }

    /** Renders count, average, maximum and minimum (the sum is not included). */
    @Override
    public String toString() {
        String template = "统计信息: 数量=%d, 平均分=%.2f, 最高分=%.2f, 最低分=%.2f";
        return String.format(template, this.count, this.average, this.max, this.min);
    }
}
}

153
project/src/main/java/com/university/crawler/UniversityRankCrawler.java

@ -1,153 +0,0 @@
package com.university.crawler;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.university.model.University;
/**
 * Crawler for university ranking data.
 * Fetches a ranking page over HTTP and turns its table rows into {@link University} objects.
 */
public class UniversityRankCrawler {
    // Pause after each page request (milliseconds) so requests are not sent too quickly.
    private static final int REQUEST_DELAY = 1000;
    // Upper bound on the number of rows kept per year.
    private static final int MAX_RESULTS = 100;
    // Compiled once instead of on every row via String.replaceAll.
    private static final java.util.regex.Pattern NON_DIGITS =
            java.util.regex.Pattern.compile("[^0-9]");
    private static final java.util.regex.Pattern NON_SCORE_CHARS =
            java.util.regex.Pattern.compile("[^0-9.]");

    /**
     * Crawls the ShanghaiRanking "Best Chinese Universities" page for one year.
     *
     * <p>At most {@value #MAX_RESULTS} rows are kept. If the HTTP request fails with an
     * IOException, built-in mock data is returned instead. If the thread is interrupted
     * while pausing after the request, the interrupt flag is restored and whatever was
     * parsed so far is returned.
     *
     * @param year ranking year, appended to the ranking URL
     * @return parsed universities (possibly empty), or mock data when the request fails
     */
    public List<University> crawlRankings(int year) {
        List<University> universities = new ArrayList<>();
        try {
            String url = "https://www.shanghairanking.cn/rankings/bcur/" + year;
            System.out.println("正在爬取 " + year + " 年高校排名数据...");

            // Fetch the page with browser-like headers.
            Document doc = Jsoup.connect(url)
                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                    .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                    .timeout(15000)
                    .get();

            // Each row of the ranking table is one university.
            Elements rows = doc.select("table.rk-table tbody tr");
            for (Element row : rows) {
                University university = parseRow(row, year);
                if (university == null) {
                    continue;
                }
                universities.add(university);
                if (universities.size() >= MAX_RESULTS) {
                    break;
                }
            }

            // Throttle before the caller issues the next request.
            Thread.sleep(REQUEST_DELAY);
        } catch (IOException e) {
            System.err.println("爬取数据失败: " + e.getMessage());
            System.out.println("将使用模拟数据...");
            universities = generateMockData(year);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
        System.out.println("成功获取 " + universities.size() + " 条数据");
        return universities;
    }

    /**
     * Crawls every year in the inclusive range, one request per year.
     *
     * @param startYear first year to crawl
     * @param endYear   last year to crawl
     * @return one list per year in ascending year order; empty if startYear > endYear
     */
    public List<List<University>> crawlMultipleYears(int startYear, int endYear) {
        List<List<University>> allData = new ArrayList<>();
        for (int year = startYear; year <= endYear; year++) {
            allData.add(crawlRankings(year));
        }
        return allData;
    }

    /**
     * Converts one table row into a University.
     *
     * <p>Column 0 is read as the rank, 1 as the name, 2 as the province and 4 as the
     * total score.
     *
     * @return the parsed university, or {@code null} when the row has fewer than five
     *         cells or its rank/score cannot be parsed
     */
    private University parseRow(Element row, int year) {
        Elements cells = row.select("td");
        if (cells.size() < 5) {
            return null;
        }
        try {
            // Keep only the digits of the rank cell.
            String rankText = NON_DIGITS.matcher(cells.get(0).text().trim()).replaceAll("");
            if (rankText.isEmpty()) {
                return null;
            }
            int rank = Integer.parseInt(rankText);

            String name = cells.get(1).text().trim();
            String province = cells.get(2).text().trim();

            // Keep only digits and dots of the score cell.
            String scoreText = NON_SCORE_CHARS.matcher(cells.get(4).text().trim()).replaceAll("");
            if (scoreText.isEmpty()) {
                return null;
            }
            double score = Double.parseDouble(scoreText);

            return new University(rank, name, province, score, year);
        } catch (NumberFormatException e) {
            // Unparseable rank or score: skip this row.
            return null;
        }
    }

    /**
     * Produces a fixed list of 20 universities used when the site cannot be reached.
     * Every score is shifted by 0.5 per year relative to 2022.
     */
    private List<University> generateMockData(int year) {
        List<University> mockData = new ArrayList<>();
        // Per-year offset applied to every base score.
        double variation = (year - 2022) * 0.5;
        mockData.add(new University(1, "清华大学", "北京", 852.5 + variation, year));
        mockData.add(new University(2, "北京大学", "北京", 848.2 + variation, year));
        mockData.add(new University(3, "浙江大学", "浙江", 822.5 + variation, year));
        mockData.add(new University(4, "上海交通大学", "上海", 815.3 + variation, year));
        mockData.add(new University(5, "复旦大学", "上海", 805.1 + variation, year));
        mockData.add(new University(6, "南京大学", "江苏", 785.6 + variation, year));
        mockData.add(new University(7, "中国科学技术大学", "安徽", 782.4 + variation, year));
        mockData.add(new University(8, "华中科技大学", "湖北", 765.8 + variation, year));
        mockData.add(new University(9, "武汉大学", "湖北", 758.2 + variation, year));
        mockData.add(new University(10, "西安交通大学", "陕西", 752.6 + variation, year));
        mockData.add(new University(11, "中山大学", "广东", 745.3 + variation, year));
        mockData.add(new University(12, "四川大学", "四川", 738.9 + variation, year));
        mockData.add(new University(13, "哈尔滨工业大学", "黑龙江", 732.5 + variation, year));
        mockData.add(new University(14, "北京航空航天大学", "北京", 725.8 + variation, year));
        mockData.add(new University(15, "东南大学", "江苏", 718.4 + variation, year));
        mockData.add(new University(16, "北京理工大学", "北京", 712.6 + variation, year));
        mockData.add(new University(17, "同济大学", "上海", 705.3 + variation, year));
        mockData.add(new University(18, "中国人民大学", "北京", 698.5 + variation, year));
        mockData.add(new University(19, "北京师范大学", "北京", 692.1 + variation, year));
        mockData.add(new University(20, "南开大学", "天津", 685.7 + variation, year));
        return mockData;
    }
}

145
project/src/main/java/com/university/model/RankChange.java

@ -1,145 +0,0 @@
package com.university.model;
/**
 * Rank-change entity: how one university's rank and score moved between two years.
 *
 * <p>{@code rankChange} is {@code startRank - endRank} (positive means the university
 * moved up) and {@code scoreChange} is {@code endScore - startScore}. Both are derived
 * in the constructor and re-derived whenever one of their inputs is changed through a
 * setter, so a bean filled via the no-arg constructor stays consistent. Calling
 * {@link #setRankChange(int)} or {@link #setScoreChange(double)} still overrides the
 * derived value until the next input change.
 */
public class RankChange {
    // University name
    private String universityName;
    // First year of the comparison
    private int startYear;
    // Last year of the comparison
    private int endYear;
    // Rank in the first year
    private int startRank;
    // Rank in the last year
    private int endRank;
    // Rank movement (positive = moved up, negative = moved down)
    private int rankChange;
    // Score in the first year
    private double startScore;
    // Score in the last year
    private double endScore;
    // Score movement (end minus start)
    private double scoreChange;

    public RankChange() {
    }

    public RankChange(String universityName, int startYear, int endYear,
                      int startRank, int endRank, double startScore, double endScore) {
        this.universityName = universityName;
        this.startYear = startYear;
        this.endYear = endYear;
        this.startRank = startRank;
        this.endRank = endRank;
        this.startScore = startScore;
        this.endScore = endScore;
        recomputeRankChange();
        recomputeScoreChange();
    }

    // A smaller rank number is better, so start minus end is positive for a rise.
    private void recomputeRankChange() {
        this.rankChange = this.startRank - this.endRank;
    }

    private void recomputeScoreChange() {
        this.scoreChange = this.endScore - this.startScore;
    }

    // Getters and setters

    public String getUniversityName() {
        return universityName;
    }

    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    public int getStartYear() {
        return startYear;
    }

    public void setStartYear(int startYear) {
        this.startYear = startYear;
    }

    public int getEndYear() {
        return endYear;
    }

    public void setEndYear(int endYear) {
        this.endYear = endYear;
    }

    public int getStartRank() {
        return startRank;
    }

    /** Sets the starting rank and re-derives the rank change. */
    public void setStartRank(int startRank) {
        this.startRank = startRank;
        recomputeRankChange();
    }

    public int getEndRank() {
        return endRank;
    }

    /** Sets the ending rank and re-derives the rank change. */
    public void setEndRank(int endRank) {
        this.endRank = endRank;
        recomputeRankChange();
    }

    public int getRankChange() {
        return rankChange;
    }

    /** Overrides the derived rank change; a later start/end rank update replaces it. */
    public void setRankChange(int rankChange) {
        this.rankChange = rankChange;
    }

    public double getStartScore() {
        return startScore;
    }

    /** Sets the starting score and re-derives the score change. */
    public void setStartScore(double startScore) {
        this.startScore = startScore;
        recomputeScoreChange();
    }

    public double getEndScore() {
        return endScore;
    }

    /** Sets the ending score and re-derives the score change. */
    public void setEndScore(double endScore) {
        this.endScore = endScore;
        recomputeScoreChange();
    }

    public double getScoreChange() {
        return scoreChange;
    }

    /** Overrides the derived score change; a later start/end score update replaces it. */
    public void setScoreChange(double scoreChange) {
        this.scoreChange = scoreChange;
    }

    /**
     * Describes the rank movement: places gained, places lost, or unchanged.
     */
    public String getTrendDescription() {
        if (rankChange > 0) {
            return String.format("上升%d位", rankChange);
        } else if (rankChange < 0) {
            return String.format("下降%d位", Math.abs(rankChange));
        } else {
            return "排名不变";
        }
    }

    @Override
    public String toString() {
        return String.format("%s: %d年(第%d名) -> %d年(第%d名), %s",
                universityName, startYear, startRank, endYear, endRank, getTrendDescription());
    }
}

120
project/src/main/java/com/university/model/University.java

@ -1,120 +0,0 @@
package com.university.model;
import java.util.Objects;
/**
 * University entity (Java Bean) holding one row of ranking data:
 * rank, name, province, total score and the year the row belongs to.
 */
public class University {
    private int rank;          // position in the ranking
    private String name;       // university name
    private String province;   // province the university is located in
    private double score;      // total score
    private int year;          // ranking year

    /** No-arg constructor, required so the bean can be created reflectively. */
    public University() {
    }

    /** Creates a fully populated record. */
    public University(int rank, String name, String province, double score, int year) {
        this.rank = rank;
        this.name = name;
        this.province = province;
        this.score = score;
        this.year = year;
    }

    public int getRank() {
        return this.rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    public String getName() {
        return this.name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getProvince() {
        return this.province;
    }

    public void setProvince(String province) {
        this.province = province;
    }

    public double getScore() {
        return this.score;
    }

    public void setScore(double score) {
        this.score = score;
    }

    public int getYear() {
        return this.year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    /**
     * Computes the rank movement relative to an earlier rank.
     *
     * @param previousRank rank in the earlier year
     * @return earlier rank minus current rank (positive = moved up, negative = moved down)
     */
    public int calculateRankChange(int previousRank) {
        int delta = previousRank - this.rank;
        return delta;
    }

    /**
     * Computes the score movement relative to an earlier score.
     *
     * @param previousScore score in the earlier year
     * @return current score minus earlier score
     */
    public double calculateScoreChange(double previousScore) {
        double delta = this.score - previousScore;
        return delta;
    }

    @Override
    public String toString() {
        String template = "University{rank=%d, name='%s', province='%s', score=%.2f, year=%d}";
        return String.format(template, rank, name, province, score, year);
    }

    /** Two records are equal when they have the same class and all five fields match. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        University other = (University) o;
        if (rank != other.rank || year != other.year) {
            return false;
        }
        if (Double.compare(other.score, score) != 0) {
            return false;
        }
        return Objects.equals(name, other.name)
                && Objects.equals(province, other.province);
    }

    @Override
    public int hashCode() {
        return Objects.hash(rank, name, province, score, year);
    }
}

171
project/src/main/java/com/university/model/UniversityComparison.java

@ -1,171 +0,0 @@
package com.university.model;
/**
 * Comparison entity holding the ranks, scores and provinces of two universities,
 * together with the absolute rank gap and score gap between them.
 *
 * <p>The gaps are derived in the constructor and re-derived whenever a rank or score is
 * changed through a setter, so a bean populated via the no-arg constructor stays
 * consistent. {@link #setRankGap(int)} and {@link #setScoreGap(double)} still override
 * the derived value until the next rank/score update.
 */
public class UniversityComparison {
    // Name of the first university
    private String universityName1;
    // Name of the second university
    private String universityName2;
    // Year of the comparison
    private int year;
    // Rank of university 1
    private int rank1;
    // Rank of university 2
    private int rank2;
    // Score of university 1
    private double score1;
    // Score of university 2
    private double score2;
    // Province of university 1
    private String province1;
    // Province of university 2
    private String province2;
    // Absolute rank difference
    private int rankGap;
    // Absolute score difference
    private double scoreGap;

    public UniversityComparison() {
    }

    /**
     * Builds the comparison from two university records.
     * The year is taken from the first record only; the second record's year is not read.
     *
     * @param u1 first university (must not be null)
     * @param u2 second university (must not be null)
     */
    public UniversityComparison(University u1, University u2) {
        this.universityName1 = u1.getName();
        this.universityName2 = u2.getName();
        this.year = u1.getYear();
        this.rank1 = u1.getRank();
        this.rank2 = u2.getRank();
        this.score1 = u1.getScore();
        this.score2 = u2.getScore();
        this.province1 = u1.getProvince();
        this.province2 = u2.getProvince();
        recomputeRankGap();
        recomputeScoreGap();
    }

    private void recomputeRankGap() {
        this.rankGap = Math.abs(this.rank1 - this.rank2);
    }

    private void recomputeScoreGap() {
        this.scoreGap = Math.abs(this.score1 - this.score2);
    }

    // Getters and setters

    public String getUniversityName1() {
        return universityName1;
    }

    public void setUniversityName1(String universityName1) {
        this.universityName1 = universityName1;
    }

    public String getUniversityName2() {
        return universityName2;
    }

    public void setUniversityName2(String universityName2) {
        this.universityName2 = universityName2;
    }

    public int getYear() {
        return year;
    }

    public void setYear(int year) {
        this.year = year;
    }

    public int getRank1() {
        return rank1;
    }

    /** Sets the first rank and re-derives the rank gap. */
    public void setRank1(int rank1) {
        this.rank1 = rank1;
        recomputeRankGap();
    }

    public int getRank2() {
        return rank2;
    }

    /** Sets the second rank and re-derives the rank gap. */
    public void setRank2(int rank2) {
        this.rank2 = rank2;
        recomputeRankGap();
    }

    public double getScore1() {
        return score1;
    }

    /** Sets the first score and re-derives the score gap. */
    public void setScore1(double score1) {
        this.score1 = score1;
        recomputeScoreGap();
    }

    public double getScore2() {
        return score2;
    }

    /** Sets the second score and re-derives the score gap. */
    public void setScore2(double score2) {
        this.score2 = score2;
        recomputeScoreGap();
    }

    public String getProvince1() {
        return province1;
    }

    public void setProvince1(String province1) {
        this.province1 = province1;
    }

    public String getProvince2() {
        return province2;
    }

    public void setProvince2(String province2) {
        this.province2 = province2;
    }

    public int getRankGap() {
        return rankGap;
    }

    /** Overrides the derived rank gap; a later rank update replaces it. */
    public void setRankGap(int rankGap) {
        this.rankGap = rankGap;
    }

    public double getScoreGap() {
        return scoreGap;
    }

    /** Overrides the derived score gap; a later score update replaces it. */
    public void setScoreGap(double scoreGap) {
        this.scoreGap = scoreGap;
    }

    /**
     * Returns the name of the better-ranked university (smaller rank number).
     * When both ranks are equal, the second university's name is returned.
     */
    public String getHigherRankedUniversity() {
        return rank1 < rank2 ? universityName1 : universityName2;
    }

    /**
     * Builds a one-line description: year, leader, trailer, rank gap and score gap.
     */
    public String getComparisonResult() {
        String higherUni = getHigherRankedUniversity();
        // Pick the trailing side from the ranks, not by comparing names: calling
        // equals on the leader's name throws when the names are still null
        // (default-constructed bean).
        String lowerUni = rank1 < rank2 ? universityName2 : universityName1;
        return String.format("%d年: %s 排名高于 %s %d位,分数相差 %.2f分",
                year, higherUni, lowerUni, rankGap, scoreGap);
    }

    @Override
    public String toString() {
        return getComparisonResult();
    }
}

202
project/src/main/java/com/university/storage/DataStorage.java

@ -1,202 +0,0 @@
package com.university.storage;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import com.opencsv.CSVReader;
import com.opencsv.CSVWriter;
import com.opencsv.bean.CsvToBean;
import com.opencsv.bean.CsvToBeanBuilder;
import com.opencsv.bean.StatefulBeanToCsv;
import com.opencsv.bean.StatefulBeanToCsvBuilder;
import com.opencsv.exceptions.CsvDataTypeMismatchException;
import com.opencsv.exceptions.CsvRequiredFieldEmptyException;
import com.opencsv.exceptions.CsvValidationException;
import com.university.model.University;
/**
 * Data storage class: persists university ranking lists as CSV files, one file per year,
 * under the {@code data} directory.
 *
 * <p>The bean-based pair ({@link #saveToCsv}/{@link #readFromCsv}) and the raw pair
 * ({@link #saveRawData}/{@link #readRawData}) write to the SAME file name for a given
 * year; a file should be read back with the method matching the one that wrote it.
 */
public class DataStorage {
    // Directory that holds the CSV files
    private static final String DATA_DIR = "data";

    /**
     * Creates the data directory if it does not exist yet.
     */
    public DataStorage() {
        File dir = new File(DATA_DIR);
        if (!dir.exists()) {
            dir.mkdirs();
        }
    }

    // Builds the CSV path for a year.
    private static String csvPath(int year) {
        return DATA_DIR + "/university_rank_" + year + ".csv";
    }

    /**
     * Opens a file as UTF-8 text and skips a leading byte-order mark if present.
     * saveToCsv writes a BOM for Excel; left in place it becomes part of the first
     * header name and breaks header-to-field matching when reading.
     */
    private static Reader openUtf8WithoutBom(String filename) throws IOException {
        java.io.BufferedReader reader = new java.io.BufferedReader(
                new InputStreamReader(new FileInputStream(filename), StandardCharsets.UTF_8));
        try {
            reader.mark(1);
            if (reader.read() != '\ufeff') {
                // First character is real content (or the file is empty): rewind.
                reader.reset();
            }
        } catch (IOException e) {
            reader.close();
            throw e;
        }
        return reader;
    }

    /**
     * Saves a university list to the year's CSV file using bean-to-CSV mapping.
     * A UTF-8 BOM is written first so Excel displays Chinese text correctly.
     * Failures are reported on stderr; nothing is thrown.
     *
     * @param universities universities to write
     * @param year         year used in the file name
     */
    public void saveToCsv(List<University> universities, int year) {
        String filename = csvPath(year);
        try (Writer writer = new OutputStreamWriter(
                new FileOutputStream(filename), StandardCharsets.UTF_8)) {
            // BOM so Excel detects UTF-8
            writer.write('\ufeff');

            StatefulBeanToCsv<University> beanToCsv = new StatefulBeanToCsvBuilder<University>(writer)
                    .withQuotechar('"')
                    .withSeparator(',')
                    .withOrderedResults(true)
                    .build();

            beanToCsv.write(universities);
            System.out.println("数据已保存到: " + filename);
        } catch (IOException | CsvDataTypeMismatchException | CsvRequiredFieldEmptyException e) {
            System.err.println("保存CSV文件失败: " + e.getMessage());
        }
    }

    /**
     * Reads the year's CSV file into University beans via header-based mapping.
     * A leading BOM is skipped before parsing. I/O failures are reported on stderr
     * and produce an empty list.
     *
     * @param year year used in the file name
     * @return parsed universities; empty if the file cannot be read
     */
    public List<University> readFromCsv(int year) {
        String filename = csvPath(year);
        List<University> universities = new ArrayList<>();
        try (Reader reader = openUtf8WithoutBom(filename)) {
            CsvToBean<University> csvToBean = new CsvToBeanBuilder<University>(reader)
                    .withType(University.class)
                    .withIgnoreLeadingWhiteSpace(true)
                    .build();

            universities = csvToBean.parse();
            System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据");
        } catch (IOException e) {
            System.err.println("读取CSV文件失败: " + e.getMessage());
        }
        return universities;
    }

    /**
     * Saves a university list with a hand-written layout: a Chinese header row followed
     * by rank, name, province, score and year columns. No BOM is written.
     * Failures are reported on stderr; nothing is thrown.
     *
     * @param universities universities to write
     * @param year         year used in the file name
     */
    public void saveRawData(List<University> universities, int year) {
        String filename = csvPath(year);
        try (CSVWriter writer = new CSVWriter(new OutputStreamWriter(
                new FileOutputStream(filename), StandardCharsets.UTF_8))) {
            // Header row
            String[] header = {"排名", "学校名称", "省份", "总分", "年份"};
            writer.writeNext(header);

            // Data rows
            for (University u : universities) {
                String[] row = {
                        String.valueOf(u.getRank()),
                        u.getName(),
                        u.getProvince(),
                        String.valueOf(u.getScore()),
                        String.valueOf(u.getYear())
                };
                writer.writeNext(row);
            }
            System.out.println("原始数据已保存到: " + filename);
        } catch (IOException e) {
            System.err.println("保存原始数据失败: " + e.getMessage());
        }
    }

    /**
     * Reads a file in the layout written by {@link #saveRawData}: the first row is
     * skipped as a header, and each later row with at least five columns is parsed as
     * rank, name, province, score, year. Rows whose numeric columns cannot be parsed are
     * reported on stderr and skipped instead of aborting the whole load.
     *
     * @param year year used in the file name
     * @return parsed universities; empty if the file cannot be read
     */
    public List<University> readRawData(int year) {
        String filename = csvPath(year);
        List<University> universities = new ArrayList<>();
        try (CSVReader reader = new CSVReader(openUtf8WithoutBom(filename))) {
            // Skip the header row
            reader.readNext();

            String[] row;
            while ((row = reader.readNext()) != null) {
                if (row.length < 5) {
                    continue;
                }
                try {
                    University u = new University();
                    u.setRank(Integer.parseInt(row[0].trim()));
                    u.setName(row[1].trim());
                    u.setProvince(row[2].trim());
                    u.setScore(Double.parseDouble(row[3].trim()));
                    u.setYear(Integer.parseInt(row[4].trim()));
                    universities.add(u);
                } catch (NumberFormatException e) {
                    // One malformed row must not discard the rest of the file.
                    System.err.println("跳过无法解析的数据行: " + String.join(",", row));
                }
            }
            System.out.println("从 " + filename + " 读取了 " + universities.size() + " 条数据");
        } catch (IOException | CsvValidationException e) {
            System.err.println("读取原始数据失败: " + e.getMessage());
        }
        return universities;
    }

    /**
     * Checks whether the CSV file for a year exists.
     *
     * @param year year used in the file name
     * @return true if the file exists
     */
    public boolean dataExists(int year) {
        File file = new File(csvPath(year));
        return file.exists();
    }

    /**
     * Deletes the CSV file for a year. A message is printed only when a file existed
     * and was deleted; otherwise the call is silent.
     *
     * @param year year used in the file name
     */
    public void deleteData(int year) {
        File file = new File(csvPath(year));
        if (file.exists() && file.delete()) {
            System.out.println("已删除 " + year + " 年的数据文件");
        }
    }
}

299
project/src/main/java/com/university/visualization/ChartGenerator.java

@ -1,299 +0,0 @@
package com.university.visualization;
import com.university.model.RankChange;
import com.university.model.University;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.NumberAxis;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.renderer.category.BarRenderer;
import org.jfree.chart.renderer.category.LineAndShapeRenderer;
import org.jfree.data.category.DefaultCategoryDataset;
import java.awt.*;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
* 图表生成类
* 使用JFreeChart生成各种统计图表
*/
public class ChartGenerator {
// 图表输出目录
private static final String CHART_DIR = "charts";
/**
* 构造方法确保图表目录存在
*/
public ChartGenerator() {
File dir = new File(CHART_DIR);
if (!dir.exists()) {
dir.mkdirs();
}
}
/**
* 生成Top N高校柱状图
*
* @param universities 高校列表
* @param year 年份
* @param n 数量
*/
public void generateTopNBarChart(List<University> universities, int year, int n) {
// 创建数据集
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
// 取前N名
int count = Math.min(n, universities.size());
for (int i = 0; i < count; i++) {
University u = universities.get(i);
dataset.addValue(u.getScore(), "总分", u.getName());
}
// 创建图表
JFreeChart chart = ChartFactory.createBarChart(
year + "年高校排名Top" + n, // 标题
"学校", // X轴标签
"总分", // Y轴标签
dataset, // 数据集
PlotOrientation.VERTICAL, // 方向
true, // 显示图例
true, // 显示工具提示
false // 不生成URL
);
// 美化图表
customizeBarChart(chart);
// 保存图表
saveChart(chart, "top" + n + "_" + year + ".png");
}
/**
* 生成省份分布饼图
*
* @param provinceCount 省份统计
* @param year 年份
*/
public void generateProvincePieChart(Map<String, Long> provinceCount, int year) {
// 创建饼图数据集
org.jfree.data.general.DefaultPieDataset<String> dataset =
new org.jfree.data.general.DefaultPieDataset<>();
// 添加数据
provinceCount.forEach(dataset::setValue);
// 创建饼图
JFreeChart chart = ChartFactory.createPieChart(
year + "年高校省份分布", // 标题
dataset, // 数据集
true, // 显示图例
true, // 显示工具提示
false // 不生成URL
);
// 获取饼图plot并设置标签
org.jfree.chart.plot.PiePlot plot = (org.jfree.chart.plot.PiePlot) chart.getPlot();
// 设置标签格式:省份名称 + 数量 + 百分比
plot.setLabelGenerator(new org.jfree.chart.labels.StandardPieSectionLabelGenerator(
"{0}: {1}所 ({2})",
java.text.NumberFormat.getIntegerInstance(),
java.text.NumberFormat.getPercentInstance()
));
// 设置标签字体
plot.setLabelFont(new Font("微软雅黑", Font.PLAIN, 12));
// 设置标签颜色
plot.setLabelPaint(Color.BLACK);
// 设置标签背景
plot.setLabelBackgroundPaint(new Color(255, 255, 255, 200));
// 设置标题字体
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16));
// 保存图表
saveChart(chart, "province_distribution_" + year + ".png");
}
/**
* 生成历年排名变化折线图
*
* @param universityHistory 某高校历年数据
* @param universityName 高校名称
*/
public void generateRankTrendLineChart(List<University> universityHistory,
String universityName) {
// 创建数据集
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
// 添加数据(注意:排名越小越好,所以取负值让折线图向上表示进步)
for (University u : universityHistory) {
dataset.addValue(u.getRank(), "排名", String.valueOf(u.getYear()));
}
// 创建图表
JFreeChart chart = ChartFactory.createLineChart(
universityName + " 历年排名变化", // 标题
"年份", // X轴标签
"排名", // Y轴标签
dataset, // 数据集
PlotOrientation.VERTICAL, // 方向
true, // 显示图例
true, // 显示工具提示
false // 不生成URL
);
// 美化折线图
customizeLineChart(chart);
// 保存图表
saveChart(chart, "rank_trend_" + universityName + ".png");
}
/**
* 生成排名变化对比图
*
* @param changes 排名变化列表
* @param title 图表标题
* @param filename 文件名
*/
public void generateRankChangeChart(List<RankChange> changes, String title, String filename) {
// 创建数据集
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
// 添加数据
for (RankChange change : changes) {
dataset.addValue(change.getRankChange(), "排名变化", change.getUniversityName());
}
// 创建图表
JFreeChart chart = ChartFactory.createBarChart(
title,
"学校",
"排名变化(位)",
dataset,
PlotOrientation.HORIZONTAL,
true,
true,
false
);
// 美化
customizeBarChart(chart);
// 保存
saveChart(chart, filename);
}
/**
* 生成多高校对比图
*
* @param universities 高校列表
* @param year 年份
*/
public void generateComparisonChart(List<University> universities, int year) {
// 创建数据集
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
// 添加分数数据
for (University u : universities) {
dataset.addValue(u.getScore(), "总分", u.getName());
}
// 创建图表
JFreeChart chart = ChartFactory.createBarChart(
year + "年高校分数对比",
"学校",
"总分",
dataset,
PlotOrientation.VERTICAL,
true,
true,
false
);
customizeBarChart(chart);
saveChart(chart, "comparison_" + year + ".png");
}
/**
* 美化柱状图
*/
private void customizeBarChart(JFreeChart chart) {
CategoryPlot plot = chart.getCategoryPlot();
// 设置背景色
plot.setBackgroundPaint(Color.WHITE);
plot.setRangeGridlinePaint(Color.LIGHT_GRAY);
// 设置柱状图颜色
BarRenderer renderer = (BarRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(79, 129, 189));
// 设置字体
CategoryAxis domainAxis = plot.getDomainAxis();
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10));
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12));
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis();
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10));
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12));
// 设置标题字体
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16));
}
/**
* 美化折线图
*/
private void customizeLineChart(JFreeChart chart) {
CategoryPlot plot = chart.getCategoryPlot();
// 设置背景色
plot.setBackgroundPaint(Color.WHITE);
plot.setRangeGridlinePaint(Color.LIGHT_GRAY);
// 设置折线样式
LineAndShapeRenderer renderer = (LineAndShapeRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(79, 129, 189));
renderer.setSeriesStroke(0, new BasicStroke(2.0f));
renderer.setSeriesShapesVisible(0, true);
// 设置字体
CategoryAxis domainAxis = plot.getDomainAxis();
domainAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10));
domainAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12));
NumberAxis rangeAxis = (NumberAxis) plot.getRangeAxis();
rangeAxis.setTickLabelFont(new Font("微软雅黑", Font.PLAIN, 10));
rangeAxis.setLabelFont(new Font("微软雅黑", Font.BOLD, 12));
// 设置标题字体
chart.getTitle().setFont(new Font("微软雅黑", Font.BOLD, 16));
}
/**
* 保存图表到文件
*
* @param chart 图表对象
* @param filename 文件名
*/
private void saveChart(JFreeChart chart, String filename) {
try {
File file = new File(CHART_DIR + "/" + filename);
ChartUtils.saveChartAsPNG(file, chart, 800, 600);
System.out.println("图表已保存: " + file.getAbsolutePath());
} catch (IOException e) {
System.err.println("保存图表失败: " + e.getMessage());
}
}
}

241
project/src/main/java/com/university/visualization/ConsoleReporter.java

@ -1,241 +0,0 @@
package com.university.visualization;
import com.university.analysis.RankAnalyzer;
import com.university.model.RankChange;
import com.university.model.University;
import com.university.model.UniversityComparison;
import java.util.List;
import java.util.Map;
/**
 * Console reporter: prints formatted tables and summaries of ranking data to stdout.
 */
public class ConsoleReporter {

    /** Prints an 80-character line of '='. */
    private void printSeparator() {
        System.out.println("=".repeat(80));
    }

    /** Prints a line of '-' with the given width. */
    private void printRule(int width) {
        System.out.println("-".repeat(width));
    }

    /** Prints a bracketed title framed by separator lines. */
    private void printHeader(String title) {
        printSeparator();
        System.out.println("【" + title + "】");
        printSeparator();
    }

    /** Renders a rank movement as an up arrow, a down arrow, or a dash for no change. */
    private String describeRankDelta(int delta) {
        if (delta > 0) {
            return "↑" + delta;
        }
        if (delta < 0) {
            return "↓" + Math.abs(delta);
        }
        return "-";
    }

    /**
     * Prints a table of universities: rank, name, province, score, year.
     *
     * @param universities rows to print
     * @param title        heading shown above the table
     */
    public void printUniversityList(List<University> universities, String title) {
        printHeader(title);

        // Column headings
        System.out.printf("%-6s %-20s %-10s %-10s %-6s%n",
                "排名", "学校名称", "省份", "总分", "年份");
        printRule(80);

        // One line per university
        for (University row : universities) {
            String shownName = truncate(row.getName(), 20);
            System.out.printf("%-6d %-20s %-10s %-10.2f %-6d%n",
                    row.getRank(), shownName, row.getProvince(), row.getScore(), row.getYear());
        }
        System.out.println();
    }

    /**
     * Prints per-province university counts, largest count first.
     *
     * @param provinceCount province-to-count mapping
     * @param title         heading shown above the table
     */
    public void printProvinceStatistics(Map<String, Long> provinceCount, String title) {
        printHeader(title);
        System.out.printf("%-15s %-10s%n", "省份", "高校数量");
        printRule(30);

        // Descending by count
        provinceCount.entrySet().stream()
                .sorted(Map.Entry.<String, Long>comparingByValue().reversed())
                .forEach(e -> {
                    String province = e.getKey();
                    Long total = e.getValue();
                    System.out.printf("%-15s %-10d%n", province, total);
                });
        System.out.println();
    }

    /**
     * Prints count, average, maximum and minimum score.
     *
     * @param statistics summary to print
     * @param title      heading shown above the figures
     */
    public void printScoreStatistics(RankAnalyzer.ScoreStatistics statistics, String title) {
        printHeader(title);
        System.out.printf("高校数量: %d%n", statistics.getCount());
        System.out.printf("平均分数: %.2f%n", statistics.getAverage());
        System.out.printf("最高分数: %.2f%n", statistics.getMax());
        System.out.printf("最低分数: %.2f%n", statistics.getMin());
        System.out.println();
    }

    /**
     * Prints a table of rank changes with an arrow for direction and a signed score change.
     *
     * @param changes rows to print
     * @param title   heading shown above the table
     */
    public void printRankChanges(List<RankChange> changes, String title) {
        printHeader(title);
        System.out.printf("%-20s %-8s %-8s %-12s %-12s%n",
                "学校名称", "起始年", "结束年", "排名变化", "分数变化");
        printRule(80);

        for (RankChange item : changes) {
            String movement = describeRankDelta(item.getRankChange());
            String shownName = truncate(item.getUniversityName(), 20);
            System.out.printf("%-20s %-8d %-8d %-12s %+.2f%n",
                    shownName, item.getStartYear(), item.getEndYear(),
                    movement, item.getScoreChange());
        }
        System.out.println();
    }

    /**
     * Prints a two-university comparison: each side's rank and score, then who leads
     * and by how much.
     *
     * @param comparison comparison to print
     */
    public void printComparison(UniversityComparison comparison) {
        printHeader("高校对比分析");
        System.out.printf("对比年份: %d年%n%n", comparison.getYear());

        System.out.println("学校信息:");
        printRule(50);
        System.out.printf("%-20s %-10s %-10s%n", "学校", "排名", "分数");
        System.out.printf("%-20s %-10d %-10.2f%n",
                comparison.getUniversityName1(), comparison.getRank1(), comparison.getScore1());
        System.out.printf("%-20s %-10d %-10.2f%n",
                comparison.getUniversityName2(), comparison.getRank2(), comparison.getScore2());
        System.out.println();

        System.out.println("对比结果:");
        printRule(50);
        System.out.printf("排名领先: %s (领先%d位)%n",
                comparison.getHigherRankedUniversity(), comparison.getRankGap());
        System.out.printf("分数差距: %.2f分%n", comparison.getScoreGap());
        System.out.println();
    }

    /**
     * Prints one university's rank and score per year, with the movement relative to
     * the previous row (the first row has no movement marker).
     *
     * @param history records in the order they should be printed
     * @param name    university name shown in the heading
     */
    public void printYearlyTrend(List<University> history, String name) {
        printHeader(name + " 历年排名趋势");
        System.out.printf("%-8s %-8s %-10s%n", "年份", "排名", "分数");
        printRule(30);

        University earlier = null;
        for (University current : history) {
            // No marker for the first row; afterwards compare with the previous row.
            String marker = (earlier == null)
                    ? ""
                    : describeRankDelta(earlier.getRank() - current.getRank());
            System.out.printf("%-8d %-8d %-10.2f %s%n",
                    current.getYear(), current.getRank(), current.getScore(), marker);
            earlier = current;
        }
        System.out.println();
    }

    /**
     * Prints the main menu followed by the input prompt (no trailing newline).
     */
    public void printMenu() {
        printHeader("高校排名分析系统");
        String[] options = {
                "1. 查看Top N高校排名",
                "2. 按省份查看高校",
                "3. 搜索高校",
                "4. 查看省份分布统计",
                "5. 查看分数统计",
                "6. 查看历年排名变化",
                "7. 对比两所高校",
                "8. 查看某高校历年趋势",
                "9. 生成所有图表",
                "0. 退出系统"
        };
        for (String option : options) {
            System.out.println(option);
        }
        printSeparator();
        System.out.print("请选择功能(0-9): ");
    }

    /**
     * Prints the welcome banner.
     */
    public void printWelcome() {
        printSeparator();
        System.out.println(" 欢迎使用高校排名分析系统");
        System.out.println(" 本系统提供高校排名数据爬取、分析和可视化功能");
        printSeparator();
        System.out.println();
    }

    /**
     * Shortens a string to at most {@code length} characters, ending with "..." when cut.
     *
     * @param str    original string; null becomes the empty string
     * @param length maximum length of the result
     * @return the original string if short enough, otherwise its first
     *         {@code length - 3} characters followed by "..."
     */
    private String truncate(String str, int length) {
        if (str == null) {
            return "";
        }
        boolean fits = str.length() <= length;
        return fits ? str : str.substring(0, length - 3) + "...";
    }
}

BIN
project/target/classes/com/example/crawler/Main.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/chart/ChartGenerator.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/BaseCrawlCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/BookCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/Command.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlAllCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlAndAnalyzeAllCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/CrawlRankingCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/ExitCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/GenerateAllAnalysisCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/NewsCommand.class

Binary file not shown.

BIN
project/target/classes/com/example/crawler/command/SaveCommand.class

Binary file not shown.

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save