65 changed files with 1985 additions and 383 deletions
@ -0,0 +1,10 @@ |
|||||
|
# 默认忽略的文件 |
||||
|
/shelf/ |
||||
|
/workspace.xml |
||||
|
# 已忽略包含查询文件的默认文件夹 |
||||
|
/queries/ |
||||
|
# Datasource local storage ignored files |
||||
|
/dataSources/ |
||||
|
/dataSources.local.xml |
||||
|
# 基于编辑器的 HTTP 客户端请求 |
||||
|
/httpRequests/ |
||||
@ -0,0 +1 @@ |
|||||
|
MovieMain.java |
||||
@ -0,0 +1,13 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="CompilerConfiguration"> |
||||
|
<annotationProcessing> |
||||
|
<profile name="Maven default annotation processors profile" enabled="true"> |
||||
|
<sourceOutputDir name="target/generated-sources/annotations" /> |
||||
|
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
||||
|
<outputRelativeToContentRoot value="true" /> |
||||
|
<module name="job-crawler" /> |
||||
|
</profile> |
||||
|
</annotationProcessing> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,7 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="Encoding"> |
||||
|
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
||||
|
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,20 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="RemoteRepositoriesConfiguration"> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="central" /> |
||||
|
<option name="name" value="Central Repository" /> |
||||
|
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
||||
|
</remote-repository> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="central" /> |
||||
|
<option name="name" value="Maven Central repository" /> |
||||
|
<option name="url" value="https://repo1.maven.org/maven2" /> |
||||
|
</remote-repository> |
||||
|
<remote-repository> |
||||
|
<option name="id" value="jboss.community" /> |
||||
|
<option name="name" value="JBoss Community repository" /> |
||||
|
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
||||
|
</remote-repository> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,12 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
||||
|
<component name="MavenProjectsManager"> |
||||
|
<option name="originalFiles"> |
||||
|
<list> |
||||
|
<option value="$PROJECT_DIR$/pom.xml" /> |
||||
|
</list> |
||||
|
</option> |
||||
|
</component> |
||||
|
<component name="ProjectRootManager" version="2" project-jdk-name="openjdk-25" project-jdk-type="JavaSDK" /> |
||||
|
</project> |
||||
@ -0,0 +1,8 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="ProjectModuleManager"> |
||||
|
<modules> |
||||
|
<module fileurl="file://$PROJECT_DIR$/job-crawler.iml" filepath="$PROJECT_DIR$/job-crawler.iml" /> |
||||
|
</modules> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,6 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project version="4"> |
||||
|
<component name="VcsDirectoryMappings"> |
||||
|
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> |
||||
|
</component> |
||||
|
</project> |
||||
@ -0,0 +1,3 @@ |
|||||
|
{ |
||||
|
"java.configuration.updateBuildConfiguration": "interactive" |
||||
|
} |
||||
Binary file not shown.
@ -1,44 +0,0 @@ |
|||||
package com.crawler; |
|
||||
|
|
||||
import com.crawler.model.Movie; |
|
||||
import com.crawler.spider.DoubanSpider; |
|
||||
import com.crawler.utils.DataUtils; |
|
||||
import com.crawler.ui.MovieResultDisplay; |
|
||||
|
|
||||
import java.util.List; |
|
||||
|
|
||||
public class MovieMain { |
|
||||
public static void main(String[] args) { |
|
||||
try { |
|
||||
System.out.println("开始爬取豆瓣电影Top250数据..."); |
|
||||
|
|
||||
// 1. 启动爬虫
|
|
||||
DoubanSpider spider = new DoubanSpider(); |
|
||||
List<Movie> movieList = spider.crawlMovies(); |
|
||||
|
|
||||
// 2. 清洗数据
|
|
||||
List<Movie> cleanedMovies = movieList.stream() |
|
||||
.map(DataUtils::cleanMovie) |
|
||||
.filter(movie -> movie != null) |
|
||||
.toList(); |
|
||||
|
|
||||
// 3. 保存数据到CSV文件
|
|
||||
DataUtils.writeMovieToCSV(cleanedMovies, "douban_movies.csv"); |
|
||||
System.out.println("数据已保存到 douban_movies.csv"); |
|
||||
|
|
||||
// 4. 展示结果
|
|
||||
MovieResultDisplay.displayResults(cleanedMovies); |
|
||||
|
|
||||
// 5. 生成图表
|
|
||||
MovieResultDisplay.generateRatingDistributionChart(cleanedMovies); |
|
||||
MovieResultDisplay.generateYearDistributionChart(cleanedMovies); |
|
||||
MovieResultDisplay.generateGenreDistributionChart(cleanedMovies); |
|
||||
MovieResultDisplay.generateYearRatingChart(cleanedMovies); |
|
||||
|
|
||||
System.out.println("\n爬虫任务完成!"); |
|
||||
|
|
||||
} catch (Exception e) { |
|
||||
e.printStackTrace(); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
@ -0,0 +1,178 @@ |
|||||
|
# 电影爬虫项目 - 继承与多态实现说明 |
||||
|
|
||||
|
## 项目简介 |
||||
|
|
||||
|
本项目是一个Java电影爬虫,从豆瓣电影Top250抓取数据,进行清洗、存储、分析,并生成多种图表展示结果。项目重点展示了面向对象编程中**继承**和**多态**的实现。 |
||||
|
|
||||
|
## 项目中的类 |
||||
|
|
||||
|
### 核心类 |
||||
|
|
||||
|
1. **MovieMain** (`src/main/java/com/crawler/MovieMain.java`) |
||||
|
- 项目主入口类 |
||||
|
- 负责协调爬虫、数据清洗、存储、展示和图表生成 |
||||
|
|
||||
|
2. **DoubanSpider** (`src/main/java/com/crawler/spider/DoubanSpider.java`) |
||||
|
- 爬虫实现类 |
||||
|
- 负责从豆瓣电影Top250页面爬取数据 |
||||
|
- 使用多线程并发爬取,提高效率 |
||||
|
|
||||
|
3. **Movie** (`src/main/java/com/crawler/chart/model/Movie.java`) |
||||
|
- 电影数据模型类 |
||||
|
- 存储电影的各种属性:排名、标题、评分、评价人数、导演、演员、年份、国家/地区、类型、简介 |
||||
|
|
||||
|
4. **MovieAnalyzer** (`src/main/java/com/crawler/analysis/MovieAnalyzer.java`) |
||||
|
- 数据分析工具类 |
||||
|
- 提供各种统计分析方法:评分分布、年份分布、类型分布、导演作品数量排行、平均评分、评分与年份相关性 |
||||
|
|
||||
|
5. **MovieResultDisplay** (`src/main/java/com/crawler/ui/MovieResultDisplay.java`) |
||||
|
- 结果显示和图表生成类 |
||||
|
- 在控制台显示统计结果 |
||||
|
- 生成各种图表:评分分布直方图、年份分布折线图、类型分布饼图、评分与年份相关性图表 |
||||
|
|
||||
|
6. **DataUtils** (`src/main/java/com/crawler/utils/DataUtils.java`) |
||||
|
- 数据工具类 |
||||
|
- 提供数据清洗和保存功能 |
||||
|
|
||||
|
### 图表相关类 |
||||
|
|
||||
|
1. **ChartGenerator** (`src/main/java/com/crawler/chart/ChartGenerator.java`) |
||||
|
- 图表生成器接口 |
||||
|
- 定义了生成图表的方法规范 |
||||
|
|
||||
|
2. **ChartManager** (`src/main/java/com/crawler/chart/ChartManager.java`) |
||||
|
- 图表管理器类 |
||||
|
- 负责管理和协调多个图表生成器 |
||||
|
|
||||
|
3. **实现类** |
||||
|
- `RatingDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java`) |
||||
|
- `YearDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java`) |
||||
|
- `GenreDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java`) |
||||
|
- `YearRatingChartGenerator` (`src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java`) |
||||
|
|
||||
|
## 封装、多态、继承的实现 |
||||
|
|
||||
|
### 1. 封装 |
||||
|
|
||||
|
- **类封装**:每个类都封装了自己的属性和方法,提供了清晰的接口 |
||||
|
- **数据封装**:`Movie`类使用私有属性和公共的getter/setter方法 |
||||
|
- **功能封装**:不同功能模块被封装到不同的类中,如爬虫、分析、展示等 |
||||
|
|
||||
|
### 2. 继承 |
||||
|
|
||||
|
- **接口继承**:所有图表生成器实现类都通过 `implements` 实现了 `ChartGenerator` 接口 |
||||
|
- **方法继承**:实现类从接口获得 `generateChart` 和 `getChartName` 两个抽象方法的声明,并各自通过 `@Override` 提供具体实现 |
||||
|
|
||||
|
### 3. 多态 |
||||
|
|
||||
|
#### 3.1 向上转型(接口引用指向实现类) |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/MovieMain.java` |
||||
|
|
||||
|
```java |
||||
|
ChartGenerator ratingChart = new RatingDistributionChartGenerator(); |
||||
|
ChartGenerator yearChart = new YearDistributionChartGenerator(); |
||||
|
ChartGenerator genreChart = new GenreDistributionChartGenerator(); |
||||
|
ChartGenerator yearRatingChart = new YearRatingChartGenerator(); |
||||
|
``` |
||||
|
|
||||
|
#### 3.2 方法参数多态 |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` |
||||
|
|
||||
|
```java |
||||
|
public void addChartGenerator(ChartGenerator generator) { |
||||
|
chartGenerators.add(generator); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 3.3 运行时多态(动态绑定) |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` |
||||
|
|
||||
|
```java |
||||
|
public void generateAllCharts(List<Movie> movies) { |
||||
|
Movie[] movieArray = movies.toArray(new Movie[0]); |
||||
|
for (ChartGenerator generator : chartGenerators) { |
||||
|
System.out.println("生成图表: " + generator.getChartName()); |
||||
|
generator.generateChart(movieArray); // 运行时根据实际类型调用对应方法 |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 3.4 统一调用接口 |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/MovieMain.java` |
||||
|
|
||||
|
```java |
||||
|
chartManager.addChartGenerator(ratingChart); |
||||
|
chartManager.addChartGenerator(yearChart); |
||||
|
chartManager.addChartGenerator(genreChart); |
||||
|
chartManager.addChartGenerator(yearRatingChart); |
||||
|
|
||||
|
chartManager.generateAllCharts(cleanedMovies); |
||||
|
``` |
||||
|
|
||||
|
## 继承与多态的优势 |
||||
|
|
||||
|
1. **代码复用**:所有图表生成器共享相同的接口方法 |
||||
|
2. **可扩展性**:新增图表类型只需实现接口,无需修改现有代码 |
||||
|
3. **统一管理**:`ChartManager` 可以统一管理不同类型的图表生成器 |
||||
|
4. **灵活性**:通过接口引用可以操作不同的实现类对象 |
||||
|
5. **可维护性**:代码结构清晰,职责分明 |
||||
|
|
||||
|
## 项目结构 |
||||
|
|
||||
|
``` |
||||
|
src/ |
||||
|
└── main/ |
||||
|
└── java/ |
||||
|
└── com/ |
||||
|
└── crawler/ |
||||
|
├── MovieMain.java # 主入口文件 |
||||
|
├── analysis/ |
||||
|
│ └── MovieAnalyzer.java # 数据分析工具 |
||||
|
├── chart/ |
||||
|
│ ├── ChartGenerator.java # 图表生成器接口 |
||||
|
│ ├── ChartManager.java # 图表管理器 |
||||
|
│ ├── model/ |
||||
|
│ │ └── Movie.java # 电影数据模型 |
||||
|
│ └── impl/ |
||||
|
│ ├── RatingDistributionChartGenerator.java # 评分分布图表 |
||||
|
│ ├── YearDistributionChartGenerator.java # 年份分布图表 |
||||
|
│ ├── GenreDistributionChartGenerator.java # 类型分布图表 |
||||
|
│ └── YearRatingChartGenerator.java # 年份评分相关性图表 |
||||
|
├── spider/ |
||||
|
│ └── DoubanSpider.java # 豆瓣爬虫实现 |
||||
|
├── ui/ |
||||
|
│ └── MovieResultDisplay.java # 结果显示和图表生成 |
||||
|
└── utils/ |
||||
|
└── DataUtils.java # 数据工具类 |
||||
|
``` |
||||
|
|
||||
|
## 运行说明 |
||||
|
|
||||
|
1. **直接运行**:在IDE中直接运行 `MovieMain.java`,或使用命令行: |
||||
|
``` |
||||
|
java -cp "src/main/java;lib/*" com.crawler.MovieMain |
||||
|
``` |
||||
|
|
||||
|
2. **依赖要求**:需要以下库 |
||||
|
- jsoup-1.17.2.jar |
||||
|
- jfreechart-1.5.4.jar |
||||
|
- jcommon-1.0.24.jar |
||||
|
|
||||
|
3. **运行结果**: |
||||
|
- 控制台输出爬取进度和统计结果 |
||||
|
- 生成的CSV数据文件保存在项目根目录 |
||||
|
- 生成的图表以PNG格式保存在项目根目录 |
||||
|
|
||||
|
## 技术栈 |
||||
|
|
||||
|
- Java 16+(代码使用了 `Stream.toList()` 和 `List.of()`,无法在 Java 8 上运行) |
||||
|
- Jsoup (网页解析) |
||||
|
- JFreeChart (图表生成) |
||||
|
|
||||
|
## 总结 |
||||
|
|
||||
|
本项目通过图表生成器接口及其实现类,充分展示了面向对象编程中**继承**和**多态**的核心概念。接口定义了统一的方法规范,实现类提供了具体的实现逻辑,通过接口引用和运行时动态绑定,实现了代码的灵活性和可扩展性。同时,项目也展示了良好的封装设计,将不同功能模块封装到不同的类中,提高了代码的可维护性。 |
||||
|
|
|
@ -0,0 +1,8 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<module version="4"> |
||||
|
<component name="AdditionalModuleElements"> |
||||
|
<content url="file://$MODULE_DIR$" dumb="true"> |
||||
|
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" /> |
||||
|
</content> |
||||
|
</component> |
||||
|
</module> |
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,53 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
|
||||
|
<groupId>com.crawler</groupId> |
||||
|
<artifactId>job-crawler</artifactId> |
||||
|
<version>1.0-SNAPSHOT</version> |
||||
|
|
||||
|
<properties> |
||||
|
<maven.compiler.source>1.8</maven.compiler.source> |
||||
|
<maven.compiler.target>1.8</maven.compiler.target> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
</properties> |
||||
|
|
||||
|
<dependencies> |
||||
|
<!-- Jsoup - HTML解析库 --> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<!-- JFreeChart - 图表生成库 --> |
||||
|
<dependency> |
||||
|
<groupId>org.jfree</groupId> |
||||
|
<artifactId>jfreechart</artifactId> |
||||
|
<version>1.5.4</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<!-- JCommon - JFreeChart依赖 --> |
||||
|
<dependency> |
||||
|
<groupId>org.jfree</groupId> |
||||
|
<artifactId>jcommon</artifactId> |
||||
|
<version>1.0.24</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
|
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
<configuration> |
||||
|
<source>${maven.compiler.source}</source> |
||||
|
<target>${maven.compiler.target}</target> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
|
Binary file not shown.
@ -0,0 +1,132 @@ |
|||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.ChartManager; |
||||
|
import com.crawler.chart.impl.GenreDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.RatingDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearRatingChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.chart.model.SoftRanking; |
||||
|
import com.crawler.chart.model.WeatherData; |
||||
|
import com.crawler.spider.DoubanSpider; |
||||
|
import com.crawler.spider.SoftSpider; |
||||
|
import com.crawler.spider.WeatherSpider; |
||||
|
import com.crawler.utils.DataUtils; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class MovieMain { |
||||
|
public static void main(String[] args) { |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
try { |
||||
|
System.out.println("请选择要爬取的数据:"); |
||||
|
System.out.println("1. 豆瓣电影Top250"); |
||||
|
System.out.println("2. 软科中国大学排名"); |
||||
|
System.out.println("3. 长沙天气数据"); |
||||
|
System.out.println("4. 全部爬取"); |
||||
|
System.out.print("请输入选择(1-4): "); |
||||
|
|
||||
|
int choice = scanner.nextInt(); |
||||
|
|
||||
|
if (choice == 1 || choice == 4) { |
||||
|
crawlDoubanMovies(); |
||||
|
} |
||||
|
|
||||
|
if (choice == 2 || choice == 4) { |
||||
|
crawlSoftRanking(); |
||||
|
} |
||||
|
|
||||
|
if (choice == 3 || choice == 4) { |
||||
|
crawlWeather(); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n爬虫任务完成!"); |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} finally { |
||||
|
scanner.close(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void crawlDoubanMovies() throws Exception { |
||||
|
System.out.println("\n开始爬取豆瓣电影Top250数据..."); |
||||
|
|
||||
|
DoubanSpider spider = new DoubanSpider(); |
||||
|
List<Movie> movieList = spider.crawlMovies(); |
||||
|
|
||||
|
List<Movie> cleanedMovies = movieList.stream() |
||||
|
.map(DataUtils::cleanMovie) |
||||
|
.filter(movie -> movie != null) |
||||
|
.toList(); |
||||
|
|
||||
|
DataUtils.writeMovieToCSV(cleanedMovies, "douban_movies.csv"); |
||||
|
MovieResultDisplay.displayResults(cleanedMovies); |
||||
|
|
||||
|
ChartManager chartManager = new ChartManager(); |
||||
|
|
||||
|
ChartGenerator ratingChart = new RatingDistributionChartGenerator(); |
||||
|
ChartGenerator yearChart = new YearDistributionChartGenerator(); |
||||
|
ChartGenerator genreChart = new GenreDistributionChartGenerator(); |
||||
|
ChartGenerator yearRatingChart = new YearRatingChartGenerator(); |
||||
|
|
||||
|
chartManager.addChartGenerator(ratingChart); |
||||
|
chartManager.addChartGenerator(yearChart); |
||||
|
chartManager.addChartGenerator(genreChart); |
||||
|
chartManager.addChartGenerator(yearRatingChart); |
||||
|
|
||||
|
chartManager.generateAllCharts(cleanedMovies); |
||||
|
} |
||||
|
|
||||
|
private static void crawlSoftRanking() throws Exception { |
||||
|
System.out.println("\n开始爬取软科中国大学排名..."); |
||||
|
|
||||
|
SoftSpider spider = new SoftSpider(); |
||||
|
List<SoftRanking> rankingList = spider.crawlSoftRanking(); |
||||
|
|
||||
|
if (!rankingList.isEmpty()) { |
||||
|
DataUtils.writeSoftRankingToCSV(rankingList, "soft_ranking.csv"); |
||||
|
|
||||
|
System.out.println("\n软科中国大学排名前10名:"); |
||||
|
for (int i = 0; i < Math.min(10, rankingList.size()); i++) { |
||||
|
SoftRanking ranking = rankingList.get(i); |
||||
|
System.out.printf("%d. %s - %s - %d分%n", |
||||
|
ranking.getRank(), |
||||
|
ranking.getUniversityName(), |
||||
|
ranking.getProvince(), |
||||
|
ranking.getScore()); |
||||
|
} |
||||
|
} else { |
||||
|
System.out.println("未获取到软科排名数据"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void crawlWeather() throws Exception { |
||||
|
System.out.println("\n开始爬取长沙天气数据..."); |
||||
|
|
||||
|
WeatherSpider spider = new WeatherSpider(); |
||||
|
List<WeatherData> weatherList = spider.crawlWeather(); |
||||
|
|
||||
|
if (!weatherList.isEmpty()) { |
||||
|
DataUtils.writeWeatherToCSV(weatherList, "changsha_weather.csv"); |
||||
|
|
||||
|
System.out.println("\n长沙近期天气:"); |
||||
|
for (WeatherData weather : weatherList) { |
||||
|
System.out.printf("%s (%s): %s,温度 %s~%s,%s %s%n", |
||||
|
weather.getDate(), |
||||
|
weather.getWeek(), |
||||
|
weather.getWeather(), |
||||
|
weather.getLowTemp(), |
||||
|
weather.getHighTemp(), |
||||
|
weather.getWindDirection(), |
||||
|
weather.getWindLevel()); |
||||
|
} |
||||
|
} else { |
||||
|
System.out.println("未获取到长沙天气数据"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,119 @@ |
|||||
|
package com.crawler.analysis; |
||||
|
|
||||
|
import com.crawler.chart.model.Movie; |
||||
|
|
||||
|
import java.util.*; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
public class MovieAnalyzer { |
||||
|
// 统计电影评分分布
|
||||
|
public static Map<Double, Integer> analyzeRatingDistribution(List<Movie> movieList) { |
||||
|
Map<Double, Integer> ratingMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null) { |
||||
|
double rating = movie.getRating(); |
||||
|
ratingMap.put(rating, ratingMap.getOrDefault(rating, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return ratingMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影年份分布
|
||||
|
public static Map<String, Integer> analyzeYearDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> yearMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getYear() != null) { |
||||
|
String year = movie.getYear(); |
||||
|
yearMap.put(year, yearMap.getOrDefault(year, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return yearMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影类型分布
|
||||
|
public static Map<String, Integer> analyzeGenreDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> genreMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getGenre() != null) { |
||||
|
String genre = movie.getGenre(); |
||||
|
genreMap.put(genre, genreMap.getOrDefault(genre, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return genreMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影国家/地区分布
|
||||
|
public static Map<String, Integer> analyzeCountryDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> countryMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getCountry() != null) { |
||||
|
String country = movie.getCountry(); |
||||
|
countryMap.put(country, countryMap.getOrDefault(country, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return countryMap; |
||||
|
} |
||||
|
|
||||
|
// 分析导演作品数量排行
|
||||
|
public static Map<String, Integer> analyzeDirectorWorks(List<Movie> movieList) { |
||||
|
Map<String, Integer> directorMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getDirector() != null) { |
||||
|
String director = movie.getDirector(); |
||||
|
directorMap.put(director, directorMap.getOrDefault(director, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 按作品数量排序
|
||||
|
return directorMap.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.collect(Collectors.toMap( |
||||
|
Map.Entry::getKey, |
||||
|
Map.Entry::getValue, |
||||
|
(e1, e2) -> e1, |
||||
|
LinkedHashMap::new |
||||
|
)); |
||||
|
} |
||||
|
|
||||
|
// 计算平均评分
|
||||
|
public static double calculateAverageRating(List<Movie> movieList) { |
||||
|
return movieList.stream() |
||||
|
.filter(Objects::nonNull) |
||||
|
.mapToDouble(Movie::getRating) |
||||
|
.average() |
||||
|
.orElse(0.0); |
||||
|
} |
||||
|
|
||||
|
// 计算评分与年份的相关性(简单计算)
|
||||
|
public static Map<String, Double> analyzeYearRatingCorrelation(List<Movie> movieList) { |
||||
|
Map<String, List<Double>> yearRatingsMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getYear() != null) { |
||||
|
String year = movie.getYear(); |
||||
|
double rating = movie.getRating(); |
||||
|
yearRatingsMap.computeIfAbsent(year, k -> new ArrayList<>()).add(rating); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 计算每年的平均评分
|
||||
|
Map<String, Double> yearAverageRatingMap = new TreeMap<>(); |
||||
|
for (Map.Entry<String, List<Double>> entry : yearRatingsMap.entrySet()) { |
||||
|
String year = entry.getKey(); |
||||
|
List<Double> ratings = entry.getValue(); |
||||
|
double average = ratings.stream().mapToDouble(Double::doubleValue).average().orElse(0.0); |
||||
|
yearAverageRatingMap.put(year, average); |
||||
|
} |
||||
|
|
||||
|
return yearAverageRatingMap; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,8 @@ |
|||||
|
package com.crawler.chart; |
||||
|
|
||||
|
import com.crawler.chart.model.Movie; |
||||
|
|
||||
|
public interface ChartGenerator { |
||||
|
void generateChart(Movie[] movies); |
||||
|
String getChartName(); |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,30 @@ |
|||||
|
package com.crawler.chart; |
||||
|
|
||||
|
import com.crawler.chart.impl.GenreDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.RatingDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearRatingChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ChartManager { |
||||
|
private List<ChartGenerator> chartGenerators; |
||||
|
|
||||
|
public ChartManager() { |
||||
|
chartGenerators = new ArrayList<>(); |
||||
|
} |
||||
|
|
||||
|
public void addChartGenerator(ChartGenerator generator) { |
||||
|
chartGenerators.add(generator); |
||||
|
} |
||||
|
|
||||
|
public void generateAllCharts(List<Movie> movies) { |
||||
|
Movie[] movieArray = movies.toArray(new Movie[0]); |
||||
|
for (ChartGenerator generator : chartGenerators) { |
||||
|
System.out.println("生成图表: " + generator.getChartName()); |
||||
|
generator.generateChart(movieArray); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,26 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class GenreDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateGenreDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Genre Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,27 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
public class RatingDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateRatingDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Rating Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,25 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class YearDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateYearDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Year Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,25 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class YearRatingChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateYearRatingChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Year Rating Correlation Chart"; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,108 @@ |
|||||
|
package com.crawler.chart.model; |
||||
|
|
||||
|
/**
 * Mutable data holder for one movie entry: rank, title, rating, number of raters,
 * director, actors, year, country, genre and quote.
 */
public class Movie {

    private int rank;
    private String title;
    private double rating;
    private int ratingPeople;
    private String director;
    private String actors;
    private String year;
    private String country;
    private String genre;
    private String quote;

    // ---- rank ----

    /** @return the rank */
    public int getRank() {
        return this.rank;
    }

    /** @param rank the rank to store */
    public void setRank(int rank) {
        this.rank = rank;
    }

    // ---- title ----

    /** @return the title, or {@code null} if never set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    // ---- rating ----

    /** @return the rating */
    public double getRating() {
        return this.rating;
    }

    /** @param rating the rating to store */
    public void setRating(double rating) {
        this.rating = rating;
    }

    // ---- ratingPeople ----

    /** @return the number of people who rated the movie */
    public int getRatingPeople() {
        return this.ratingPeople;
    }

    /** @param ratingPeople the number of raters to store */
    public void setRatingPeople(int ratingPeople) {
        this.ratingPeople = ratingPeople;
    }

    // ---- director ----

    /** @return the director, or {@code null} if never set */
    public String getDirector() {
        return this.director;
    }

    /** @param director the director to store */
    public void setDirector(String director) {
        this.director = director;
    }

    // ---- actors ----

    /** @return the actors string, or {@code null} if never set */
    public String getActors() {
        return this.actors;
    }

    /** @param actors the actors string to store */
    public void setActors(String actors) {
        this.actors = actors;
    }

    // ---- year ----

    /** @return the year string, or {@code null} if never set */
    public String getYear() {
        return this.year;
    }

    /** @param year the year string to store */
    public void setYear(String year) {
        this.year = year;
    }

    // ---- country ----

    /** @return the country/region, or {@code null} if never set */
    public String getCountry() {
        return this.country;
    }

    /** @param country the country/region to store */
    public void setCountry(String country) {
        this.country = country;
    }

    // ---- genre ----

    /** @return the genre, or {@code null} if never set */
    public String getGenre() {
        return this.genre;
    }

    /** @param genre the genre to store */
    public void setGenre(String genre) {
        this.genre = genre;
    }

    // ---- quote ----

    /** @return the quote, or {@code null} if never set */
    public String getQuote() {
        return this.quote;
    }

    /** @param quote the quote to store */
    public void setQuote(String quote) {
        this.quote = quote;
    }

    /**
     * Renders rank, title, rating, ratingPeople, director, year and genre.
     * Actors, country and quote are not part of the output.
     *
     * @return a single-line description of this movie
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Movie{");
        text.append("rank=").append(rank);
        text.append(", title='").append(title).append('\'');
        text.append(", rating=").append(rating);
        text.append(", ratingPeople=").append(ratingPeople);
        text.append(", director='").append(director).append('\'');
        text.append(", year='").append(year).append('\'');
        text.append(", genre='").append(genre).append('\'');
        text.append('}');
        return text.toString();
    }
}
||||
@ -0,0 +1,96 @@ |
|||||
|
package com.crawler.chart.model; |
||||
|
|
||||
|
/**
 * Mutable data holder for one university ranking entry: rank, name, province, type,
 * total score and four sub-scores.
 */
public class SoftRanking {

    private int rank;
    private String universityName;
    private String province;
    private String type;
    private int score;
    private int alumniScore;
    private int scientificResearchScore;
    private int studentQualityScore;
    private int resourceScore;

    // ---- rank ----

    /** @return the rank */
    public int getRank() {
        return this.rank;
    }

    /** @param rank the rank to store */
    public void setRank(int rank) {
        this.rank = rank;
    }

    // ---- universityName ----

    /** @return the university name, or {@code null} if never set */
    public String getUniversityName() {
        return this.universityName;
    }

    /** @param universityName the university name to store */
    public void setUniversityName(String universityName) {
        this.universityName = universityName;
    }

    // ---- province ----

    /** @return the province, or {@code null} if never set */
    public String getProvince() {
        return this.province;
    }

    /** @param province the province to store */
    public void setProvince(String province) {
        this.province = province;
    }

    // ---- type ----

    /** @return the type, or {@code null} if never set */
    public String getType() {
        return this.type;
    }

    /** @param type the type to store */
    public void setType(String type) {
        this.type = type;
    }

    // ---- score ----

    /** @return the total score */
    public int getScore() {
        return this.score;
    }

    /** @param score the total score to store */
    public void setScore(int score) {
        this.score = score;
    }

    // ---- alumniScore ----

    /** @return the alumni score */
    public int getAlumniScore() {
        return this.alumniScore;
    }

    /** @param alumniScore the alumni score to store */
    public void setAlumniScore(int alumniScore) {
        this.alumniScore = alumniScore;
    }

    // ---- scientificResearchScore ----

    /** @return the scientific research score */
    public int getScientificResearchScore() {
        return this.scientificResearchScore;
    }

    /** @param scientificResearchScore the scientific research score to store */
    public void setScientificResearchScore(int scientificResearchScore) {
        this.scientificResearchScore = scientificResearchScore;
    }

    // ---- studentQualityScore ----

    /** @return the student quality score */
    public int getStudentQualityScore() {
        return this.studentQualityScore;
    }

    /** @param studentQualityScore the student quality score to store */
    public void setStudentQualityScore(int studentQualityScore) {
        this.studentQualityScore = studentQualityScore;
    }

    // ---- resourceScore ----

    /** @return the resource score */
    public int getResourceScore() {
        return this.resourceScore;
    }

    /** @param resourceScore the resource score to store */
    public void setResourceScore(int resourceScore) {
        this.resourceScore = resourceScore;
    }

    /**
     * Renders rank, universityName, province, type and score.
     * The four sub-scores are not part of the output.
     *
     * @return a single-line description of this ranking entry
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SoftRanking{");
        text.append("rank=").append(rank);
        text.append(", universityName='").append(universityName).append('\'');
        text.append(", province='").append(province).append('\'');
        text.append(", type='").append(type).append('\'');
        text.append(", score=").append(score);
        text.append('}');
        return text.toString();
    }
}
||||
@ -0,0 +1,77 @@ |
|||||
|
package com.crawler.chart.model; |
||||
|
|
||||
|
/**
 * Mutable holder for one day's weather record.
 *
 * <p>Every value is kept as the raw string handed to the setter; the class does no parsing or
 * validation, and all fields start out as {@code null}.</p>
 */
public class WeatherData {
    /** Date label of the record. */
    private String date;
    /** Day-of-week label of the record. */
    private String week;
    /** Weather condition text. */
    private String weather;
    /** High temperature text. */
    private String highTemp;
    /** Low temperature text. */
    private String lowTemp;
    /** Wind direction text. */
    private String windDirection;
    /** Wind level text. */
    private String windLevel;

    /** @return the stored date label, or {@code null} if unset */
    public String getDate() {
        return this.date;
    }

    /** @param date the date label to store */
    public void setDate(String date) {
        this.date = date;
    }

    /** @return the stored day-of-week label, or {@code null} if unset */
    public String getWeek() {
        return this.week;
    }

    /** @param week the day-of-week label to store */
    public void setWeek(String week) {
        this.week = week;
    }

    /** @return the stored weather condition, or {@code null} if unset */
    public String getWeather() {
        return this.weather;
    }

    /** @param weather the weather condition to store */
    public void setWeather(String weather) {
        this.weather = weather;
    }

    /** @return the stored high temperature text, or {@code null} if unset */
    public String getHighTemp() {
        return this.highTemp;
    }

    /** @param highTemp the high temperature text to store */
    public void setHighTemp(String highTemp) {
        this.highTemp = highTemp;
    }

    /** @return the stored low temperature text, or {@code null} if unset */
    public String getLowTemp() {
        return this.lowTemp;
    }

    /** @param lowTemp the low temperature text to store */
    public void setLowTemp(String lowTemp) {
        this.lowTemp = lowTemp;
    }

    /** @return the stored wind direction, or {@code null} if unset */
    public String getWindDirection() {
        return this.windDirection;
    }

    /** @param windDirection the wind direction to store */
    public void setWindDirection(String windDirection) {
        this.windDirection = windDirection;
    }

    /** @return the stored wind level, or {@code null} if unset */
    public String getWindLevel() {
        return this.windLevel;
    }

    /** @param windLevel the wind level to store */
    public void setWindLevel(String windLevel) {
        this.windLevel = windLevel;
    }

    /**
     * Renders the date, weather and both temperatures; week and wind fields are not included.
     *
     * @return a single-line description in the form {@code WeatherData{date='...', ...}}
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("WeatherData{");
        text.append("date='").append(date).append('\'');
        text.append(", weather='").append(weather).append('\'');
        text.append(", highTemp='").append(highTemp).append('\'');
        text.append(", lowTemp='").append(lowTemp).append('\'');
        return text.append('}').toString();
    }
}
||||
Binary file not shown.
@ -0,0 +1,206 @@ |
|||||
|
package com.crawler.spider; |
||||
|
|
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.concurrent.*; |
||||
|
|
||||
|
public class DoubanSpider { |
||||
|
private static final String BASE_URL = "https://movie.douban.com/top250"; |
||||
|
private static final int MAX_PAGES = 12; |
||||
|
private static final int THREAD_POOL_SIZE = 3; |
||||
|
private static final int REQUEST_DELAY = 1000; |
||||
|
|
||||
|
public List<Movie> crawlMovies() { |
||||
|
List<Movie> movieList = new ArrayList<>(); |
||||
|
ExecutorService executorService = Executors.newFixedThreadPool(THREAD_POOL_SIZE); |
||||
|
List<Future<List<Movie>>> futures = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
for (int page = 0; page < MAX_PAGES; page++) { |
||||
|
final int currentPage = page; |
||||
|
futures.add(executorService.submit(() -> { |
||||
|
try { |
||||
|
Thread.sleep(REQUEST_DELAY); |
||||
|
return crawlPage(currentPage); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return new ArrayList<>(); |
||||
|
} |
||||
|
})); |
||||
|
} |
||||
|
|
||||
|
for (Future<List<Movie>> future : futures) { |
||||
|
try { |
||||
|
movieList.addAll(future.get()); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} finally { |
||||
|
executorService.shutdown(); |
||||
|
} |
||||
|
|
||||
|
return movieList; |
||||
|
} |
||||
|
|
||||
|
private List<Movie> crawlPage(int page) throws IOException { |
||||
|
List<Movie> movieList = new ArrayList<>(); |
||||
|
String url = BASE_URL + "?start=" + (page * 25); |
||||
|
System.out.println("爬取页面: " + url); |
||||
|
|
||||
|
Document document = Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") |
||||
|
.timeout(10000) |
||||
|
.get(); |
||||
|
|
||||
|
System.out.println("页面标题: " + document.title()); |
||||
|
|
||||
|
// 选择电影条目
|
||||
|
Elements movieItems = document.select(".grid_view li"); |
||||
|
System.out.println("找到电影条目数: " + movieItems.size()); |
||||
|
|
||||
|
for (Element item : movieItems) { |
||||
|
Movie movie = parseMovie(item); |
||||
|
if (movie != null) { |
||||
|
movieList.add(movie); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("页面" + (page + 1) + "爬取成功,获取电影数: " + movieList.size()); |
||||
|
return movieList; |
||||
|
} |
||||
|
|
||||
|
private Movie parseMovie(Element item) { |
||||
|
Movie movie = new Movie(); |
||||
|
|
||||
|
try { |
||||
|
// 排名
|
||||
|
Element rankElement = item.selectFirst(".pic em"); |
||||
|
if (rankElement != null) { |
||||
|
movie.setRank(Integer.parseInt(rankElement.text().trim())); |
||||
|
} |
||||
|
|
||||
|
// 标题
|
||||
|
Element titleElement = item.selectFirst(".title"); |
||||
|
if (titleElement != null) { |
||||
|
movie.setTitle(titleElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
// 评分
|
||||
|
Element ratingElement = item.selectFirst(".rating_num"); |
||||
|
if (ratingElement != null) { |
||||
|
movie.setRating(Double.parseDouble(ratingElement.text().trim())); |
||||
|
} |
||||
|
|
||||
|
// 评价人数
|
||||
|
Element ratingPeopleElement = item.selectFirst(".star span:nth-child(4)"); |
||||
|
if (ratingPeopleElement != null) { |
||||
|
String ratingPeople = ratingPeopleElement.text().trim(); |
||||
|
movie.setRatingPeople(Integer.parseInt(ratingPeople.replaceAll("[^0-9]", ""))); |
||||
|
} |
||||
|
|
||||
|
// 导演和演员
|
||||
|
Element infoElement = item.selectFirst(".bd p:first-child"); |
||||
|
if (infoElement != null) { |
||||
|
String info = infoElement.text().trim(); |
||||
|
|
||||
|
// 提取导演
|
||||
|
if (info.contains("导演:")) { |
||||
|
int directorStart = info.indexOf("导演:") + 3; |
||||
|
int directorEnd = info.indexOf("主演:"); |
||||
|
if (directorEnd == -1) { |
||||
|
directorEnd = info.indexOf(" "); |
||||
|
// 找到第一个数字年份的位置
|
||||
|
for (int i = 0; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
directorEnd = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
if (directorEnd != -1) { |
||||
|
movie.setDirector(info.substring(directorStart, directorEnd).trim()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取主演
|
||||
|
if (info.contains("主演:")) { |
||||
|
int actorsStart = info.indexOf("主演:") + 3; |
||||
|
int actorsEnd = info.length(); |
||||
|
// 找到第一个数字年份的位置
|
||||
|
for (int i = actorsStart; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
actorsEnd = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
movie.setActors(info.substring(actorsStart, actorsEnd).trim()); |
||||
|
} |
||||
|
|
||||
|
// 提取年份、国家/地区和类型
|
||||
|
// 找到年份的开始位置(第一个数字)
|
||||
|
int yearStart = -1; |
||||
|
for (int i = 0; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
yearStart = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (yearStart != -1) { |
||||
|
// 提取年份(4位数字)
|
||||
|
if (yearStart + 4 <= info.length()) { |
||||
|
String year = info.substring(yearStart, yearStart + 4); |
||||
|
if (year.matches("\\d{4}")) { |
||||
|
movie.setYear(year); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取国家/地区和类型
|
||||
|
int slashIndex = info.indexOf("/", yearStart); |
||||
|
if (slashIndex != -1) { |
||||
|
// 提取国家/地区
|
||||
|
int nextSlashIndex = info.indexOf("/", slashIndex + 1); |
||||
|
if (nextSlashIndex != -1) { |
||||
|
String country = info.substring(slashIndex + 1, nextSlashIndex).trim(); |
||||
|
movie.setCountry(country); |
||||
|
|
||||
|
// 提取类型
|
||||
|
String genre = info.substring(nextSlashIndex + 1).trim(); |
||||
|
// 取第一个类型
|
||||
|
if (!genre.isEmpty()) { |
||||
|
String[] genres = genre.split(" "); |
||||
|
if (genres.length > 0) { |
||||
|
movie.setGenre(genres[0]); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 简介
|
||||
|
Element quoteElement = item.selectFirst(".inq"); |
||||
|
if (quoteElement != null) { |
||||
|
movie.setQuote(quoteElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
// 过滤无效电影
|
||||
|
if (movie.getTitle() == null || movie.getTitle().isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
return movie; |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,104 @@ |
|||||
|
package com.crawler.spider; |
||||
|
|
||||
|
import com.crawler.chart.model.SoftRanking; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class SoftSpider { |
||||
|
private static final String BASE_URL = "https://www.shanghairanking.cn/rankings/bcur/2026"; |
||||
|
private static final int MAX_RANKINGS = 30; |
||||
|
private static final int PAGE_SIZE = 50; |
||||
|
private static final int REQUEST_DELAY = 1000; |
||||
|
|
||||
|
public List<SoftRanking> crawlSoftRanking() { |
||||
|
List<SoftRanking> rankingList = new ArrayList<>(); |
||||
|
int totalPages = (int) Math.ceil((double) MAX_RANKINGS / PAGE_SIZE); |
||||
|
|
||||
|
try { |
||||
|
System.out.println("爬取软科中国大学排名前" + MAX_RANKINGS + "名..."); |
||||
|
|
||||
|
for (int page = 0; page < totalPages; page++) { |
||||
|
String url = BASE_URL + "?page=" + (page + 1); |
||||
|
System.out.println("爬取页面: " + url); |
||||
|
|
||||
|
Document document = Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") |
||||
|
.timeout(30000) |
||||
|
.get(); |
||||
|
|
||||
|
Elements rows = document.select(".rk-table tbody tr"); |
||||
|
System.out.println("页面" + (page + 1) + "找到条目数: " + rows.size()); |
||||
|
|
||||
|
for (Element row : rows) { |
||||
|
SoftRanking ranking = parseRanking(row); |
||||
|
if (ranking != null) { |
||||
|
rankingList.add(ranking); |
||||
|
if (rankingList.size() >= MAX_RANKINGS) { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (rankingList.size() >= MAX_RANKINGS) { |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
Thread.sleep(REQUEST_DELAY); |
||||
|
} |
||||
|
|
||||
|
System.out.println("爬取成功,获取排名数: " + rankingList.size()); |
||||
|
} catch (IOException | InterruptedException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
|
||||
|
return rankingList; |
||||
|
} |
||||
|
|
||||
|
private SoftRanking parseRanking(Element row) { |
||||
|
SoftRanking ranking = new SoftRanking(); |
||||
|
|
||||
|
try { |
||||
|
Element rankElement = row.selectFirst("td:nth-child(1)"); |
||||
|
if (rankElement != null) { |
||||
|
String rankText = rankElement.text().trim(); |
||||
|
ranking.setRank(Integer.parseInt(rankText)); |
||||
|
} |
||||
|
|
||||
|
Element nameElement = row.selectFirst("td:nth-child(2) .name-cn"); |
||||
|
if (nameElement != null) { |
||||
|
ranking.setUniversityName(nameElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
Element provinceElement = row.selectFirst("td:nth-child(3)"); |
||||
|
if (provinceElement != null) { |
||||
|
ranking.setProvince(provinceElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
Element typeElement = row.selectFirst("td:nth-child(4)"); |
||||
|
if (typeElement != null) { |
||||
|
ranking.setType(typeElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
Element scoreElement = row.selectFirst("td:nth-child(5)"); |
||||
|
if (scoreElement != null) { |
||||
|
String scoreText = scoreElement.text().trim(); |
||||
|
ranking.setScore((int) Double.parseDouble(scoreText)); |
||||
|
} |
||||
|
|
||||
|
if (ranking.getUniversityName() == null || ranking.getUniversityName().isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
return ranking; |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,201 @@ |
|||||
|
package com.crawler.spider; |
||||
|
|
||||
|
import com.crawler.chart.model.WeatherData; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.time.LocalDate; |
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.regex.Matcher; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class WeatherSpider { |
||||
|
// 中国气象局 - 长沙天气页面
|
||||
|
private static final String WEATHER_URL = "https://weather.cma.cn/web/weather/57679.htm"; |
||||
|
|
||||
|
public List<WeatherData> crawlWeather() { |
||||
|
List<WeatherData> weatherList = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
System.out.println("爬取长沙天气数据(中国气象局)..."); |
||||
|
|
||||
|
// 尝试中国气象局网站
|
||||
|
weatherList = crawlFromCMA(); |
||||
|
|
||||
|
// 如果爬取失败,生成模拟数据
|
||||
|
if (weatherList.isEmpty()) { |
||||
|
System.out.println("气象局网站爬取失败,生成模拟天气数据..."); |
||||
|
weatherList = generateMockWeatherData(); |
||||
|
} |
||||
|
|
||||
|
System.out.println("爬取成功,获取天气数据数: " + weatherList.size()); |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("爬取天气数据时发生错误: " + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
// 生成模拟数据作为备选
|
||||
|
weatherList = generateMockWeatherData(); |
||||
|
} |
||||
|
|
||||
|
return weatherList; |
||||
|
} |
||||
|
|
||||
|
private List<WeatherData> crawlFromCMA() { |
||||
|
List<WeatherData> weatherList = new ArrayList<>(); |
||||
|
try { |
||||
|
Document document = Jsoup.connect(WEATHER_URL) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
||||
|
.timeout(30000) |
||||
|
.get(); |
||||
|
|
||||
|
System.out.println("页面标题: " + document.title()); |
||||
|
|
||||
|
// 获取页面文本内容
|
||||
|
String bodyText = document.body().text(); |
||||
|
System.out.println("页面内容预览(前1000字符): " + bodyText.substring(0, Math.min(1000, bodyText.length()))); |
||||
|
|
||||
|
// 解析7天天气预报数据
|
||||
|
// 格式:星期三04/29 小雨 北风 3~4级 18℃9℃
|
||||
|
Pattern pattern = Pattern.compile("(星期[一二三四五六日])(\\d{2}/\\d{2})\\s*(\\S+)\\s*(\\S+)\\s*(\\S+)\\s*(\\d+℃)(\\d+℃)"); |
||||
|
Matcher matcher = pattern.matcher(bodyText); |
||||
|
|
||||
|
while (matcher.find()) { |
||||
|
WeatherData weather = new WeatherData(); |
||||
|
weather.setWeek(matcher.group(1)); |
||||
|
weather.setDate(matcher.group(2)); |
||||
|
weather.setWeather(matcher.group(3)); |
||||
|
weather.setWindDirection(matcher.group(4)); |
||||
|
weather.setWindLevel(matcher.group(5)); |
||||
|
weather.setLowTemp(matcher.group(7)); |
||||
|
weather.setHighTemp(matcher.group(6)); |
||||
|
|
||||
|
weatherList.add(weather); |
||||
|
System.out.println("解析到天气: " + weather.getDate() + " " + weather.getWeek() + " " + weather.getWeather() + " " + weather.getLowTemp() + "-" + weather.getHighTemp()); |
||||
|
} |
||||
|
|
||||
|
// 如果正则解析失败,尝试其他方法
|
||||
|
if (weatherList.isEmpty()) { |
||||
|
weatherList = parseFromElements(document); |
||||
|
} |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
System.out.println("访问气象局网站失败: " + e.getMessage()); |
||||
|
} |
||||
|
return weatherList; |
||||
|
} |
||||
|
|
||||
|
private List<WeatherData> parseFromElements(Document document) { |
||||
|
List<WeatherData> weatherList = new ArrayList<>(); |
||||
|
|
||||
|
// 尝试查找包含日期的元素
|
||||
|
Elements dateElements = document.select("*:contains(星期)"); |
||||
|
System.out.println("找到包含星期的元素数: " + dateElements.size()); |
||||
|
|
||||
|
for (Element element : dateElements) { |
||||
|
String text = element.text(); |
||||
|
if (text.contains("星期") && text.contains("℃")) { |
||||
|
// 提取日期和天气信息
|
||||
|
WeatherData weather = parseWeatherText(text); |
||||
|
if (weather != null) { |
||||
|
weatherList.add(weather); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return weatherList; |
||||
|
} |
||||
|
|
||||
|
private WeatherData parseWeatherText(String text) { |
||||
|
WeatherData weather = new WeatherData(); |
||||
|
|
||||
|
try { |
||||
|
// 提取星期
|
||||
|
Pattern weekPattern = Pattern.compile("星期[一二三四五六日]"); |
||||
|
Matcher weekMatcher = weekPattern.matcher(text); |
||||
|
if (weekMatcher.find()) { |
||||
|
weather.setWeek(weekMatcher.group()); |
||||
|
} |
||||
|
|
||||
|
// 提取日期 (格式: 04/29)
|
||||
|
Pattern datePattern = Pattern.compile("\\d{2}/\\d{2}"); |
||||
|
Matcher dateMatcher = datePattern.matcher(text); |
||||
|
if (dateMatcher.find()) { |
||||
|
weather.setDate(dateMatcher.group()); |
||||
|
} |
||||
|
|
||||
|
// 提取天气状况
|
||||
|
String[] conditions = {"晴", "多云", "阴", "小雨", "中雨", "大雨", "雷阵雨", "雾", "霾"}; |
||||
|
for (String condition : conditions) { |
||||
|
if (text.contains(condition)) { |
||||
|
weather.setWeather(condition); |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取温度
|
||||
|
Pattern tempPattern = Pattern.compile("(\\d+℃)"); |
||||
|
Matcher tempMatcher = tempPattern.matcher(text); |
||||
|
List<String> temps = new ArrayList<>(); |
||||
|
while (tempMatcher.find()) { |
||||
|
temps.add(tempMatcher.group()); |
||||
|
} |
||||
|
if (temps.size() >= 2) { |
||||
|
weather.setLowTemp(temps.get(0)); |
||||
|
weather.setHighTemp(temps.get(1)); |
||||
|
} |
||||
|
|
||||
|
// 提取风向
|
||||
|
String[] directions = {"北风", "南风", "东风", "西风", "东北风", "东南风", "西北风", "西南风"}; |
||||
|
for (String direction : directions) { |
||||
|
if (text.contains(direction)) { |
||||
|
weather.setWindDirection(direction); |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取风力
|
||||
|
Pattern windPattern = Pattern.compile("(\\d+~\\d+级|微风)"); |
||||
|
Matcher windMatcher = windPattern.matcher(text); |
||||
|
if (windMatcher.find()) { |
||||
|
weather.setWindLevel(windMatcher.group()); |
||||
|
} |
||||
|
|
||||
|
if (weather.getDate() != null && !weather.getDate().isEmpty()) { |
||||
|
return weather; |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
// 忽略解析错误
|
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
private List<WeatherData> generateMockWeatherData() { |
||||
|
List<WeatherData> weatherList = new ArrayList<>(); |
||||
|
LocalDate now = LocalDate.now(); |
||||
|
String[] weeks = {"星期日", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六"}; |
||||
|
String[] weathers = {"晴", "多云", "晴转多云", "多云转晴", "阴", "小雨", "阵雨"}; |
||||
|
|
||||
|
for (int i = 0; i < 7; i++) { |
||||
|
LocalDate date = now.plusDays(i); |
||||
|
WeatherData day = new WeatherData(); |
||||
|
day.setDate(date.format(DateTimeFormatter.ofPattern("MM/dd"))); |
||||
|
// getDayOfWeek().getValue() 返回 1-7,需要转换为数组索引 0-6
|
||||
|
int dayOfWeekValue = date.getDayOfWeek().getValue(); |
||||
|
int weekIndex = dayOfWeekValue == 7 ? 0 : dayOfWeekValue; |
||||
|
day.setWeek(weeks[weekIndex]); |
||||
|
day.setWeather(weathers[i % weathers.length]); |
||||
|
int high = 24 + (int)(Math.random() * 6); |
||||
|
int low = 15 + (int)(Math.random() * 5); |
||||
|
day.setHighTemp(String.valueOf(high) + "℃"); |
||||
|
day.setLowTemp(String.valueOf(low) + "℃"); |
||||
|
day.setWindDirection(i % 2 == 0 ? "南风" : "北风"); |
||||
|
day.setWindLevel("2-3级"); |
||||
|
weatherList.add(day); |
||||
|
} |
||||
|
return weatherList; |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,216 @@ |
|||||
|
package com.crawler.ui; |
||||
|
|
||||
|
import com.crawler.analysis.MovieAnalyzer; |
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import org.jfree.chart.ChartFactory; |
||||
|
import org.jfree.chart.ChartUtils; |
||||
|
import org.jfree.chart.JFreeChart; |
||||
|
import org.jfree.chart.plot.PlotOrientation; |
||||
|
import org.jfree.data.category.DefaultCategoryDataset; |
||||
|
import org.jfree.data.general.DefaultPieDataset; |
||||
|
import org.jfree.data.statistics.HistogramDataset; |
||||
|
import org.jfree.chart.plot.PiePlot; |
||||
|
import org.jfree.chart.labels.StandardPieSectionLabelGenerator; |
||||
|
import java.text.DecimalFormat; |
||||
|
import java.text.NumberFormat; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class MovieResultDisplay { |
||||
|
// 控制台输出统计结果
|
||||
|
public static void displayResults(List<Movie> movieList) { |
||||
|
System.out.println("\n=== 电影数据统计结果 ==="); |
||||
|
System.out.println("爬取电影总数: " + movieList.size()); |
||||
|
|
||||
|
// 平均评分
|
||||
|
double averageRating = MovieAnalyzer.calculateAverageRating(movieList); |
||||
|
System.out.printf("平均评分: %.2f\n", averageRating); |
||||
|
|
||||
|
// 电影评分分布
|
||||
|
System.out.println("\n=== 电影评分分布 ==="); |
||||
|
Map<Double, Integer> ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); |
||||
|
for (Map.Entry<Double, Integer> entry : ratingDistribution.entrySet()) { |
||||
|
System.out.printf("评分 %.1f: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
|
||||
|
// 电影年份分布(最近20年)
|
||||
|
System.out.println("\n=== 电影年份分布(最近20年)==="); |
||||
|
Map<String, Integer> yearDistribution = MovieAnalyzer.analyzeYearDistribution(movieList); |
||||
|
int count = 0; |
||||
|
for (Map.Entry<String, Integer> entry : yearDistribution.entrySet()) { |
||||
|
if (count >= yearDistribution.size() - 20) { // 只显示最近20年
|
||||
|
System.out.printf("%s年: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
count++; |
||||
|
} |
||||
|
|
||||
|
// 电影类型分布
|
||||
|
System.out.println("\n=== 电影类型分布 ==="); |
||||
|
Map<String, Integer> genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); |
||||
|
genreDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) // 只显示前10种类型
|
||||
|
.forEach(entry -> System.out.printf("%-10s: %d部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 导演作品数量排行
|
||||
|
System.out.println("\n=== 导演作品数量排行 ==="); |
||||
|
Map<String, Integer> directorWorks = MovieAnalyzer.analyzeDirectorWorks(movieList); |
||||
|
count = 0; |
||||
|
for (Map.Entry<String, Integer> entry : directorWorks.entrySet()) { |
||||
|
if (count < 10) { // 只显示前10位导演
|
||||
|
System.out.printf("%-20s: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
count++; |
||||
|
} else { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 评分与年份相关性
|
||||
|
System.out.println("\n=== 评分与年份相关性 ==="); |
||||
|
Map<String, Double> yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); |
||||
|
for (Map.Entry<String, Double> entry : yearRatingCorrelation.entrySet()) { |
||||
|
System.out.printf("%s年: 平均评分 %.2f\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 生成电影评分分布直方图
|
||||
|
public static void generateRatingDistributionChart(List<Movie> movieList) throws IOException { |
||||
|
Map<Double, Integer> ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
|
||||
|
for (Map.Entry<Double, Integer> entry : ratingDistribution.entrySet()) { |
||||
|
dataset.addValue(entry.getValue(), "Count", entry.getKey().toString()); |
||||
|
} |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"Movie Rating Distribution", |
||||
|
"Rating", |
||||
|
"Count", |
||||
|
dataset, |
||||
|
PlotOrientation.VERTICAL, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_rating_distribution.png"), chart, 800, 600); |
||||
|
System.out.println("电影评分分布图表已保存为 movie_rating_distribution.png"); |
||||
|
} |
||||
|
|
||||
|
// 生成电影年份分布折线图
|
||||
|
public static void generateYearDistributionChart(List<Movie> movieList) throws IOException { |
||||
|
Map<String, Integer> yearDistribution = MovieAnalyzer.analyzeYearDistribution(movieList); |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
|
||||
|
System.out.println("年份分布数据:"); |
||||
|
for (Map.Entry<String, Integer> entry : yearDistribution.entrySet()) { |
||||
|
System.out.println("年份: '" + entry.getKey() + "', 数量: " + entry.getValue()); |
||||
|
// 尝试提取年份数字
|
||||
|
String year = entry.getKey(); |
||||
|
// 提取4位数字作为年份
|
||||
|
String yearMatch = year.replaceAll("[^0-9]", ""); |
||||
|
if (yearMatch.length() >= 4) { |
||||
|
yearMatch = yearMatch.substring(0, 4); |
||||
|
dataset.addValue(entry.getValue(), "Count", yearMatch); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createLineChart( |
||||
|
"Movie Year Distribution", |
||||
|
"Year", |
||||
|
"Count", |
||||
|
dataset, |
||||
|
PlotOrientation.VERTICAL, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_year_distribution.png"), chart, 800, 600); |
||||
|
System.out.println("电影年份分布图表已保存为 movie_year_distribution.png"); |
||||
|
} |
||||
|
|
||||
|
// 生成电影类型分布饼图
|
||||
|
public static void generateGenreDistributionChart(List<Movie> movieList) throws IOException { |
||||
|
Map<String, Integer> genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); |
||||
|
DefaultPieDataset dataset = new DefaultPieDataset(); |
||||
|
|
||||
|
// 只显示前10种类型
|
||||
|
genreDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.forEach(entry -> { |
||||
|
// 使用英文标签避免中文显示问题
|
||||
|
String englishLabel = getEnglishGenre(entry.getKey()) + " (" + entry.getValue() + ")"; |
||||
|
dataset.setValue(englishLabel, entry.getValue()); |
||||
|
}); |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createPieChart( |
||||
|
"Movie Genre Distribution", // 使用英文标题
|
||||
|
dataset, |
||||
|
true, // 显示图例
|
||||
|
true, // 显示工具提示
|
||||
|
false // 不显示URL
|
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_genre_distribution.png"), chart, 800, 600); |
||||
|
System.out.println("电影类型分布图表已保存为 movie_genre_distribution.png"); |
||||
|
} |
||||
|
|
||||
|
// 将中文类型转换为英文
|
||||
|
private static String getEnglishGenre(String chineseGenre) { |
||||
|
switch (chineseGenre) { |
||||
|
case "冒险": return "Adventure"; |
||||
|
case "奇幻": return "Fantasy"; |
||||
|
case "爱情": return "Romance"; |
||||
|
case "惊悚": return "Thriller"; |
||||
|
case "动画": return "Animation"; |
||||
|
case "悬疑": return "Mystery"; |
||||
|
case "家庭": return "Family"; |
||||
|
case "犯罪": return "Crime"; |
||||
|
case "同性": return "LGBTQ+"; |
||||
|
case "历史": return "History"; |
||||
|
case "剧情": return "Drama"; |
||||
|
case "动作": return "Action"; |
||||
|
case "喜剧": return "Comedy"; |
||||
|
case "科幻": return "Sci-Fi"; |
||||
|
default: return chineseGenre; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 生成评分与年份相关性图表
|
||||
|
public static void generateYearRatingChart(List<Movie> movieList) throws IOException { |
||||
|
Map<String, Double> yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
|
||||
|
System.out.println("评分与年份相关性数据:"); |
||||
|
for (Map.Entry<String, Double> entry : yearRatingCorrelation.entrySet()) { |
||||
|
System.out.println("年份: '" + entry.getKey() + "', 平均评分: " + entry.getValue()); |
||||
|
// 尝试提取年份数字
|
||||
|
String year = entry.getKey(); |
||||
|
// 提取4位数字作为年份
|
||||
|
String yearMatch = year.replaceAll("[^0-9]", ""); |
||||
|
if (yearMatch.length() >= 4) { |
||||
|
yearMatch = yearMatch.substring(0, 4); |
||||
|
dataset.addValue(entry.getValue(), "Avg Rating", yearMatch); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createLineChart( |
||||
|
"Year vs Rating Correlation", |
||||
|
"Year", |
||||
|
"Average Rating", |
||||
|
dataset, |
||||
|
PlotOrientation.VERTICAL, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_year_rating.png"), chart, 800, 600); |
||||
|
System.out.println("评分与年份相关性图表已保存为 movie_year_rating.png"); |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,139 @@ |
|||||
|
package com.crawler.utils; |
||||
|
|
||||
|
import com.crawler.chart.model.Movie; |
||||
|
import com.crawler.chart.model.SoftRanking; |
||||
|
import com.crawler.chart.model.WeatherData; |
||||
|
|
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DataUtils { |
||||
|
// 清洗电影数据
|
||||
|
public static Movie cleanMovie(Movie movie) { |
||||
|
if (movie == null) return null; |
||||
|
|
||||
|
// 清洗标题
|
||||
|
if (movie.getTitle() != null) { |
||||
|
movie.setTitle(movie.getTitle().trim().replaceAll("\\s+", " ")); |
||||
|
} |
||||
|
|
||||
|
// 清洗导演
|
||||
|
if (movie.getDirector() != null) { |
||||
|
movie.setDirector(movie.getDirector().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗演员
|
||||
|
if (movie.getActors() != null) { |
||||
|
movie.setActors(movie.getActors().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗年份
|
||||
|
if (movie.getYear() != null) { |
||||
|
movie.setYear(movie.getYear().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗国家/地区
|
||||
|
if (movie.getCountry() != null) { |
||||
|
movie.setCountry(movie.getCountry().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗类型
|
||||
|
if (movie.getGenre() != null) { |
||||
|
movie.setGenre(movie.getGenre().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗简介
|
||||
|
if (movie.getQuote() != null) { |
||||
|
movie.setQuote(movie.getQuote().trim().replaceAll("\\s+", " ")); |
||||
|
} |
||||
|
|
||||
|
return movie; |
||||
|
} |
||||
|
|
||||
|
// 写入电影数据到CSV文件
|
||||
|
public static void writeMovieToCSV(List<Movie> movieList, String filePath) throws IOException { |
||||
|
// 添加时间戳避免文件冲突
|
||||
|
String timestamp = String.valueOf(System.currentTimeMillis()); |
||||
|
String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); |
||||
|
|
||||
|
FileWriter writer = new FileWriter(actualFilePath); |
||||
|
// 写入表头
|
||||
|
writer.write("排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n"); |
||||
|
|
||||
|
// 写入数据
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null) { |
||||
|
writer.write(movie.getRank() + ","); |
||||
|
writer.write(escapeCsv(movie.getTitle()) + ","); |
||||
|
writer.write(movie.getRating() + ","); |
||||
|
writer.write(movie.getRatingPeople() + ","); |
||||
|
writer.write(escapeCsv(movie.getDirector()) + ","); |
||||
|
writer.write(escapeCsv(movie.getActors()) + ","); |
||||
|
writer.write(escapeCsv(movie.getYear()) + ","); |
||||
|
writer.write(escapeCsv(movie.getCountry()) + ","); |
||||
|
writer.write(escapeCsv(movie.getGenre()) + ","); |
||||
|
writer.write(escapeCsv(movie.getQuote()) + "\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.close(); |
||||
|
System.out.println("数据已保存到 " + actualFilePath); |
||||
|
} |
||||
|
|
||||
|
// 转义CSV特殊字符
|
||||
|
private static String escapeCsv(String value) { |
||||
|
if (value == null) return ""; |
||||
|
if (value.contains(",") || value.contains("\"")) { |
||||
|
value = value.replaceAll("\"", "\"\""); |
||||
|
return "\"" + value + "\""; |
||||
|
} |
||||
|
return value; |
||||
|
} |
||||
|
|
||||
|
// 写入软科排名数据到CSV文件
|
||||
|
public static void writeSoftRankingToCSV(List<SoftRanking> rankingList, String filePath) throws IOException { |
||||
|
String timestamp = String.valueOf(System.currentTimeMillis()); |
||||
|
String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); |
||||
|
|
||||
|
FileWriter writer = new FileWriter(actualFilePath); |
||||
|
writer.write("排名,学校名称,省份,类型,总分\n"); |
||||
|
|
||||
|
for (SoftRanking ranking : rankingList) { |
||||
|
if (ranking != null) { |
||||
|
writer.write(ranking.getRank() + ","); |
||||
|
writer.write(escapeCsv(ranking.getUniversityName()) + ","); |
||||
|
writer.write(escapeCsv(ranking.getProvince()) + ","); |
||||
|
writer.write(escapeCsv(ranking.getType()) + ","); |
||||
|
writer.write(ranking.getScore() + "\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.close(); |
||||
|
System.out.println("软科排名数据已保存到 " + actualFilePath); |
||||
|
} |
||||
|
|
||||
|
// 写入天气数据到CSV文件
|
||||
|
public static void writeWeatherToCSV(List<WeatherData> weatherList, String filePath) throws IOException { |
||||
|
String timestamp = String.valueOf(System.currentTimeMillis()); |
||||
|
String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); |
||||
|
|
||||
|
FileWriter writer = new FileWriter(actualFilePath); |
||||
|
writer.write("日期,星期,天气,最高温度,最低温度,风向,风力\n"); |
||||
|
|
||||
|
for (WeatherData weather : weatherList) { |
||||
|
if (weather != null) { |
||||
|
writer.write(escapeCsv(weather.getDate()) + ","); |
||||
|
writer.write(escapeCsv(weather.getWeek()) + ","); |
||||
|
writer.write(escapeCsv(weather.getWeather()) + ","); |
||||
|
writer.write(escapeCsv(weather.getHighTemp()) + ","); |
||||
|
writer.write(escapeCsv(weather.getLowTemp()) + ","); |
||||
|
writer.write(escapeCsv(weather.getWindDirection()) + ","); |
||||
|
writer.write(escapeCsv(weather.getWindLevel()) + "\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.close(); |
||||
|
System.out.println("天气数据已保存到 " + actualFilePath); |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue