Browse Source

实验2-徐景旺-202414010701

main
XuJingwang 1 week ago
parent
commit
84a1b42f7e
  1. 59
      project1/MovieMain.java
  2. 136
      project1/README.md
  3. 119
      project1/analysis/MovieAnalyzer.java
  4. 8
      project1/chart/ChartGenerator.java
  5. 30
      project1/chart/ChartManager.java
  6. 25
      project1/chart/impl/GenreDistributionChartGenerator.java
  7. 27
      project1/chart/impl/RatingDistributionChartGenerator.java
  8. 25
      project1/chart/impl/YearDistributionChartGenerator.java
  9. 25
      project1/chart/impl/YearRatingChartGenerator.java
  10. 108
      project1/model/Movie.java
  11. 55
      project1/pom.xml
  12. 206
      project1/spider/DoubanSpider.java
  13. 216
      project1/ui/MovieResultDisplay.java
  14. 91
      project1/utils/DataUtils.java

59
project1/MovieMain.java

@ -0,0 +1,59 @@
package com.crawler;
import com.crawler.chart.ChartGenerator;
import com.crawler.chart.ChartManager;
import com.crawler.chart.impl.GenreDistributionChartGenerator;
import com.crawler.chart.impl.RatingDistributionChartGenerator;
import com.crawler.chart.impl.YearDistributionChartGenerator;
import com.crawler.chart.impl.YearRatingChartGenerator;
import com.crawler.model.Movie;
import com.crawler.spider.DoubanSpider;
import com.crawler.utils.DataUtils;
import com.crawler.ui.MovieResultDisplay;
import java.util.List;
/**
 * Command-line entry point of the crawler.
 *
 * <p>Runs the pipeline end to end: crawl the list pages, clean every record, write the
 * records to a CSV file, print statistics to the console and render the charts.
 */
public class MovieMain {

    /** Base file name handed to the CSV writer. */
    private static final String CSV_BASE_NAME = "douban_movies.csv";

    /**
     * Executes the crawl / clean / export / report / chart steps in order.
     *
     * <p>Any exception raised by a step is caught here and its stack trace is printed;
     * the remaining steps are skipped.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        try {
            System.out.println("开始爬取豆瓣电影Top250数据...");

            // 1. Crawl the list pages.
            DoubanSpider spider = new DoubanSpider();
            List<Movie> movieList = spider.crawlMovies();

            // 2. Clean every record and drop the ones the cleaner rejects (null).
            List<Movie> cleanedMovies = movieList.stream()
                    .map(DataUtils::cleanMovie)
                    .filter(movie -> movie != null)
                    .toList();

            // 3. Save to CSV. The writer derives a timestamped file name from the base name
            //    and reports that real path itself, so no second "saved to" line is printed
            //    here: the previous one named a file that is never created.
            DataUtils.writeMovieToCSV(cleanedMovies, CSV_BASE_NAME);

            // 4. Print the statistics.
            MovieResultDisplay.displayResults(cleanedMovies);

            // 5. Render all charts through the common ChartGenerator interface.
            ChartManager chartManager = new ChartManager();
            ChartGenerator ratingChart = new RatingDistributionChartGenerator();
            ChartGenerator yearChart = new YearDistributionChartGenerator();
            ChartGenerator genreChart = new GenreDistributionChartGenerator();
            ChartGenerator yearRatingChart = new YearRatingChartGenerator();
            chartManager.addChartGenerator(ratingChart);
            chartManager.addChartGenerator(yearChart);
            chartManager.addChartGenerator(genreChart);
            chartManager.addChartGenerator(yearRatingChart);
            chartManager.generateAllCharts(cleanedMovies);

            System.out.println("\n爬虫任务完成!");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}

136
project1/README.md

@ -0,0 +1,136 @@
# 电影爬虫项目 - 继承与多态实现说明
## 项目简介
本项目是一个Java电影爬虫,从豆瓣电影Top250抓取数据,进行清洗、存储、分析,并生成多种图表展示结果。项目重点展示了面向对象编程中**继承**和**多态**的实现。
## 继承与多态实现
### 1. 接口继承
#### 1.1 核心接口定义
**文件**: `src/main/java/com/crawler/chart/ChartGenerator.java`
```java
public interface ChartGenerator {
void generateChart(Movie[] movies);
String getChartName();
}
```
#### 1.2 实现类继承
| 实现类 | 文件位置 | 继承关系 |
|-------|---------|----------|
| `RatingDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 |
| `YearDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 |
| `GenreDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 |
| `YearRatingChartGenerator` | `src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java` | 实现 `ChartGenerator` 接口 |
### 2. 多态实现
#### 2.1 向上转型(接口引用指向实现类)
**文件**: `src/main/java/com/crawler/MovieMain.java` (第41-44行)
```java
ChartGenerator ratingChart = new RatingDistributionChartGenerator();
ChartGenerator yearChart = new YearDistributionChartGenerator();
ChartGenerator genreChart = new GenreDistributionChartGenerator();
ChartGenerator yearRatingChart = new YearRatingChartGenerator();
```
#### 2.2 方法参数多态
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` (第12-13行)
```java
public void addChartGenerator(ChartGenerator generator) {
chartGenerators.add(generator);
}
```
#### 2.3 运行时多态(动态绑定)
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` (第21-25行)
```java
public void generateAllCharts(List<Movie> movies) {
Movie[] movieArray = movies.toArray(new Movie[0]);
for (ChartGenerator generator : chartGenerators) {
System.out.println("生成图表: " + generator.getChartName());
generator.generateChart(movieArray); // 运行时根据实际类型调用对应方法
}
}
```
#### 2.4 统一调用接口
**文件**: `src/main/java/com/crawler/MovieMain.java` (第46-51行)
```java
chartManager.addChartGenerator(ratingChart);
chartManager.addChartGenerator(yearChart);
chartManager.addChartGenerator(genreChart);
chartManager.addChartGenerator(yearRatingChart);
chartManager.generateAllCharts(cleanedMovies);
```
## 继承与多态的优势
1. **代码复用**:所有图表生成器共享相同的接口方法
2. **可扩展性**:新增图表类型只需实现接口,无需修改现有代码
3. **统一管理**:`ChartManager` 可以统一管理不同类型的图表生成器
4. **灵活性**:通过接口引用可以操作不同的实现类对象
5. **可维护性**:代码结构清晰,职责分明
## 项目结构
```
src/
└── main/
└── java/
└── com/
└── crawler/
├── MovieMain.java # 主入口文件
├── model/
│ └── Movie.java # 电影数据模型
├── spider/
│ └── DoubanSpider.java # 豆瓣爬虫实现
├── analysis/
│ └── MovieAnalyzer.java # 数据分析工具
├── ui/
│ └── MovieResultDisplay.java # 结果显示和图表生成
├── utils/
│ └── DataUtils.java # 数据工具类
└── chart/
├── ChartGenerator.java # 图表生成器接口
├── ChartManager.java # 图表管理器
└── impl/
├── RatingDistributionChartGenerator.java # 评分分布图表
├── YearDistributionChartGenerator.java # 年份分布图表
├── GenreDistributionChartGenerator.java # 类型分布图表
└── YearRatingChartGenerator.java # 年份评分相关性图表
```
## 运行说明
1. **直接运行**:在IDE中直接运行 `MovieMain.java`
2. **依赖要求**:需要Jsoup和JFreeChart库
3. **运行结果**
- 控制台输出爬取进度和图表生成信息
- 生成的CSV数据文件保存在项目目录,文件名带有时间戳(形如 `douban_movies_<时间戳>.csv`)
- 生成的图表以PNG格式保存在项目目录
## 技术栈
- Java 17+ (代码使用了 Java 16 引入的 `Stream.toList()` 和 Java 9 引入的 `List.of()`,在 Java 8 下无法编译运行)
- Jsoup (网页解析)
- JFreeChart (图表生成)
- Maven (依赖管理)
## 总结
本项目通过图表生成器接口及其实现类,充分展示了面向对象编程中**继承**和**多态**的核心概念。接口定义了统一的方法规范,实现类提供了具体的实现逻辑,通过接口引用和运行时动态绑定,实现了代码的灵活性和可扩展性。

119
project1/analysis/MovieAnalyzer.java

@ -0,0 +1,119 @@
package com.crawler.analysis;
import com.crawler.model.Movie;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Static aggregation helpers over a list of {@link Movie} records.
 *
 * <p>Every method ignores {@code null} elements of the input list.
 */
public class MovieAnalyzer {

    /**
     * Counts how many movies share each rating value.
     *
     * @param movieList movies to tally
     * @return rating to count, sorted by ascending rating
     */
    public static Map<Double, Integer> analyzeRatingDistribution(List<Movie> movieList) {
        Map<Double, Integer> countsByRating = new TreeMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null) {
                continue;
            }
            countsByRating.merge(candidate.getRating(), 1, Integer::sum);
        }
        return countsByRating;
    }

    /**
     * Counts movies per release year; movies without a year are skipped.
     *
     * @param movieList movies to tally
     * @return year to count, sorted by the year string
     */
    public static Map<String, Integer> analyzeYearDistribution(List<Movie> movieList) {
        Map<String, Integer> countsByYear = new TreeMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null || candidate.getYear() == null) {
                continue;
            }
            countsByYear.merge(candidate.getYear(), 1, Integer::sum);
        }
        return countsByYear;
    }

    /**
     * Counts movies per genre; movies without a genre are skipped.
     *
     * @param movieList movies to tally
     * @return genre to count, in no particular order
     */
    public static Map<String, Integer> analyzeGenreDistribution(List<Movie> movieList) {
        Map<String, Integer> countsByGenre = new HashMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null || candidate.getGenre() == null) {
                continue;
            }
            countsByGenre.merge(candidate.getGenre(), 1, Integer::sum);
        }
        return countsByGenre;
    }

    /**
     * Counts movies per country/region; movies without one are skipped.
     *
     * @param movieList movies to tally
     * @return country/region to count, in no particular order
     */
    public static Map<String, Integer> analyzeCountryDistribution(List<Movie> movieList) {
        Map<String, Integer> countsByCountry = new HashMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null || candidate.getCountry() == null) {
                continue;
            }
            countsByCountry.merge(candidate.getCountry(), 1, Integer::sum);
        }
        return countsByCountry;
    }

    /**
     * Ranks directors by the number of movies they have in the list.
     *
     * @param movieList movies to tally
     * @return director to count, iterating from the most to the fewest works
     */
    public static Map<String, Integer> analyzeDirectorWorks(List<Movie> movieList) {
        Map<String, Integer> worksByDirector = new HashMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null || candidate.getDirector() == null) {
                continue;
            }
            worksByDirector.merge(candidate.getDirector(), 1, Integer::sum);
        }

        // Copy the entries into an insertion-ordered map, largest count first.
        Comparator<Map.Entry<String, Integer>> mostWorksFirst =
                Map.Entry.<String, Integer>comparingByValue().reversed();
        Map<String, Integer> ranking = new LinkedHashMap<>();
        worksByDirector.entrySet().stream()
                .sorted(mostWorksFirst)
                .forEachOrdered(entry -> ranking.put(entry.getKey(), entry.getValue()));
        return ranking;
    }

    /**
     * Computes the mean rating of all non-null movies.
     *
     * @param movieList movies to average
     * @return the mean rating, or {@code 0.0} when there is nothing to average
     */
    public static double calculateAverageRating(List<Movie> movieList) {
        OptionalDouble mean = movieList.stream()
                .filter(candidate -> candidate != null)
                .mapToDouble(candidate -> candidate.getRating())
                .average();
        return mean.isPresent() ? mean.getAsDouble() : 0.0;
    }

    /**
     * Computes the mean rating per release year. Despite the method name no correlation
     * coefficient is calculated; the result is a per-year average.
     *
     * @param movieList movies to group; movies without a year are skipped
     * @return year to mean rating, sorted by the year string
     */
    public static Map<String, Double> analyzeYearRatingCorrelation(List<Movie> movieList) {
        // Phase 1: collect the ratings of each year.
        Map<String, List<Double>> ratingsByYear = new TreeMap<>();
        for (Movie candidate : movieList) {
            if (candidate == null || candidate.getYear() == null) {
                continue;
            }
            ratingsByYear
                    .computeIfAbsent(candidate.getYear(), unused -> new ArrayList<>())
                    .add(candidate.getRating());
        }

        // Phase 2: reduce each year's ratings to their mean.
        Map<String, Double> meanByYear = new TreeMap<>();
        ratingsByYear.forEach((year, ratings) -> meanByYear.put(
                year,
                ratings.stream().mapToDouble(Double::doubleValue).average().orElse(0.0)));
        return meanByYear;
    }
}

8
project1/chart/ChartGenerator.java

@ -0,0 +1,8 @@
package com.crawler.chart;
import com.crawler.model.Movie;
/**
 * Contract for a component that produces one kind of chart from movie data.
 */
public interface ChartGenerator {
/**
 * Produces this generator's chart for the given movies.
 *
 * @param movies the movies to chart
 */
void generateChart(Movie[] movies);
/**
 * @return the name of the chart this generator produces
 */
String getChartName();
}

30
project1/chart/ChartManager.java

@ -0,0 +1,30 @@
package com.crawler.chart;
import com.crawler.chart.impl.GenreDistributionChartGenerator;
import com.crawler.chart.impl.RatingDistributionChartGenerator;
import com.crawler.chart.impl.YearDistributionChartGenerator;
import com.crawler.chart.impl.YearRatingChartGenerator;
import com.crawler.model.Movie;
import java.util.ArrayList;
import java.util.List;
/**
 * Keeps a list of {@link ChartGenerator}s and runs all of them, in registration order,
 * over one set of movies.
 */
public class ChartManager {

    /** Registered generators in registration order; the list instance is never replaced. */
    private final List<ChartGenerator> chartGenerators;

    /** Creates a manager with no registered generators. */
    public ChartManager() {
        chartGenerators = new ArrayList<>();
    }

    /**
     * Registers a generator to be run by {@link #generateAllCharts(List)}.
     *
     * @param generator the generator to add
     * @throws NullPointerException if {@code generator} is null; rejecting it here reports
     *         the mistake at the call that made it instead of later, in the middle of the
     *         chart-generation loop
     */
    public void addChartGenerator(ChartGenerator generator) {
        if (generator == null) {
            throw new NullPointerException("generator must not be null");
        }
        chartGenerators.add(generator);
    }

    /**
     * Runs every registered generator over the given movies, printing each chart name
     * before the chart is generated.
     *
     * @param movies the movies to chart; copied into an array that is passed to every generator
     */
    public void generateAllCharts(List<Movie> movies) {
        Movie[] movieArray = movies.toArray(new Movie[0]);
        for (ChartGenerator generator : chartGenerators) {
            System.out.println("生成图表: " + generator.getChartName());
            generator.generateChart(movieArray);
        }
    }
}

25
project1/chart/impl/GenreDistributionChartGenerator.java

@ -0,0 +1,25 @@
package com.crawler.chart.impl;
import com.crawler.chart.ChartGenerator;
import com.crawler.model.Movie;
import com.crawler.ui.MovieResultDisplay;
import java.io.IOException;
import java.util.List;
/**
 * {@link ChartGenerator} that delegates to
 * {@link MovieResultDisplay#generateGenreDistributionChart(List)}.
 */
public class GenreDistributionChartGenerator implements ChartGenerator {

    /** Name reported by {@link #getChartName()}. */
    private static final String CHART_NAME = "Genre Distribution Chart";

    /**
     * Generates the genre distribution chart. An {@link IOException} raised while
     * generating it is not propagated; its stack trace is printed.
     *
     * @param movies the movies to chart
     */
    @Override
    public void generateChart(Movie[] movies) {
        List<Movie> asList = List.of(movies);
        try {
            MovieResultDisplay.generateGenreDistributionChart(asList);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }

    /** @return the fixed name of this chart */
    @Override
    public String getChartName() {
        return CHART_NAME;
    }
}

27
project1/chart/impl/RatingDistributionChartGenerator.java

@ -0,0 +1,27 @@
package com.crawler.chart.impl;
import com.crawler.chart.ChartGenerator;
import com.crawler.model.Movie;
import com.crawler.ui.MovieResultDisplay;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * {@link ChartGenerator} that delegates to
 * {@link MovieResultDisplay#generateRatingDistributionChart(List)}.
 */
public class RatingDistributionChartGenerator implements ChartGenerator {

    /** Name reported by {@link #getChartName()}. */
    private static final String CHART_NAME = "Rating Distribution Chart";

    /**
     * Generates the rating distribution chart. An {@link IOException} raised while
     * generating it is not propagated; its stack trace is printed.
     *
     * @param movies the movies to chart
     */
    @Override
    public void generateChart(Movie[] movies) {
        List<Movie> asList = List.of(movies);
        try {
            MovieResultDisplay.generateRatingDistributionChart(asList);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }

    /** @return the fixed name of this chart */
    @Override
    public String getChartName() {
        return CHART_NAME;
    }
}

25
project1/chart/impl/YearDistributionChartGenerator.java

@ -0,0 +1,25 @@
package com.crawler.chart.impl;
import com.crawler.chart.ChartGenerator;
import com.crawler.model.Movie;
import com.crawler.ui.MovieResultDisplay;
import java.io.IOException;
import java.util.List;
/**
 * {@link ChartGenerator} that delegates to
 * {@link MovieResultDisplay#generateYearDistributionChart(List)}.
 */
public class YearDistributionChartGenerator implements ChartGenerator {

    /** Name reported by {@link #getChartName()}. */
    private static final String CHART_NAME = "Year Distribution Chart";

    /**
     * Generates the year distribution chart. An {@link IOException} raised while
     * generating it is not propagated; its stack trace is printed.
     *
     * @param movies the movies to chart
     */
    @Override
    public void generateChart(Movie[] movies) {
        List<Movie> asList = List.of(movies);
        try {
            MovieResultDisplay.generateYearDistributionChart(asList);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }

    /** @return the fixed name of this chart */
    @Override
    public String getChartName() {
        return CHART_NAME;
    }
}

25
project1/chart/impl/YearRatingChartGenerator.java

@ -0,0 +1,25 @@
package com.crawler.chart.impl;
import com.crawler.chart.ChartGenerator;
import com.crawler.model.Movie;
import com.crawler.ui.MovieResultDisplay;
import java.io.IOException;
import java.util.List;
/**
 * {@link ChartGenerator} that delegates to
 * {@link MovieResultDisplay#generateYearRatingChart(List)}.
 */
public class YearRatingChartGenerator implements ChartGenerator {

    /** Name reported by {@link #getChartName()}. */
    private static final String CHART_NAME = "Year Rating Correlation Chart";

    /**
     * Generates the year/rating chart. An {@link IOException} raised while
     * generating it is not propagated; its stack trace is printed.
     *
     * @param movies the movies to chart
     */
    @Override
    public void generateChart(Movie[] movies) {
        List<Movie> asList = List.of(movies);
        try {
            MovieResultDisplay.generateYearRatingChart(asList);
        } catch (IOException ioFailure) {
            ioFailure.printStackTrace();
        }
    }

    /** @return the fixed name of this chart */
    @Override
    public String getChartName() {
        return CHART_NAME;
    }
}

108
project1/model/Movie.java

@ -0,0 +1,108 @@
package com.crawler.model;
/**
 * Mutable data holder for one movie entry: a plain bean with a getter/setter pair per field.
 */
public class Movie {

    private int rank;
    private String title;
    private double rating;
    private int ratingPeople;
    private String director;
    private String actors;
    private String year;
    private String country;
    private String genre;
    private String quote;

    // ---- numeric fields ----

    /** @return the rank */
    public int getRank() {
        return this.rank;
    }

    /** @param rank the rank to store */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return the rating */
    public double getRating() {
        return this.rating;
    }

    /** @param rating the rating to store */
    public void setRating(double rating) {
        this.rating = rating;
    }

    /** @return the number of people who rated the movie */
    public int getRatingPeople() {
        return this.ratingPeople;
    }

    /** @param ratingPeople the number of raters to store */
    public void setRatingPeople(int ratingPeople) {
        this.ratingPeople = ratingPeople;
    }

    // ---- text fields (each may be null until set) ----

    /** @return the title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title to store */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the director */
    public String getDirector() {
        return this.director;
    }

    /** @param director the director to store */
    public void setDirector(String director) {
        this.director = director;
    }

    /** @return the actors */
    public String getActors() {
        return this.actors;
    }

    /** @param actors the actors to store */
    public void setActors(String actors) {
        this.actors = actors;
    }

    /** @return the year, kept as text */
    public String getYear() {
        return this.year;
    }

    /** @param year the year to store */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return the country/region */
    public String getCountry() {
        return this.country;
    }

    /** @param country the country/region to store */
    public void setCountry(String country) {
        this.country = country;
    }

    /** @return the genre */
    public String getGenre() {
        return this.genre;
    }

    /** @param genre the genre to store */
    public void setGenre(String genre) {
        this.genre = genre;
    }

    /** @return the quote */
    public String getQuote() {
        return this.quote;
    }

    /** @param quote the quote to store */
    public void setQuote(String quote) {
        this.quote = quote;
    }

    /**
     * Renders rank, title, rating, rater count, director, year and genre.
     * Actors, country and quote are not part of the text.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Movie{");
        text.append("rank=").append(rank);
        text.append(", title='").append(title).append('\'');
        text.append(", rating=").append(rating);
        text.append(", ratingPeople=").append(ratingPeople);
        text.append(", director='").append(director).append('\'');
        text.append(", year='").append(year).append('\'');
        text.append(", genre='").append(genre).append('\'');
        text.append('}');
        return text.toString();
    }
}

55
project1/pom.xml

@ -0,0 +1,55 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.crawler</groupId>
    <artifactId>job-crawler</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- The sources call Stream.toList() (added in Java 16) and List.of() (added in
             Java 9), so language level 1.8 cannot build and run them. 17 is the first LTS
             release that provides both. -->
        <maven.compiler.source>17</maven.compiler.source>
        <maven.compiler.target>17</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>
    <dependencies>
        <!-- Jsoup - HTML parsing library -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.17.2</version>
        </dependency>
        <!-- JFreeChart - chart generation library -->
        <dependency>
            <groupId>org.jfree</groupId>
            <artifactId>jfreechart</artifactId>
            <version>1.5.4</version>
        </dependency>
        <!-- JCommon - declared as a JFreeChart companion library.
             NOTE(review): confirm it is still required with JFreeChart 1.5.x. -->
        <dependency>
            <groupId>org.jfree</groupId>
            <artifactId>jcommon</artifactId>
            <version>1.0.24</version>
        </dependency>
    </dependencies>
    <build>
        <sourceDirectory>src/main/java</sourceDirectory>
        <outputDirectory>target/classes</outputDirectory>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.8.1</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>

206
project1/spider/DoubanSpider.java

@ -0,0 +1,206 @@
package com.crawler.spider;
import com.crawler.model.Movie;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.*;
/**
 * Crawls the Douban Top 250 list pages and converts every list entry into a {@link Movie}.
 *
 * <p>Pages are fetched by a fixed-size thread pool; the results are merged in page order.
 */
public class DoubanSpider {
    private static final String BASE_URL = "https://movie.douban.com/top250";
    private static final int MAX_PAGES = 10;
    private static final int THREAD_POOL_SIZE = 3;
    /** Pause in milliseconds that every page task takes before it sends its request. */
    private static final int REQUEST_DELAY = 1000;
    /** Step of the {@code start} query parameter between two consecutive pages. */
    private static final int MOVIES_PER_PAGE = 25;
    /** Label that precedes the director in an entry's info text. */
    private static final String DIRECTOR_LABEL = "导演:";
    /** Label that precedes the actors in an entry's info text. */
    private static final String ACTORS_LABEL = "主演:";

    /**
     * Crawls all pages and returns the movies that could be parsed.
     *
     * <p>A page that fails contributes no movies; the failure is printed and the other
     * pages are still collected. If the calling thread is interrupted while waiting, the
     * interrupt flag is restored, outstanding page tasks are cancelled and the movies
     * collected so far are returned.
     *
     * @return the parsed movies in page order; never null
     */
    public List<Movie> crawlMovies() {
        List<Movie> movieList = new ArrayList<>();
        ExecutorService executorService = Executors.newFixedThreadPool(THREAD_POOL_SIZE);
        List<Future<List<Movie>>> futures = new ArrayList<>();
        try {
            for (int page = 0; page < MAX_PAGES; page++) {
                final int currentPage = page;
                Callable<List<Movie>> pageTask = () -> crawlPageQuietly(currentPage);
                futures.add(executorService.submit(pageTask));
            }
            for (Future<List<Movie>> future : futures) {
                try {
                    movieList.addAll(future.get());
                } catch (InterruptedException e) {
                    // Keep the interrupt visible to the caller instead of swallowing it,
                    // and stop waiting: every further get() would fail the same way.
                    Thread.currentThread().interrupt();
                    executorService.shutdownNow();
                    break;
                } catch (Exception e) {
                    e.printStackTrace();
                }
            }
        } finally {
            executorService.shutdown();
        }
        return movieList;
    }

    /**
     * Waits {@link #REQUEST_DELAY} ms, then crawls one page, turning any failure into an
     * empty result so that one bad page does not abort the whole crawl.
     *
     * @param page zero-based page index
     * @return the movies of the page, or an empty list if the page could not be crawled
     */
    private List<Movie> crawlPageQuietly(int page) {
        try {
            Thread.sleep(REQUEST_DELAY);
            return crawlPage(page);
        } catch (InterruptedException e) {
            // Restore the flag so the pool thread observes the cancellation.
            Thread.currentThread().interrupt();
            return new ArrayList<>();
        } catch (Exception e) {
            e.printStackTrace();
            return new ArrayList<>();
        }
    }

    /**
     * Downloads one list page and parses all movie entries on it.
     *
     * @param page zero-based page index
     * @return the movies parsed from the page; entries that fail to parse are left out
     * @throws IOException if the page cannot be fetched
     */
    private List<Movie> crawlPage(int page) throws IOException {
        List<Movie> movieList = new ArrayList<>();
        String url = BASE_URL + "?start=" + (page * MOVIES_PER_PAGE);
        System.out.println("爬取页面: " + url);
        Document document = Jsoup.connect(url)
                .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
                .timeout(10000)
                .get();
        System.out.println("页面标题: " + document.title());

        // Select the movie entries.
        Elements movieItems = document.select(".grid_view li");
        System.out.println("找到电影条目数: " + movieItems.size());
        for (Element item : movieItems) {
            Movie movie = parseMovie(item);
            if (movie != null) {
                movieList.add(movie);
            }
        }
        System.out.println("页面" + (page + 1) + "爬取成功,获取电影数: " + movieList.size());
        return movieList;
    }

    /**
     * Converts one list entry into a {@link Movie}.
     *
     * @param item the list-entry element
     * @return the parsed movie, or null if the entry has no title or parsing threw
     */
    private Movie parseMovie(Element item) {
        Movie movie = new Movie();
        try {
            // Rank
            Element rankElement = item.selectFirst(".pic em");
            if (rankElement != null) {
                movie.setRank(Integer.parseInt(rankElement.text().trim()));
            }
            // Title
            Element titleElement = item.selectFirst(".title");
            if (titleElement != null) {
                movie.setTitle(titleElement.text().trim());
            }
            // Rating
            Element ratingElement = item.selectFirst(".rating_num");
            if (ratingElement != null) {
                movie.setRating(Double.parseDouble(ratingElement.text().trim()));
            }
            // Number of raters: keep only the digits of the text. Text without any digit
            // leaves the count at its default instead of discarding the whole movie
            // through a NumberFormatException.
            Element ratingPeopleElement = item.selectFirst(".star span:nth-child(4)");
            if (ratingPeopleElement != null) {
                String digits = ratingPeopleElement.text().trim().replaceAll("[^0-9]", "");
                if (!digits.isEmpty()) {
                    movie.setRatingPeople(Integer.parseInt(digits));
                }
            }
            // Director, actors, year, country/region and genre share one text paragraph.
            Element infoElement = item.selectFirst(".bd p:first-child");
            if (infoElement != null) {
                String info = infoElement.text().trim();
                String director = extractDirector(info);
                if (director != null) {
                    movie.setDirector(director);
                }
                String actors = extractActors(info);
                if (actors != null) {
                    movie.setActors(actors);
                }
                applyYearCountryGenre(movie, info);
            }
            // Quote
            Element quoteElement = item.selectFirst(".inq");
            if (quoteElement != null) {
                movie.setQuote(quoteElement.text().trim());
            }
            // Entries without a title are treated as invalid.
            if (movie.getTitle() == null || movie.getTitle().isEmpty()) {
                return null;
            }
            return movie;
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Extracts the text between the director label and the actors label.
     *
     * <p>When the actors label is missing (or sits before the director text), the director
     * runs up to the first digit found after the label, or to the end of the text if there
     * is none. The search starts after the label so the end can never precede the start,
     * which previously produced an invalid substring range and discarded the movie.
     *
     * @param info the entry's info text
     * @return the trimmed director text, or null if the text has no director label
     */
    private static String extractDirector(String info) {
        int labelIndex = info.indexOf(DIRECTOR_LABEL);
        if (labelIndex == -1) {
            return null;
        }
        int start = labelIndex + DIRECTOR_LABEL.length();
        int end = info.indexOf(ACTORS_LABEL);
        if (end < start) {
            end = firstDigitIndex(info, start);
            if (end == -1) {
                end = info.length();
            }
        }
        return info.substring(start, end).trim();
    }

    /**
     * Extracts the text between the actors label and the first digit that follows it
     * (or the end of the text).
     *
     * @param info the entry's info text
     * @return the trimmed actors text, or null if the text has no actors label
     */
    private static String extractActors(String info) {
        int labelIndex = info.indexOf(ACTORS_LABEL);
        if (labelIndex == -1) {
            return null;
        }
        int start = labelIndex + ACTORS_LABEL.length();
        int end = firstDigitIndex(info, start);
        if (end == -1) {
            end = info.length();
        }
        return info.substring(start, end).trim();
    }

    /**
     * Reads year, country/region and first genre from the tail of the info text, which is
     * expected to look like {@code <year> / <country> / <genre> [<genre> ...]}.
     *
     * @param movie the movie to fill in
     * @param info  the entry's info text
     */
    private static void applyYearCountryGenre(Movie movie, String info) {
        // The first digit of the text is taken as the start of the year.
        int yearStart = firstDigitIndex(info, 0);
        if (yearStart == -1) {
            return;
        }
        // Year: accepted only if it is exactly four ASCII digits.
        if (yearStart + 4 <= info.length()) {
            String year = info.substring(yearStart, yearStart + 4);
            if (year.matches("\\d{4}")) {
                movie.setYear(year);
            }
        }
        // Country/region sits between the first and second slash after the year.
        int slashIndex = info.indexOf("/", yearStart);
        if (slashIndex == -1) {
            return;
        }
        int nextSlashIndex = info.indexOf("/", slashIndex + 1);
        if (nextSlashIndex == -1) {
            return;
        }
        movie.setCountry(info.substring(slashIndex + 1, nextSlashIndex).trim());
        // Genre: everything after the second slash; only the first one is kept.
        String genre = info.substring(nextSlashIndex + 1).trim();
        if (!genre.isEmpty()) {
            String[] genres = genre.split(" ");
            if (genres.length > 0) {
                movie.setGenre(genres[0]);
            }
        }
    }

    /**
     * Finds the first digit character at or after a position.
     *
     * @param text the text to scan
     * @param from index to start scanning at
     * @return the index of the first digit, or -1 if there is none
     */
    private static int firstDigitIndex(String text, int from) {
        for (int i = from; i < text.length(); i++) {
            if (Character.isDigit(text.charAt(i))) {
                return i;
            }
        }
        return -1;
    }
}

216
project1/ui/MovieResultDisplay.java

@ -0,0 +1,216 @@
package com.crawler.ui;
import com.crawler.analysis.MovieAnalyzer;
import com.crawler.model.Movie;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
import org.jfree.data.statistics.HistogramDataset;
import org.jfree.chart.plot.PiePlot;
import org.jfree.chart.labels.StandardPieSectionLabelGenerator;
import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
/**
 * Console reporting and PNG chart output for a list of movies.
 *
 * <p>All figures come from {@link MovieAnalyzer}. Charts are saved as 800x600 PNG files
 * under fixed relative file names.
 */
public class MovieResultDisplay {

    /**
     * Prints the statistics report to standard output: total count, mean rating, rating
     * distribution, the last 20 years of the year distribution, the 10 most frequent
     * genres, the 10 directors with the most works and the mean rating per year.
     *
     * @param movieList the movies to report on
     */
    public static void displayResults(List<Movie> movieList) {
        System.out.println("\n=== 电影数据统计结果 ===");
        System.out.println("爬取电影总数: " + movieList.size());

        // Mean rating
        System.out.printf("平均评分: %.2f\n", MovieAnalyzer.calculateAverageRating(movieList));

        // Rating distribution
        System.out.println("\n=== 电影评分分布 ===");
        MovieAnalyzer.analyzeRatingDistribution(movieList)
                .forEach((rating, total) -> System.out.printf("评分 %.1f: %d部\n", rating, total));

        // Year distribution: only the last 20 entries of the sorted map are printed.
        System.out.println("\n=== 电影年份分布(最近20年)===");
        Map<String, Integer> perYear = MovieAnalyzer.analyzeYearDistribution(movieList);
        int firstShown = perYear.size() - 20;
        int position = 0;
        for (Map.Entry<String, Integer> yearEntry : perYear.entrySet()) {
            if (position >= firstShown) {
                System.out.printf("%s年: %d部\n", yearEntry.getKey(), yearEntry.getValue());
            }
            position++;
        }

        // Genre distribution: the 10 most frequent genres.
        System.out.println("\n=== 电影类型分布 ===");
        MovieAnalyzer.analyzeGenreDistribution(movieList)
                .entrySet()
                .stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(10)
                .forEach(genreEntry ->
                        System.out.printf("%-10s: %d部\n", genreEntry.getKey(), genreEntry.getValue()));

        // Director ranking: the first 10 entries of the already sorted map.
        System.out.println("\n=== 导演作品数量排行 ===");
        int printed = 0;
        for (Map.Entry<String, Integer> directorEntry
                : MovieAnalyzer.analyzeDirectorWorks(movieList).entrySet()) {
            if (printed >= 10) {
                break;
            }
            System.out.printf("%-20s: %d部\n", directorEntry.getKey(), directorEntry.getValue());
            printed++;
        }

        // Mean rating per year
        System.out.println("\n=== 评分与年份相关性 ===");
        MovieAnalyzer.analyzeYearRatingCorrelation(movieList)
                .forEach((year, mean) -> System.out.printf("%s年: 平均评分 %.2f\n", year, mean));
    }

    /**
     * Saves the rating distribution as a bar chart in {@code movie_rating_distribution.png}.
     *
     * @param movieList the movies to chart
     * @throws IOException if the image cannot be written
     */
    public static void generateRatingDistributionChart(List<Movie> movieList) throws IOException {
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        MovieAnalyzer.analyzeRatingDistribution(movieList)
                .forEach((rating, total) -> dataset.addValue(total, "Count", rating.toString()));

        JFreeChart chart = ChartFactory.createBarChart(
                "Movie Rating Distribution", "Rating", "Count",
                dataset, PlotOrientation.VERTICAL, true, true, false);
        savePng(chart, "movie_rating_distribution.png");
        System.out.println("电影评分分布图表已保存为 movie_rating_distribution.png");
    }

    /**
     * Saves the year distribution as a line chart in {@code movie_year_distribution.png}.
     * Every data point is also echoed to standard output.
     *
     * @param movieList the movies to chart
     * @throws IOException if the image cannot be written
     */
    public static void generateYearDistributionChart(List<Movie> movieList) throws IOException {
        Map<String, Integer> perYear = MovieAnalyzer.analyzeYearDistribution(movieList);
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        System.out.println("年份分布数据:");
        perYear.forEach((rawYear, total) -> {
            System.out.println("年份: '" + rawYear + "', 数量: " + total);
            String label = firstFourDigits(rawYear);
            if (label != null) {
                dataset.addValue(total, "Count", label);
            }
        });

        JFreeChart chart = ChartFactory.createLineChart(
                "Movie Year Distribution", "Year", "Count",
                dataset, PlotOrientation.VERTICAL, true, true, false);
        savePng(chart, "movie_year_distribution.png");
        System.out.println("电影年份分布图表已保存为 movie_year_distribution.png");
    }

    /**
     * Saves the 10 most frequent genres as a pie chart in
     * {@code movie_genre_distribution.png}.
     *
     * @param movieList the movies to chart
     * @throws IOException if the image cannot be written
     */
    public static void generateGenreDistributionChart(List<Movie> movieList) throws IOException {
        DefaultPieDataset dataset = new DefaultPieDataset();
        // Only the 10 most frequent genres get a slice.
        MovieAnalyzer.analyzeGenreDistribution(movieList)
                .entrySet()
                .stream()
                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
                .limit(10)
                .forEach(genreEntry -> {
                    // English labels are used to avoid problems displaying Chinese text.
                    String sliceLabel =
                            getEnglishGenre(genreEntry.getKey()) + " (" + genreEntry.getValue() + ")";
                    dataset.setValue(sliceLabel, genreEntry.getValue());
                });

        JFreeChart chart = ChartFactory.createPieChart(
                "Movie Genre Distribution", // title
                dataset,
                true,   // legend
                true,   // tooltips
                false); // URLs
        savePng(chart, "movie_genre_distribution.png");
        System.out.println("电影类型分布图表已保存为 movie_genre_distribution.png");
    }

    /**
     * Maps a Chinese genre name to its English label.
     *
     * @param chineseGenre the genre name as stored on the movie
     * @return the English label, or the input unchanged when no mapping exists
     */
    private static String getEnglishGenre(String chineseGenre) {
        switch (chineseGenre) {
            case "冒险":
                return "Adventure";
            case "奇幻":
                return "Fantasy";
            case "爱情":
                return "Romance";
            case "惊悚":
                return "Thriller";
            case "动画":
                return "Animation";
            case "悬疑":
                return "Mystery";
            case "家庭":
                return "Family";
            case "犯罪":
                return "Crime";
            case "同性":
                return "LGBTQ+";
            case "历史":
                return "History";
            case "剧情":
                return "Drama";
            case "动作":
                return "Action";
            case "喜剧":
                return "Comedy";
            case "科幻":
                return "Sci-Fi";
            default:
                return chineseGenre;
        }
    }

    /**
     * Saves the mean rating per year as a line chart in {@code movie_year_rating.png}.
     * Every data point is also echoed to standard output.
     *
     * @param movieList the movies to chart
     * @throws IOException if the image cannot be written
     */
    public static void generateYearRatingChart(List<Movie> movieList) throws IOException {
        Map<String, Double> meanByYear = MovieAnalyzer.analyzeYearRatingCorrelation(movieList);
        DefaultCategoryDataset dataset = new DefaultCategoryDataset();
        System.out.println("评分与年份相关性数据:");
        meanByYear.forEach((rawYear, mean) -> {
            System.out.println("年份: '" + rawYear + "', 平均评分: " + mean);
            String label = firstFourDigits(rawYear);
            if (label != null) {
                dataset.addValue(mean, "Avg Rating", label);
            }
        });

        JFreeChart chart = ChartFactory.createLineChart(
                "Year vs Rating Correlation", "Year", "Average Rating",
                dataset, PlotOrientation.VERTICAL, true, true, false);
        savePng(chart, "movie_year_rating.png");
        System.out.println("评分与年份相关性图表已保存为 movie_year_rating.png");
    }

    /**
     * Strips every non-digit from a year string and keeps the first four digits.
     *
     * @param rawYear the year text
     * @return the four-digit label, or null when fewer than four digits remain
     */
    private static String firstFourDigits(String rawYear) {
        String digitsOnly = rawYear.replaceAll("[^0-9]", "");
        if (digitsOnly.length() < 4) {
            return null;
        }
        return digitsOnly.substring(0, 4);
    }

    /**
     * Writes a chart as an 800x600 PNG.
     *
     * @param chart    the chart to save
     * @param fileName the target file name
     * @throws IOException if the image cannot be written
     */
    private static void savePng(JFreeChart chart, String fileName) throws IOException {
        ChartUtils.saveChartAsPNG(new File(fileName), chart, 800, 600);
    }
}

91
project1/utils/DataUtils.java

@ -0,0 +1,91 @@
package com.crawler.utils;
import com.crawler.model.Movie;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.List;
/**
 * Helpers for cleaning {@link Movie} records and exporting them to CSV.
 */
public class DataUtils {

    /** File extension in front of which the timestamp is inserted. */
    private static final String CSV_EXTENSION = ".csv";

    /**
     * Normalizes the text fields of a movie in place: every non-null text field is
     * trimmed, and runs of whitespace inside title and quote collapse to one space.
     *
     * @param movie the movie to clean; may be null
     * @return the same (now cleaned) instance, or null if {@code movie} was null
     */
    public static Movie cleanMovie(Movie movie) {
        if (movie == null) {
            return null;
        }
        // Title
        if (movie.getTitle() != null) {
            movie.setTitle(movie.getTitle().trim().replaceAll("\\s+", " "));
        }
        // Director
        if (movie.getDirector() != null) {
            movie.setDirector(movie.getDirector().trim());
        }
        // Actors
        if (movie.getActors() != null) {
            movie.setActors(movie.getActors().trim());
        }
        // Year
        if (movie.getYear() != null) {
            movie.setYear(movie.getYear().trim());
        }
        // Country/region
        if (movie.getCountry() != null) {
            movie.setCountry(movie.getCountry().trim());
        }
        // Genre
        if (movie.getGenre() != null) {
            movie.setGenre(movie.getGenre().trim());
        }
        // Quote
        if (movie.getQuote() != null) {
            movie.setQuote(movie.getQuote().trim().replaceAll("\\s+", " "));
        }
        return movie;
    }

    /**
     * Writes the movies to a CSV file (header row plus one row per non-null movie) and
     * prints the path that was written.
     *
     * <p>The file is written as UTF-8 regardless of the platform default charset, because
     * the header and most values are Chinese text. The writer is closed even when a write
     * fails.
     *
     * @param movieList the movies to export; null elements are skipped
     * @param filePath  the base path; the current time in milliseconds is added to it
     *                  (see {@link #timestampedPath(String, long)}) so that repeated runs
     *                  do not overwrite each other
     * @throws IOException if the file cannot be created or written
     */
    public static void writeMovieToCSV(List<Movie> movieList, String filePath) throws IOException {
        String actualFilePath = timestampedPath(filePath, System.currentTimeMillis());
        try (BufferedWriter writer =
                Files.newBufferedWriter(Paths.get(actualFilePath), StandardCharsets.UTF_8)) {
            // Header row
            writer.write("排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n");
            // Data rows
            for (Movie movie : movieList) {
                if (movie == null) {
                    continue;
                }
                writer.write(String.join(",",
                        String.valueOf(movie.getRank()),
                        escapeCsv(movie.getTitle()),
                        String.valueOf(movie.getRating()),
                        String.valueOf(movie.getRatingPeople()),
                        escapeCsv(movie.getDirector()),
                        escapeCsv(movie.getActors()),
                        escapeCsv(movie.getYear()),
                        escapeCsv(movie.getCountry()),
                        escapeCsv(movie.getGenre()),
                        escapeCsv(movie.getQuote())));
                writer.write("\n");
            }
        }
        System.out.println("数据已保存到 " + actualFilePath);
    }

    /**
     * Adds a timestamp to a file path.
     *
     * <p>For a path ending in {@code .csv} the timestamp goes in front of that extension
     * ({@code a.csv} becomes {@code a_<ts>.csv}). Only the trailing extension is touched,
     * so a {@code .csv} that appears earlier in the path (for example in a directory
     * name) is left alone. Any other path gets {@code _<ts>} appended.
     *
     * @param filePath  the base path
     * @param timestamp the value to insert
     * @return the path including the timestamp
     */
    private static String timestampedPath(String filePath, long timestamp) {
        if (filePath.endsWith(CSV_EXTENSION)) {
            String stem = filePath.substring(0, filePath.length() - CSV_EXTENSION.length());
            return stem + "_" + timestamp + CSV_EXTENSION;
        }
        return filePath + "_" + timestamp;
    }

    /**
     * Escapes one CSV field: a value containing a comma, a double quote or a line break
     * is wrapped in double quotes with embedded quotes doubled.
     *
     * @param value the raw field value; may be null
     * @return the escaped field, or an empty string for null
     */
    private static String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.contains(",")
                || value.contains("\"")
                || value.contains("\n")
                || value.contains("\r");
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }
}
Loading…
Cancel
Save