14 changed files with 1130 additions and 0 deletions
@ -0,0 +1,59 @@ |
|||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.chart.ChartManager; |
||||
|
import com.crawler.chart.impl.GenreDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.RatingDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearRatingChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.spider.DoubanSpider; |
||||
|
import com.crawler.utils.DataUtils; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
public class MovieMain { |
||||
|
public static void main(String[] args) { |
||||
|
try { |
||||
|
System.out.println("开始爬取豆瓣电影Top250数据..."); |
||||
|
|
||||
|
// 1. 启动爬虫
|
||||
|
DoubanSpider spider = new DoubanSpider(); |
||||
|
List<Movie> movieList = spider.crawlMovies(); |
||||
|
|
||||
|
// 2. 清洗数据
|
||||
|
List<Movie> cleanedMovies = movieList.stream() |
||||
|
.map(DataUtils::cleanMovie) |
||||
|
.filter(movie -> movie != null) |
||||
|
.toList(); |
||||
|
|
||||
|
// 3. 保存数据到CSV文件
|
||||
|
DataUtils.writeMovieToCSV(cleanedMovies, "douban_movies.csv"); |
||||
|
System.out.println("数据已保存到 douban_movies.csv"); |
||||
|
|
||||
|
// 4. 展示结果
|
||||
|
MovieResultDisplay.displayResults(cleanedMovies); |
||||
|
|
||||
|
// 5. 使用多态生成图表
|
||||
|
ChartManager chartManager = new ChartManager(); |
||||
|
|
||||
|
ChartGenerator ratingChart = new RatingDistributionChartGenerator(); |
||||
|
ChartGenerator yearChart = new YearDistributionChartGenerator(); |
||||
|
ChartGenerator genreChart = new GenreDistributionChartGenerator(); |
||||
|
ChartGenerator yearRatingChart = new YearRatingChartGenerator(); |
||||
|
|
||||
|
chartManager.addChartGenerator(ratingChart); |
||||
|
chartManager.addChartGenerator(yearChart); |
||||
|
chartManager.addChartGenerator(genreChart); |
||||
|
chartManager.addChartGenerator(yearRatingChart); |
||||
|
|
||||
|
chartManager.generateAllCharts(cleanedMovies); |
||||
|
|
||||
|
System.out.println("\n爬虫任务完成!"); |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,136 @@ |
|||||
|
# 电影爬虫项目 - 继承与多态实现说明 |
||||
|
|
||||
|
## 项目简介 |
||||
|
|
||||
|
本项目是一个Java电影爬虫,从豆瓣电影Top250抓取数据,进行清洗、存储、分析,并生成多种图表展示结果。项目重点展示了面向对象编程中**继承**和**多态**的实现。 |
||||
|
|
||||
|
## 继承与多态实现 |
||||
|
|
||||
|
### 1. 接口继承 |
||||
|
|
||||
|
#### 1.1 核心接口定义 |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/chart/ChartGenerator.java` |
||||
|
|
||||
|
```java |
||||
|
public interface ChartGenerator { |
||||
|
void generateChart(Movie[] movies); |
||||
|
String getChartName(); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 1.2 实现类继承 |
||||
|
|
||||
|
| 实现类 | 文件位置 | 继承关系 | |
||||
|
|-------|---------|----------| |
||||
|
| `RatingDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 | |
||||
|
| `YearDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 | |
||||
|
| `GenreDistributionChartGenerator` | `src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java` | 实现 `ChartGenerator` 接口 | |
||||
|
| `YearRatingChartGenerator` | `src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java` | 实现 `ChartGenerator` 接口 | |
||||
|
|
||||
|
### 2. 多态实现 |
||||
|
|
||||
|
#### 2.1 向上转型(接口引用指向实现类) |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/MovieMain.java` (第41-44行) |
||||
|
|
||||
|
```java |
||||
|
ChartGenerator ratingChart = new RatingDistributionChartGenerator(); |
||||
|
ChartGenerator yearChart = new YearDistributionChartGenerator(); |
||||
|
ChartGenerator genreChart = new GenreDistributionChartGenerator(); |
||||
|
ChartGenerator yearRatingChart = new YearRatingChartGenerator(); |
||||
|
``` |
||||
|
|
||||
|
#### 2.2 方法参数多态 |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` (`addChartGenerator` 方法)
||||
|
|
||||
|
```java |
||||
|
public void addChartGenerator(ChartGenerator generator) { |
||||
|
chartGenerators.add(generator); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 2.3 运行时多态(动态绑定) |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/chart/ChartManager.java` (`generateAllCharts` 方法)
||||
|
|
||||
|
```java |
||||
|
public void generateAllCharts(List<Movie> movies) { |
||||
|
Movie[] movieArray = movies.toArray(new Movie[0]); |
||||
|
for (ChartGenerator generator : chartGenerators) { |
||||
|
System.out.println("生成图表: " + generator.getChartName()); |
||||
|
generator.generateChart(movieArray); // 运行时根据实际类型调用对应方法 |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 2.4 统一调用接口 |
||||
|
|
||||
|
**文件**: `src/main/java/com/crawler/MovieMain.java` (第46-51行) |
||||
|
|
||||
|
```java |
||||
|
chartManager.addChartGenerator(ratingChart); |
||||
|
chartManager.addChartGenerator(yearChart); |
||||
|
chartManager.addChartGenerator(genreChart); |
||||
|
chartManager.addChartGenerator(yearRatingChart); |
||||
|
|
||||
|
chartManager.generateAllCharts(cleanedMovies); |
||||
|
``` |
||||
|
|
||||
|
## 继承与多态的优势 |
||||
|
|
||||
|
1. **代码复用**:所有图表生成器共享相同的接口方法 |
||||
|
2. **可扩展性**:新增图表类型只需实现接口,无需修改现有代码 |
||||
|
3. **统一管理**:`ChartManager` 可以统一管理不同类型的图表生成器 |
||||
|
4. **灵活性**:通过接口引用可以操作不同的实现类对象 |
||||
|
5. **可维护性**:代码结构清晰,职责分明 |
||||
|
|
||||
|
## 项目结构 |
||||
|
|
||||
|
``` |
||||
|
src/ |
||||
|
└── main/ |
||||
|
└── java/ |
||||
|
└── com/ |
||||
|
└── crawler/ |
||||
|
├── MovieMain.java # 主入口文件 |
||||
|
├── model/ |
||||
|
│ └── Movie.java # 电影数据模型 |
||||
|
├── spider/ |
||||
|
│ └── DoubanSpider.java # 豆瓣爬虫实现 |
||||
|
├── analysis/ |
||||
|
│ └── MovieAnalyzer.java # 数据分析工具 |
||||
|
├── ui/ |
||||
|
│ └── MovieResultDisplay.java # 结果显示和图表生成 |
||||
|
├── utils/ |
||||
|
│ └── DataUtils.java # 数据工具类 |
||||
|
└── chart/ |
||||
|
├── ChartGenerator.java # 图表生成器接口 |
||||
|
├── ChartManager.java # 图表管理器 |
||||
|
└── impl/ |
||||
|
├── RatingDistributionChartGenerator.java # 评分分布图表 |
||||
|
├── YearDistributionChartGenerator.java # 年份分布图表 |
||||
|
├── GenreDistributionChartGenerator.java # 类型分布图表 |
||||
|
└── YearRatingChartGenerator.java # 年份评分相关性图表 |
||||
|
``` |
||||
|
|
||||
|
## 运行说明 |
||||
|
|
||||
|
1. **直接运行**:在IDE中直接运行 `MovieMain.java` |
||||
|
2. **依赖要求**:需要Jsoup和JFreeChart库 |
||||
|
3. **运行结果**: |
||||
|
- 控制台输出爬取进度和图表生成信息 |
||||
|
- 生成的CSV数据文件保存在项目目录 |
||||
|
- 生成的图表以PNG格式保存在项目目录 |
||||
|
|
||||
|
## 技术栈 |
||||
|
|
||||
|
- Java 8+ |
||||
|
- Jsoup (网页解析) |
||||
|
- JFreeChart (图表生成) |
||||
|
- Maven (依赖管理) |
||||
|
|
||||
|
## 总结 |
||||
|
|
||||
|
本项目通过图表生成器接口及其实现类,充分展示了面向对象编程中**继承**和**多态**的核心概念。接口定义了统一的方法规范,实现类提供了具体的实现逻辑,通过接口引用和运行时动态绑定,实现了代码的灵活性和可扩展性。 |
||||
@ -0,0 +1,119 @@ |
|||||
|
package com.crawler.analysis; |
||||
|
|
||||
|
import com.crawler.model.Movie; |
||||
|
|
||||
|
import java.util.*; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
public class MovieAnalyzer { |
||||
|
// 统计电影评分分布
|
||||
|
public static Map<Double, Integer> analyzeRatingDistribution(List<Movie> movieList) { |
||||
|
Map<Double, Integer> ratingMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null) { |
||||
|
double rating = movie.getRating(); |
||||
|
ratingMap.put(rating, ratingMap.getOrDefault(rating, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return ratingMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影年份分布
|
||||
|
public static Map<String, Integer> analyzeYearDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> yearMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getYear() != null) { |
||||
|
String year = movie.getYear(); |
||||
|
yearMap.put(year, yearMap.getOrDefault(year, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return yearMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影类型分布
|
||||
|
public static Map<String, Integer> analyzeGenreDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> genreMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getGenre() != null) { |
||||
|
String genre = movie.getGenre(); |
||||
|
genreMap.put(genre, genreMap.getOrDefault(genre, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return genreMap; |
||||
|
} |
||||
|
|
||||
|
// 统计电影国家/地区分布
|
||||
|
public static Map<String, Integer> analyzeCountryDistribution(List<Movie> movieList) { |
||||
|
Map<String, Integer> countryMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getCountry() != null) { |
||||
|
String country = movie.getCountry(); |
||||
|
countryMap.put(country, countryMap.getOrDefault(country, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return countryMap; |
||||
|
} |
||||
|
|
||||
|
// 分析导演作品数量排行
|
||||
|
public static Map<String, Integer> analyzeDirectorWorks(List<Movie> movieList) { |
||||
|
Map<String, Integer> directorMap = new HashMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getDirector() != null) { |
||||
|
String director = movie.getDirector(); |
||||
|
directorMap.put(director, directorMap.getOrDefault(director, 0) + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 按作品数量排序
|
||||
|
return directorMap.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.collect(Collectors.toMap( |
||||
|
Map.Entry::getKey, |
||||
|
Map.Entry::getValue, |
||||
|
(e1, e2) -> e1, |
||||
|
LinkedHashMap::new |
||||
|
)); |
||||
|
} |
||||
|
|
||||
|
// 计算平均评分
|
||||
|
public static double calculateAverageRating(List<Movie> movieList) { |
||||
|
return movieList.stream() |
||||
|
.filter(Objects::nonNull) |
||||
|
.mapToDouble(Movie::getRating) |
||||
|
.average() |
||||
|
.orElse(0.0); |
||||
|
} |
||||
|
|
||||
|
// 计算评分与年份的相关性(简单计算)
|
||||
|
public static Map<String, Double> analyzeYearRatingCorrelation(List<Movie> movieList) { |
||||
|
Map<String, List<Double>> yearRatingsMap = new TreeMap<>(); |
||||
|
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null && movie.getYear() != null) { |
||||
|
String year = movie.getYear(); |
||||
|
double rating = movie.getRating(); |
||||
|
yearRatingsMap.computeIfAbsent(year, k -> new ArrayList<>()).add(rating); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 计算每年的平均评分
|
||||
|
Map<String, Double> yearAverageRatingMap = new TreeMap<>(); |
||||
|
for (Map.Entry<String, List<Double>> entry : yearRatingsMap.entrySet()) { |
||||
|
String year = entry.getKey(); |
||||
|
List<Double> ratings = entry.getValue(); |
||||
|
double average = ratings.stream().mapToDouble(Double::doubleValue).average().orElse(0.0); |
||||
|
yearAverageRatingMap.put(year, average); |
||||
|
} |
||||
|
|
||||
|
return yearAverageRatingMap; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,8 @@ |
|||||
|
package com.crawler.chart; |
||||
|
|
||||
|
import com.crawler.model.Movie; |
||||
|
|
||||
|
public interface ChartGenerator { |
||||
|
void generateChart(Movie[] movies); |
||||
|
String getChartName(); |
||||
|
} |
||||
@ -0,0 +1,30 @@ |
|||||
|
package com.crawler.chart; |
||||
|
|
||||
|
import com.crawler.chart.impl.GenreDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.RatingDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearDistributionChartGenerator; |
||||
|
import com.crawler.chart.impl.YearRatingChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ChartManager { |
||||
|
private List<ChartGenerator> chartGenerators; |
||||
|
|
||||
|
public ChartManager() { |
||||
|
chartGenerators = new ArrayList<>(); |
||||
|
} |
||||
|
|
||||
|
public void addChartGenerator(ChartGenerator generator) { |
||||
|
chartGenerators.add(generator); |
||||
|
} |
||||
|
|
||||
|
public void generateAllCharts(List<Movie> movies) { |
||||
|
Movie[] movieArray = movies.toArray(new Movie[0]); |
||||
|
for (ChartGenerator generator : chartGenerators) { |
||||
|
System.out.println("生成图表: " + generator.getChartName()); |
||||
|
generator.generateChart(movieArray); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class GenreDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateGenreDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Genre Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
public class RatingDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateRatingDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Rating Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class YearDistributionChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateYearDistributionChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Year Distribution Chart"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.crawler.chart.impl; |
||||
|
|
||||
|
import com.crawler.chart.ChartGenerator; |
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.ui.MovieResultDisplay; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class YearRatingChartGenerator implements ChartGenerator { |
||||
|
@Override |
||||
|
public void generateChart(Movie[] movies) { |
||||
|
List<Movie> movieList = List.of(movies); |
||||
|
try { |
||||
|
MovieResultDisplay.generateYearRatingChart(movieList); |
||||
|
} catch (IOException e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getChartName() { |
||||
|
return "Year Rating Correlation Chart"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,108 @@ |
|||||
|
package com.crawler.model; |
||||
|
|
||||
|
/**
 * Mutable data holder for one movie entry. All fields start at their Java defaults
 * ({@code 0}, {@code 0.0} or {@code null}) and are filled through the setters.
 */
public class Movie {
    // Numeric attributes.
    private int rank;
    private double rating;
    private int ratingPeople;

    // Textual attributes.
    private String title;
    private String director;
    private String actors;
    private String year;
    private String country;
    private String genre;
    private String quote;

    /** @return the rank of the movie */
    public int getRank() {
        return this.rank;
    }

    /** @param rank the rank of the movie */
    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return the title, or {@code null} if none was set */
    public String getTitle() {
        return this.title;
    }

    /** @param title the title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the rating score */
    public double getRating() {
        return this.rating;
    }

    /** @param rating the rating score */
    public void setRating(double rating) {
        this.rating = rating;
    }

    /** @return the number of people who rated the movie */
    public int getRatingPeople() {
        return this.ratingPeople;
    }

    /** @param ratingPeople the number of people who rated the movie */
    public void setRatingPeople(int ratingPeople) {
        this.ratingPeople = ratingPeople;
    }

    /** @return the director, or {@code null} if none was set */
    public String getDirector() {
        return this.director;
    }

    /** @param director the director */
    public void setDirector(String director) {
        this.director = director;
    }

    /** @return the actors, or {@code null} if none were set */
    public String getActors() {
        return this.actors;
    }

    /** @param actors the actors */
    public void setActors(String actors) {
        this.actors = actors;
    }

    /** @return the year as text, or {@code null} if none was set */
    public String getYear() {
        return this.year;
    }

    /** @param year the year as text */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return the country/region, or {@code null} if none was set */
    public String getCountry() {
        return this.country;
    }

    /** @param country the country/region */
    public void setCountry(String country) {
        this.country = country;
    }

    /** @return the genre, or {@code null} if none was set */
    public String getGenre() {
        return this.genre;
    }

    /** @param genre the genre */
    public void setGenre(String genre) {
        this.genre = genre;
    }

    /** @return the quote, or {@code null} if none was set */
    public String getQuote() {
        return this.quote;
    }

    /** @param quote the quote */
    public void setQuote(String quote) {
        this.quote = quote;
    }

    /**
     * Renders rank, title, rating, rating count, director, year and genre.
     * Actors, country and quote are not part of the text.
     */
    @Override
    public String toString() {
        return new StringBuilder("Movie{")
                .append("rank=").append(rank)
                .append(", title='").append(title).append('\'')
                .append(", rating=").append(rating)
                .append(", ratingPeople=").append(ratingPeople)
                .append(", director='").append(director).append('\'')
                .append(", year='").append(year).append('\'')
                .append(", genre='").append(genre).append('\'')
                .append('}')
                .toString();
    }
}
||||
@ -0,0 +1,55 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
|
||||
|
<groupId>com.crawler</groupId> |
||||
|
<artifactId>job-crawler</artifactId> |
||||
|
<version>1.0-SNAPSHOT</version> |
||||
|
|
||||
|
<properties> |
||||
|
<maven.compiler.source>1.8</maven.compiler.source> |
||||
|
<maven.compiler.target>1.8</maven.compiler.target> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
</properties> |
||||
|
|
||||
|
<dependencies> |
||||
|
<!-- Jsoup - HTML解析库 --> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<!-- JFreeChart - 图表生成库 --> |
||||
|
<dependency> |
||||
|
<groupId>org.jfree</groupId> |
||||
|
<artifactId>jfreechart</artifactId> |
||||
|
<version>1.5.4</version> |
||||
|
</dependency> |
||||
|
|
||||
|
<!-- JCommon - JFreeChart 1.0.x 的依赖;JFreeChart 1.5.x 起已不再依赖 JCommon,仅当代码直接使用 JCommon 时才需要保留此项(待确认) -->
||||
|
<dependency> |
||||
|
<groupId>org.jfree</groupId> |
||||
|
<artifactId>jcommon</artifactId> |
||||
|
<version>1.0.24</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
|
||||
|
<build> |
||||
|
<sourceDirectory>src/main/java</sourceDirectory> |
||||
|
<outputDirectory>target/classes</outputDirectory> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
<configuration> |
||||
|
<source>${maven.compiler.source}</source> |
||||
|
<target>${maven.compiler.target}</target> |
||||
|
</configuration> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,206 @@ |
|||||
|
package com.crawler.spider; |
||||
|
|
||||
|
import com.crawler.model.Movie; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.concurrent.*; |
||||
|
|
||||
|
public class DoubanSpider { |
||||
|
private static final String BASE_URL = "https://movie.douban.com/top250"; |
||||
|
private static final int MAX_PAGES = 10; |
||||
|
private static final int THREAD_POOL_SIZE = 3; |
||||
|
private static final int REQUEST_DELAY = 1000; |
||||
|
|
||||
|
public List<Movie> crawlMovies() { |
||||
|
List<Movie> movieList = new ArrayList<>(); |
||||
|
ExecutorService executorService = Executors.newFixedThreadPool(THREAD_POOL_SIZE); |
||||
|
List<Future<List<Movie>>> futures = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
for (int page = 0; page < MAX_PAGES; page++) { |
||||
|
final int currentPage = page; |
||||
|
futures.add(executorService.submit(() -> { |
||||
|
try { |
||||
|
Thread.sleep(REQUEST_DELAY); |
||||
|
return crawlPage(currentPage); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return new ArrayList<>(); |
||||
|
} |
||||
|
})); |
||||
|
} |
||||
|
|
||||
|
for (Future<List<Movie>> future : futures) { |
||||
|
try { |
||||
|
movieList.addAll(future.get()); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
} finally { |
||||
|
executorService.shutdown(); |
||||
|
} |
||||
|
|
||||
|
return movieList; |
||||
|
} |
||||
|
|
||||
|
private List<Movie> crawlPage(int page) throws IOException { |
||||
|
List<Movie> movieList = new ArrayList<>(); |
||||
|
String url = BASE_URL + "?start=" + (page * 25); |
||||
|
System.out.println("爬取页面: " + url); |
||||
|
|
||||
|
Document document = Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") |
||||
|
.timeout(10000) |
||||
|
.get(); |
||||
|
|
||||
|
System.out.println("页面标题: " + document.title()); |
||||
|
|
||||
|
// 选择电影条目
|
||||
|
Elements movieItems = document.select(".grid_view li"); |
||||
|
System.out.println("找到电影条目数: " + movieItems.size()); |
||||
|
|
||||
|
for (Element item : movieItems) { |
||||
|
Movie movie = parseMovie(item); |
||||
|
if (movie != null) { |
||||
|
movieList.add(movie); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("页面" + (page + 1) + "爬取成功,获取电影数: " + movieList.size()); |
||||
|
return movieList; |
||||
|
} |
||||
|
|
||||
|
private Movie parseMovie(Element item) { |
||||
|
Movie movie = new Movie(); |
||||
|
|
||||
|
try { |
||||
|
// 排名
|
||||
|
Element rankElement = item.selectFirst(".pic em"); |
||||
|
if (rankElement != null) { |
||||
|
movie.setRank(Integer.parseInt(rankElement.text().trim())); |
||||
|
} |
||||
|
|
||||
|
// 标题
|
||||
|
Element titleElement = item.selectFirst(".title"); |
||||
|
if (titleElement != null) { |
||||
|
movie.setTitle(titleElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
// 评分
|
||||
|
Element ratingElement = item.selectFirst(".rating_num"); |
||||
|
if (ratingElement != null) { |
||||
|
movie.setRating(Double.parseDouble(ratingElement.text().trim())); |
||||
|
} |
||||
|
|
||||
|
// 评价人数
|
||||
|
Element ratingPeopleElement = item.selectFirst(".star span:nth-child(4)"); |
||||
|
if (ratingPeopleElement != null) { |
||||
|
String ratingPeople = ratingPeopleElement.text().trim(); |
||||
|
movie.setRatingPeople(Integer.parseInt(ratingPeople.replaceAll("[^0-9]", ""))); |
||||
|
} |
||||
|
|
||||
|
// 导演和演员
|
||||
|
Element infoElement = item.selectFirst(".bd p:first-child"); |
||||
|
if (infoElement != null) { |
||||
|
String info = infoElement.text().trim(); |
||||
|
|
||||
|
// 提取导演
|
||||
|
if (info.contains("导演:")) { |
||||
|
int directorStart = info.indexOf("导演:") + 3; |
||||
|
int directorEnd = info.indexOf("主演:"); |
||||
|
if (directorEnd == -1) { |
||||
|
directorEnd = info.indexOf(" "); |
||||
|
// 找到第一个数字年份的位置
|
||||
|
for (int i = 0; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
directorEnd = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
if (directorEnd != -1) { |
||||
|
movie.setDirector(info.substring(directorStart, directorEnd).trim()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取主演
|
||||
|
if (info.contains("主演:")) { |
||||
|
int actorsStart = info.indexOf("主演:") + 3; |
||||
|
int actorsEnd = info.length(); |
||||
|
// 找到第一个数字年份的位置
|
||||
|
for (int i = actorsStart; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
actorsEnd = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
movie.setActors(info.substring(actorsStart, actorsEnd).trim()); |
||||
|
} |
||||
|
|
||||
|
// 提取年份、国家/地区和类型
|
||||
|
// 找到年份的开始位置(第一个数字)
|
||||
|
int yearStart = -1; |
||||
|
for (int i = 0; i < info.length(); i++) { |
||||
|
if (Character.isDigit(info.charAt(i))) { |
||||
|
yearStart = i; |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (yearStart != -1) { |
||||
|
// 提取年份(4位数字)
|
||||
|
if (yearStart + 4 <= info.length()) { |
||||
|
String year = info.substring(yearStart, yearStart + 4); |
||||
|
if (year.matches("\\d{4}")) { |
||||
|
movie.setYear(year); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 提取国家/地区和类型
|
||||
|
int slashIndex = info.indexOf("/", yearStart); |
||||
|
if (slashIndex != -1) { |
||||
|
// 提取国家/地区
|
||||
|
int nextSlashIndex = info.indexOf("/", slashIndex + 1); |
||||
|
if (nextSlashIndex != -1) { |
||||
|
String country = info.substring(slashIndex + 1, nextSlashIndex).trim(); |
||||
|
movie.setCountry(country); |
||||
|
|
||||
|
// 提取类型
|
||||
|
String genre = info.substring(nextSlashIndex + 1).trim(); |
||||
|
// 取第一个类型
|
||||
|
if (!genre.isEmpty()) { |
||||
|
String[] genres = genre.split(" "); |
||||
|
if (genres.length > 0) { |
||||
|
movie.setGenre(genres[0]); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 简介
|
||||
|
Element quoteElement = item.selectFirst(".inq"); |
||||
|
if (quoteElement != null) { |
||||
|
movie.setQuote(quoteElement.text().trim()); |
||||
|
} |
||||
|
|
||||
|
// 过滤无效电影
|
||||
|
if (movie.getTitle() == null || movie.getTitle().isEmpty()) { |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
return movie; |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,216 @@ |
|||||
|
package com.crawler.ui; |
||||
|
|
||||
|
import com.crawler.analysis.MovieAnalyzer; |
||||
|
import com.crawler.model.Movie; |
||||
|
import org.jfree.chart.ChartFactory; |
||||
|
import org.jfree.chart.ChartUtils; |
||||
|
import org.jfree.chart.JFreeChart; |
||||
|
import org.jfree.chart.plot.PlotOrientation; |
||||
|
import org.jfree.data.category.DefaultCategoryDataset; |
||||
|
import org.jfree.data.general.DefaultPieDataset; |
||||
|
import org.jfree.data.statistics.HistogramDataset; |
||||
|
import org.jfree.chart.plot.PiePlot; |
||||
|
import org.jfree.chart.labels.StandardPieSectionLabelGenerator; |
||||
|
import java.text.DecimalFormat; |
||||
|
import java.text.NumberFormat; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class MovieResultDisplay { |
||||
|
// 控制台输出统计结果
|
||||
|
public static void displayResults(List<Movie> movieList) { |
||||
|
System.out.println("\n=== 电影数据统计结果 ==="); |
||||
|
System.out.println("爬取电影总数: " + movieList.size()); |
||||
|
|
||||
|
// 平均评分
|
||||
|
double averageRating = MovieAnalyzer.calculateAverageRating(movieList); |
||||
|
System.out.printf("平均评分: %.2f\n", averageRating); |
||||
|
|
||||
|
// 电影评分分布
|
||||
|
System.out.println("\n=== 电影评分分布 ==="); |
||||
|
Map<Double, Integer> ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); |
||||
|
for (Map.Entry<Double, Integer> entry : ratingDistribution.entrySet()) { |
||||
|
System.out.printf("评分 %.1f: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
|
||||
|
// 电影年份分布(最近20年)
|
||||
|
System.out.println("\n=== 电影年份分布(最近20年)==="); |
||||
|
Map<String, Integer> yearDistribution = MovieAnalyzer.analyzeYearDistribution(movieList); |
||||
|
int count = 0; |
||||
|
for (Map.Entry<String, Integer> entry : yearDistribution.entrySet()) { |
||||
|
if (count >= yearDistribution.size() - 20) { // 只显示最近20年
|
||||
|
System.out.printf("%s年: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
count++; |
||||
|
} |
||||
|
|
||||
|
// 电影类型分布
|
||||
|
System.out.println("\n=== 电影类型分布 ==="); |
||||
|
Map<String, Integer> genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); |
||||
|
genreDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) // 只显示前10种类型
|
||||
|
.forEach(entry -> System.out.printf("%-10s: %d部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 导演作品数量排行
|
||||
|
System.out.println("\n=== 导演作品数量排行 ==="); |
||||
|
Map<String, Integer> directorWorks = MovieAnalyzer.analyzeDirectorWorks(movieList); |
||||
|
count = 0; |
||||
|
for (Map.Entry<String, Integer> entry : directorWorks.entrySet()) { |
||||
|
if (count < 10) { // 只显示前10位导演
|
||||
|
System.out.printf("%-20s: %d部\n", entry.getKey(), entry.getValue()); |
||||
|
count++; |
||||
|
} else { |
||||
|
break; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 评分与年份相关性
|
||||
|
System.out.println("\n=== 评分与年份相关性 ==="); |
||||
|
Map<String, Double> yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); |
||||
|
for (Map.Entry<String, Double> entry : yearRatingCorrelation.entrySet()) { |
||||
|
System.out.printf("%s年: 平均评分 %.2f\n", entry.getKey(), entry.getValue()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 生成电影评分分布直方图
|
||||
|
public static void generateRatingDistributionChart(List<Movie> movieList) throws IOException { |
||||
|
Map<Double, Integer> ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
|
||||
|
for (Map.Entry<Double, Integer> entry : ratingDistribution.entrySet()) { |
||||
|
dataset.addValue(entry.getValue(), "Count", entry.getKey().toString()); |
||||
|
} |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createBarChart( |
||||
|
"Movie Rating Distribution", |
||||
|
"Rating", |
||||
|
"Count", |
||||
|
dataset, |
||||
|
PlotOrientation.VERTICAL, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_rating_distribution.png"), chart, 800, 600); |
||||
|
System.out.println("电影评分分布图表已保存为 movie_rating_distribution.png"); |
||||
|
} |
||||
|
|
||||
|
// 生成电影年份分布折线图
|
||||
|
/**
 * Renders the year distribution as a line chart and saves it as
 * {@code movie_year_distribution.png} (800x600). Every year entry is also printed to
 * standard output; entries whose key contains fewer than four digits are not charted.
 *
 * @param movieList the movies to visualise
 * @throws IOException if the PNG file cannot be written
 */
public static void generateYearDistributionChart(List<Movie> movieList) throws IOException {
    Map<String, Integer> perYear = MovieAnalyzer.analyzeYearDistribution(movieList);
    DefaultCategoryDataset dataset = new DefaultCategoryDataset();

    System.out.println("年份分布数据:");
    perYear.forEach((rawYear, total) -> {
        System.out.println("年份: '" + rawYear + "', 数量: " + total);

        // Use the first four digits found in the key as the category label.
        String digits = rawYear.replaceAll("[^0-9]", "");
        if (digits.length() < 4) {
            return;
        }
        dataset.addValue(total, "Count", digits.substring(0, 4));
    });

    JFreeChart chart = ChartFactory.createLineChart(
            "Movie Year Distribution",
            "Year",
            "Count",
            dataset,
            PlotOrientation.VERTICAL,
            true,
            true,
            false);

    File target = new File("movie_year_distribution.png");
    ChartUtils.saveChartAsPNG(target, chart, 800, 600);
    System.out.println("电影年份分布图表已保存为 movie_year_distribution.png");
}
||||
|
|
||||
|
// 生成电影类型分布饼图
|
||||
|
public static void generateGenreDistributionChart(List<Movie> movieList) throws IOException { |
||||
|
Map<String, Integer> genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); |
||||
|
DefaultPieDataset dataset = new DefaultPieDataset(); |
||||
|
|
||||
|
// 只显示前10种类型
|
||||
|
genreDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.forEach(entry -> { |
||||
|
// 使用英文标签避免中文显示问题
|
||||
|
String englishLabel = getEnglishGenre(entry.getKey()) + " (" + entry.getValue() + ")"; |
||||
|
dataset.setValue(englishLabel, entry.getValue()); |
||||
|
}); |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createPieChart( |
||||
|
"Movie Genre Distribution", // 使用英文标题
|
||||
|
dataset, |
||||
|
true, // 显示图例
|
||||
|
true, // 显示工具提示
|
||||
|
false // 不显示URL
|
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_genre_distribution.png"), chart, 800, 600); |
||||
|
System.out.println("电影类型分布图表已保存为 movie_genre_distribution.png"); |
||||
|
} |
||||
|
|
||||
|
// 将中文类型转换为英文
|
||||
|
private static String getEnglishGenre(String chineseGenre) { |
||||
|
switch (chineseGenre) { |
||||
|
case "冒险": return "Adventure"; |
||||
|
case "奇幻": return "Fantasy"; |
||||
|
case "爱情": return "Romance"; |
||||
|
case "惊悚": return "Thriller"; |
||||
|
case "动画": return "Animation"; |
||||
|
case "悬疑": return "Mystery"; |
||||
|
case "家庭": return "Family"; |
||||
|
case "犯罪": return "Crime"; |
||||
|
case "同性": return "LGBTQ+"; |
||||
|
case "历史": return "History"; |
||||
|
case "剧情": return "Drama"; |
||||
|
case "动作": return "Action"; |
||||
|
case "喜剧": return "Comedy"; |
||||
|
case "科幻": return "Sci-Fi"; |
||||
|
default: return chineseGenre; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// 生成评分与年份相关性图表
|
||||
|
public static void generateYearRatingChart(List<Movie> movieList) throws IOException { |
||||
|
Map<String, Double> yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); |
||||
|
DefaultCategoryDataset dataset = new DefaultCategoryDataset(); |
||||
|
|
||||
|
System.out.println("评分与年份相关性数据:"); |
||||
|
for (Map.Entry<String, Double> entry : yearRatingCorrelation.entrySet()) { |
||||
|
System.out.println("年份: '" + entry.getKey() + "', 平均评分: " + entry.getValue()); |
||||
|
// 尝试提取年份数字
|
||||
|
String year = entry.getKey(); |
||||
|
// 提取4位数字作为年份
|
||||
|
String yearMatch = year.replaceAll("[^0-9]", ""); |
||||
|
if (yearMatch.length() >= 4) { |
||||
|
yearMatch = yearMatch.substring(0, 4); |
||||
|
dataset.addValue(entry.getValue(), "Avg Rating", yearMatch); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
JFreeChart chart = ChartFactory.createLineChart( |
||||
|
"Year vs Rating Correlation", |
||||
|
"Year", |
||||
|
"Average Rating", |
||||
|
dataset, |
||||
|
PlotOrientation.VERTICAL, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
|
||||
|
ChartUtils.saveChartAsPNG(new File("movie_year_rating.png"), chart, 800, 600); |
||||
|
System.out.println("评分与年份相关性图表已保存为 movie_year_rating.png"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,91 @@ |
|||||
|
package com.crawler.utils; |
||||
|
|
||||
|
import com.crawler.model.Movie; |
||||
|
|
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DataUtils { |
||||
|
// 清洗电影数据
|
||||
|
public static Movie cleanMovie(Movie movie) { |
||||
|
if (movie == null) return null; |
||||
|
|
||||
|
// 清洗标题
|
||||
|
if (movie.getTitle() != null) { |
||||
|
movie.setTitle(movie.getTitle().trim().replaceAll("\\s+", " ")); |
||||
|
} |
||||
|
|
||||
|
// 清洗导演
|
||||
|
if (movie.getDirector() != null) { |
||||
|
movie.setDirector(movie.getDirector().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗演员
|
||||
|
if (movie.getActors() != null) { |
||||
|
movie.setActors(movie.getActors().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗年份
|
||||
|
if (movie.getYear() != null) { |
||||
|
movie.setYear(movie.getYear().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗国家/地区
|
||||
|
if (movie.getCountry() != null) { |
||||
|
movie.setCountry(movie.getCountry().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗类型
|
||||
|
if (movie.getGenre() != null) { |
||||
|
movie.setGenre(movie.getGenre().trim()); |
||||
|
} |
||||
|
|
||||
|
// 清洗简介
|
||||
|
if (movie.getQuote() != null) { |
||||
|
movie.setQuote(movie.getQuote().trim().replaceAll("\\s+", " ")); |
||||
|
} |
||||
|
|
||||
|
return movie; |
||||
|
} |
||||
|
|
||||
|
// 写入电影数据到CSV文件
|
||||
|
public static void writeMovieToCSV(List<Movie> movieList, String filePath) throws IOException { |
||||
|
// 添加时间戳避免文件冲突
|
||||
|
String timestamp = String.valueOf(System.currentTimeMillis()); |
||||
|
String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); |
||||
|
|
||||
|
FileWriter writer = new FileWriter(actualFilePath); |
||||
|
// 写入表头
|
||||
|
writer.write("排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n"); |
||||
|
|
||||
|
// 写入数据
|
||||
|
for (Movie movie : movieList) { |
||||
|
if (movie != null) { |
||||
|
writer.write(movie.getRank() + ","); |
||||
|
writer.write(escapeCsv(movie.getTitle()) + ","); |
||||
|
writer.write(movie.getRating() + ","); |
||||
|
writer.write(movie.getRatingPeople() + ","); |
||||
|
writer.write(escapeCsv(movie.getDirector()) + ","); |
||||
|
writer.write(escapeCsv(movie.getActors()) + ","); |
||||
|
writer.write(escapeCsv(movie.getYear()) + ","); |
||||
|
writer.write(escapeCsv(movie.getCountry()) + ","); |
||||
|
writer.write(escapeCsv(movie.getGenre()) + ","); |
||||
|
writer.write(escapeCsv(movie.getQuote()) + "\n"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
writer.close(); |
||||
|
System.out.println("数据已保存到 " + actualFilePath); |
||||
|
} |
||||
|
|
||||
|
// 转义CSV特殊字符
|
||||
|
private static String escapeCsv(String value) { |
||||
|
if (value == null) return ""; |
||||
|
if (value.contains(",") || value.contains("\"")) { |
||||
|
value = value.replaceAll("\"", "\"\""); |
||||
|
return "\"" + value + "\""; |
||||
|
} |
||||
|
return value; |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue