You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

169 lines
7.1 KiB

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
/**
 * Command-line entry point of the movie pipeline: crawl, clean, save to CSV,
 * print statistics, print a table and render two PNG charts.
 *
 * <p>All helpers are private static methods; the class keeps no state.
 */
public class Main {
    /** Number of movies requested from the crawler. */
    private static final int MOVIE_COUNT = 50;

    /** Output path of the CSV export, relative to the working directory. */
    private static final String CSV_FILE = "movies.csv";

    /**
     * Runs the whole pipeline once.
     *
     * <p>Any exception is printed with its stack trace and the JVM then exits
     * with status 1, so that calling scripts can detect the failure.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        try {
            // 1. Crawl the movie data.
            MovieCrawler crawler = new MovieCrawler();
            List<Movie> movies = crawler.crawlTopMovies(MOVIE_COUNT);
            // 2. Clean the data.
            List<Movie> cleanedMovies = cleanData(movies);
            // 3. Persist the data.
            saveToCSV(cleanedMovies, CSV_FILE);
            // 4. Analyse the data.
            analyzeData(cleanedMovies);
            // 5. Present the results.
            displayResults(cleanedMovies);
            generateCharts(cleanedMovies);
            System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。");
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure through the exit status instead of ending with 0.
            System.exit(1);
        }
    }

    /**
     * Trims the text fields of every movie and replaces missing ones with "".
     *
     * <p>The {@code Movie} objects are modified in place; the returned list is a
     * new list holding the same instances. {@code null} elements are dropped, and
     * a {@code null} input list yields an empty list.
     *
     * @param movies crawled movies, may be {@code null}
     * @return new mutable list of the cleaned movies, never {@code null}
     */
    private static List<Movie> cleanData(List<Movie> movies) {
        if (movies == null) {
            return new ArrayList<>();
        }
        List<Movie> cleaned = new ArrayList<>(movies.size());
        for (Movie movie : movies) {
            if (movie == null) {
                continue;
            }
            // Normalising null to "" keeps the later groupingBy/printf/truncate calls null-safe.
            movie.setTitle(trimToEmpty(movie.getTitle()));
            movie.setGenre(trimToEmpty(movie.getGenre()));
            movie.setDirector(trimToEmpty(movie.getDirector()));
            movie.setActors(trimToEmpty(movie.getActors()));
            movie.setSynopsis(trimToEmpty(movie.getSynopsis()));
            cleaned.add(movie);
        }
        return cleaned;
    }

    /** Returns {@code value.trim()}, or "" when {@code value} is {@code null}. */
    private static String trimToEmpty(String value) {
        return value == null ? "" : value.trim();
    }

    /**
     * Writes the movies to a UTF-8 encoded CSV file, replacing any existing file.
     *
     * @param movies   movies to export, one row each
     * @param fileName path of the file to create or overwrite
     * @throws IOException if the file cannot be opened or written
     */
    private static void saveToCSV(List<Movie> movies, String fileName) throws IOException {
        // Explicit UTF-8: the platform default charset may not be able to encode the text.
        try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName), StandardCharsets.UTF_8)) {
            // Header row.
            writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n");
            // Data rows.
            for (Movie movie : movies) {
                // Locale.ROOT keeps '.' as the decimal separator; a locale using ','
                // would split the rating into two CSV columns.
                writer.write(String.format(Locale.ROOT, "%s,%d,%.1f,%s,%s,%s,%s\n",
                        escapeCSV(movie.getTitle()),
                        movie.getYear(),
                        movie.getRating(),
                        escapeCSV(movie.getGenre()),
                        escapeCSV(movie.getDirector()),
                        escapeCSV(movie.getActors()),
                        escapeCSV(movie.getSynopsis())));
            }
        }
    }

    /**
     * Escapes one CSV field: a value containing a comma, a double quote, or a
     * line break (LF or CR) is wrapped in double quotes with embedded quotes doubled.
     *
     * @param value raw field value, may be {@code null}
     * @return the escaped field; "" for {@code null}
     */
    private static String escapeCSV(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.contains(",")
                || value.contains("\n")
                || value.contains("\r")
                || value.contains("\"");
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /**
     * Prints rating distribution, year distribution, the ten directors with the
     * most movies, and the average rating to standard output.
     *
     * @param movies movies to analyse
     */
    private static void analyzeData(List<Movie> movies) {
        System.out.println("\n=== 数据分析结果 ===");
        // 1. Rating distribution, ascending by rating (TreeMap keeps keys sorted).
        System.out.println("\n1. 评分分布:");
        Map<Double, Long> ratingDistribution = movies.stream()
                .collect(Collectors.groupingBy(Movie::getRating, TreeMap::new, Collectors.counting()));
        ratingDistribution.forEach(
                (rating, count) -> System.out.printf("评分 %.1f: %d 部\n", rating, count));
        // 2. Year distribution, ascending by year.
        System.out.println("\n2. 年份分布:");
        Map<Integer, Long> yearDistribution = movies.stream()
                .collect(Collectors.groupingBy(Movie::getYear, TreeMap::new, Collectors.counting()));
        yearDistribution.forEach(
                (year, count) -> System.out.printf("年份 %d: %d 部\n", year, count));
        // 3. Top ten directors by number of movies.
        System.out.println("\n3. 导演作品数排行:");
        Map<String, Long> directorCount = movies.stream()
                .collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting()));
        directorCount.entrySet().stream()
                // Ties are broken by name so the ranking does not depend on HashMap order.
                .sorted(Map.Entry.<String, Long>comparingByValue().reversed()
                        .thenComparing(Map.Entry.<String, Long>comparingByKey()))
                .limit(10)
                .forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue()));
        // 4. Average rating; 0 when there are no movies.
        double averageRating = movies.stream()
                .mapToDouble(Movie::getRating)
                .average()
                .orElse(0);
        System.out.printf("\n4. 平均评分:%.2f\n", averageRating);
    }

    /**
     * Prints a fixed-width table of title, year, rating, genre and director.
     *
     * @param movies movies to list
     */
    private static void displayResults(List<Movie> movies) {
        System.out.println("\n=== 电影数据列表 ===");
        System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director");
        System.out.println("-------------------------------------------------------------------------------------------------------------------------------");
        for (Movie movie : movies) {
            System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n",
                    truncate(movie.getTitle(), 50),
                    movie.getYear(),
                    movie.getRating(),
                    truncate(movie.getGenre(), 30),
                    truncate(movie.getDirector(), 30));
        }
    }

    /**
     * Shortens {@code text} to at most {@code maxLength} chars, ending with "..."
     * when something was cut off.
     *
     * @param text      text to shorten, may be {@code null}
     * @param maxLength maximum length of the result in chars
     * @return the shortened text; "" for {@code null}
     */
    private static String truncate(String text, int maxLength) {
        if (text == null) {
            return "";
        }
        if (text.length() <= maxLength) {
            return text;
        }
        if (maxLength < 3) {
            // No room for the ellipsis: hard cut instead of a negative substring index.
            return text.substring(0, Math.max(maxLength, 0));
        }
        return text.substring(0, maxLength - 3) + "...";
    }

    /**
     * Renders a pie chart of the rating distribution and a bar chart of the
     * average rating per year, each saved as an 800x600 PNG in the working directory.
     *
     * @param movies movies to chart
     * @throws IOException if a chart file cannot be written
     */
    private static void generateCharts(List<Movie> movies) throws IOException {
        // 1. Rating distribution pie chart.
        DefaultPieDataset ratingDataset = new DefaultPieDataset();
        // TreeMap: values are added to the dataset in ascending rating order, not hash order.
        Map<Double, Long> ratingDistribution = movies.stream()
                .collect(Collectors.groupingBy(Movie::getRating, TreeMap::new, Collectors.counting()));
        ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count));
        JFreeChart ratingChart = ChartFactory.createPieChart(
                "电影评分分布",
                ratingDataset,
                true,
                true,
                false
        );
        ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600);
        // 2. Average rating per year bar chart.
        DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset();
        // TreeMap: year categories are added chronologically.
        Map<Integer, Double> yearAverageRating = movies.stream()
                .collect(Collectors.groupingBy(Movie::getYear, TreeMap::new,
                        Collectors.averagingDouble(Movie::getRating)));
        yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year)));
        JFreeChart yearRatingChart = ChartFactory.createBarChart(
                "年份与平均评分关系",
                "年份",
                "平均评分",
                yearRatingDataset
        );
        ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600);
        System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png");
    }
}