/*
 * Web file-viewer residue captured together with this source (not part of the program):
 *   You can not select more than 25 topics
 *   Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 *   169 lines
 *   7.1 KiB
 */
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;

import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

public class Main {
|
|
public static void main(String[] args) {
|
|
try {
|
|
// 1. 爬取电影数据
|
|
MovieCrawler crawler = new MovieCrawler();
|
|
List<Movie> movies = crawler.crawlTopMovies(50); // 爬取50部电影
|
|
|
|
// 2. 数据清洗
|
|
List<Movie> cleanedMovies = cleanData(movies);
|
|
|
|
// 3. 数据存储
|
|
saveToCSV(cleanedMovies, "movies.csv");
|
|
|
|
// 4. 数据分析
|
|
analyzeData(cleanedMovies);
|
|
|
|
// 5. 结果展示
|
|
displayResults(cleanedMovies);
|
|
generateCharts(cleanedMovies);
|
|
|
|
System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。");
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
|
|
private static List<Movie> cleanData(List<Movie> movies) {
|
|
return movies.stream()
|
|
.map(movie -> {
|
|
// 去空格
|
|
movie.setTitle(movie.getTitle().trim());
|
|
movie.setGenre(movie.getGenre().trim());
|
|
movie.setDirector(movie.getDirector().trim());
|
|
movie.setActors(movie.getActors().trim());
|
|
movie.setSynopsis(movie.getSynopsis().trim());
|
|
return movie;
|
|
})
|
|
.collect(Collectors.toList());
|
|
}
|
|
|
|
private static void saveToCSV(List<Movie> movies, String fileName) throws IOException {
|
|
try (FileWriter writer = new FileWriter(fileName)) {
|
|
// 写入表头
|
|
writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n");
|
|
|
|
// 写入数据
|
|
for (Movie movie : movies) {
|
|
writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n",
|
|
escapeCSV(movie.getTitle()),
|
|
movie.getYear(),
|
|
movie.getRating(),
|
|
escapeCSV(movie.getGenre()),
|
|
escapeCSV(movie.getDirector()),
|
|
escapeCSV(movie.getActors()),
|
|
escapeCSV(movie.getSynopsis())));
|
|
}
|
|
}
|
|
}
|
|
|
|
private static String escapeCSV(String value) {
|
|
if (value == null) return "";
|
|
if (value.contains(",") || value.contains("\n") || value.contains("\"")) {
|
|
value = value.replace("\"", "\"\"");
|
|
return "\"" + value + "\"";
|
|
}
|
|
return value;
|
|
}
|
|
|
|
private static void analyzeData(List<Movie> movies) {
|
|
System.out.println("\n=== 数据分析结果 ===");
|
|
|
|
// 1. 评分分布
|
|
System.out.println("\n1. 评分分布:");
|
|
Map<Double, Long> ratingDistribution = movies.stream()
|
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting()));
|
|
ratingDistribution.entrySet().stream()
|
|
.sorted(Map.Entry.comparingByKey())
|
|
.forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue()));
|
|
|
|
// 2. 年份分布
|
|
System.out.println("\n2. 年份分布:");
|
|
Map<Integer, Long> yearDistribution = movies.stream()
|
|
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting()));
|
|
yearDistribution.entrySet().stream()
|
|
.sorted(Map.Entry.comparingByKey())
|
|
.forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue()));
|
|
|
|
// 3. 导演作品数排行
|
|
System.out.println("\n3. 导演作品数排行:");
|
|
Map<String, Long> directorCount = movies.stream()
|
|
.collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting()));
|
|
directorCount.entrySet().stream()
|
|
.sorted(Map.Entry.<String, Long>comparingByValue().reversed())
|
|
.limit(10)
|
|
.forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue()));
|
|
|
|
// 4. 平均评分
|
|
double averageRating = movies.stream()
|
|
.mapToDouble(Movie::getRating)
|
|
.average()
|
|
.orElse(0);
|
|
System.out.printf("\n4. 平均评分:%.2f\n", averageRating);
|
|
}
|
|
|
|
private static void displayResults(List<Movie> movies) {
|
|
System.out.println("\n=== 电影数据列表 ===");
|
|
System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director");
|
|
System.out.println("-------------------------------------------------------------------------------------------------------------------------------");
|
|
|
|
for (Movie movie : movies) {
|
|
System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n",
|
|
truncate(movie.getTitle(), 50),
|
|
movie.getYear(),
|
|
movie.getRating(),
|
|
truncate(movie.getGenre(), 30),
|
|
truncate(movie.getDirector(), 30));
|
|
}
|
|
}
|
|
|
|
private static String truncate(String text, int maxLength) {
|
|
return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text;
|
|
}
|
|
|
|
private static void generateCharts(List<Movie> movies) throws IOException {
|
|
// 1. 评分分布饼图
|
|
DefaultPieDataset ratingDataset = new DefaultPieDataset();
|
|
Map<Double, Long> ratingDistribution = movies.stream()
|
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting()));
|
|
ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count));
|
|
|
|
JFreeChart ratingChart = ChartFactory.createPieChart(
|
|
"电影评分分布",
|
|
ratingDataset,
|
|
true,
|
|
true,
|
|
false
|
|
);
|
|
ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600);
|
|
|
|
// 2. 年份与评分关系图
|
|
DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset();
|
|
Map<Integer, Double> yearAverageRating = movies.stream()
|
|
.collect(Collectors.groupingBy(Movie::getYear,
|
|
Collectors.averagingDouble(Movie::getRating)));
|
|
yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year)));
|
|
|
|
JFreeChart yearRatingChart = ChartFactory.createBarChart(
|
|
"年份与平均评分关系",
|
|
"年份",
|
|
"平均评分",
|
|
yearRatingDataset
|
|
);
|
|
ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600);
|
|
|
|
System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png");
|
|
}
|
|
}
|
|
|