import org.jfree.chart.ChartFactory; import org.jfree.chart.ChartUtils; import org.jfree.chart.JFreeChart; import org.jfree.data.category.DefaultCategoryDataset; import org.jfree.data.general.DefaultPieDataset; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.*; import java.util.stream.Collectors; public class Main { public static void main(String[] args) { try { // 1. 爬取电影数据 MovieCrawler crawler = new MovieCrawler(); List movies = crawler.crawlTopMovies(50); // 爬取50部电影 // 2. 数据清洗 List cleanedMovies = cleanData(movies); // 3. 数据存储 saveToCSV(cleanedMovies, "movies.csv"); // 4. 数据分析 analyzeData(cleanedMovies); // 5. 结果展示 displayResults(cleanedMovies); generateCharts(cleanedMovies); System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。"); } catch (Exception e) { e.printStackTrace(); } } private static List cleanData(List movies) { return movies.stream() .map(movie -> { // 去空格 movie.setTitle(movie.getTitle().trim()); movie.setGenre(movie.getGenre().trim()); movie.setDirector(movie.getDirector().trim()); movie.setActors(movie.getActors().trim()); movie.setSynopsis(movie.getSynopsis().trim()); return movie; }) .collect(Collectors.toList()); } private static void saveToCSV(List movies, String fileName) throws IOException { try (FileWriter writer = new FileWriter(fileName)) { // 写入表头 writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n"); // 写入数据 for (Movie movie : movies) { writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n", escapeCSV(movie.getTitle()), movie.getYear(), movie.getRating(), escapeCSV(movie.getGenre()), escapeCSV(movie.getDirector()), escapeCSV(movie.getActors()), escapeCSV(movie.getSynopsis()))); } } } private static String escapeCSV(String value) { if (value == null) return ""; if (value.contains(",") || value.contains("\n") || value.contains("\"")) { value = value.replace("\"", "\"\""); return "\"" + value + "\""; } return value; } private static void analyzeData(List movies) { System.out.println("\n=== 数据分析结果 ==="); // 1. 评分分布 System.out.println("\n1. 评分分布:"); Map ratingDistribution = movies.stream() .collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); ratingDistribution.entrySet().stream() .sorted(Map.Entry.comparingByKey()) .forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue())); // 2. 年份分布 System.out.println("\n2. 年份分布:"); Map yearDistribution = movies.stream() .collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); yearDistribution.entrySet().stream() .sorted(Map.Entry.comparingByKey()) .forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue())); // 3. 导演作品数排行 System.out.println("\n3. 导演作品数排行:"); Map directorCount = movies.stream() .collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())); directorCount.entrySet().stream() .sorted(Map.Entry.comparingByValue().reversed()) .limit(10) .forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue())); // 4. 平均评分 double averageRating = movies.stream() .mapToDouble(Movie::getRating) .average() .orElse(0); System.out.printf("\n4. 平均评分:%.2f\n", averageRating); } private static void displayResults(List movies) { System.out.println("\n=== 电影数据列表 ==="); System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director"); System.out.println("-------------------------------------------------------------------------------------------------------------------------------"); for (Movie movie : movies) { System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n", truncate(movie.getTitle(), 50), movie.getYear(), movie.getRating(), truncate(movie.getGenre(), 30), truncate(movie.getDirector(), 30)); } } private static String truncate(String text, int maxLength) { return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text; } private static void generateCharts(List movies) throws IOException { // 1. 评分分布饼图 DefaultPieDataset ratingDataset = new DefaultPieDataset(); Map ratingDistribution = movies.stream() .collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count)); JFreeChart ratingChart = ChartFactory.createPieChart( "电影评分分布", ratingDataset, true, true, false ); ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600); // 2. 年份与评分关系图 DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset(); Map yearAverageRating = movies.stream() .collect(Collectors.groupingBy(Movie::getYear, Collectors.averagingDouble(Movie::getRating))); yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year))); JFreeChart yearRatingChart = ChartFactory.createBarChart( "年份与平均评分关系", "年份", "平均评分", yearRatingDataset ); ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600); System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png"); } }