diff --git a/W3/Main.class b/W3/Main.class new file mode 100644 index 0000000..faa23ec Binary files /dev/null and b/W3/Main.class differ diff --git a/W3/Main.java b/W3/Main.java new file mode 100644 index 0000000..51537f5 --- /dev/null +++ b/W3/Main.java @@ -0,0 +1,169 @@ +import org.jfree.chart.ChartFactory; +import org.jfree.chart.ChartUtils; +import org.jfree.chart.JFreeChart; +import org.jfree.data.category.DefaultCategoryDataset; +import org.jfree.data.general.DefaultPieDataset; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.util.*; +import java.util.stream.Collectors; + +public class Main { + public static void main(String[] args) { + try { + // 1. 爬取电影数据 + MovieCrawler crawler = new MovieCrawler(); + List movies = crawler.crawlTopMovies(50); // 爬取50部电影 + + // 2. 数据清洗 + List cleanedMovies = cleanData(movies); + + // 3. 数据存储 + saveToCSV(cleanedMovies, "movies.csv"); + + // 4. 数据分析 + analyzeData(cleanedMovies); + + // 5. 结果展示 + displayResults(cleanedMovies); + generateCharts(cleanedMovies); + + System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。"); + } catch (Exception e) { + e.printStackTrace(); + } + } + + private static List cleanData(List movies) { + return movies.stream() + .map(movie -> { + // 去空格 + movie.setTitle(movie.getTitle().trim()); + movie.setGenre(movie.getGenre().trim()); + movie.setDirector(movie.getDirector().trim()); + movie.setActors(movie.getActors().trim()); + movie.setSynopsis(movie.getSynopsis().trim()); + return movie; + }) + .collect(Collectors.toList()); + } + + private static void saveToCSV(List movies, String fileName) throws IOException { + try (FileWriter writer = new FileWriter(fileName)) { + // 写入表头 + writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n"); + + // 写入数据 + for (Movie movie : movies) { + writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n", + escapeCSV(movie.getTitle()), + movie.getYear(), + movie.getRating(), + escapeCSV(movie.getGenre()), + escapeCSV(movie.getDirector()), + escapeCSV(movie.getActors()), + escapeCSV(movie.getSynopsis()))); + } + } + } + + private static String escapeCSV(String value) { + if (value == null) return ""; + if (value.contains(",") || value.contains("\n") || value.contains("\"")) { + value = value.replace("\"", "\"\""); + return "\"" + value + "\""; + } + return value; + } + + private static void analyzeData(List movies) { + System.out.println("\n=== 数据分析结果 ==="); + + // 1. 评分分布 + System.out.println("\n1. 评分分布:"); + Map ratingDistribution = movies.stream() + .collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); + ratingDistribution.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue())); + + // 2. 年份分布 + System.out.println("\n2. 年份分布:"); + Map yearDistribution = movies.stream() + .collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); + yearDistribution.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue())); + + // 3. 导演作品数排行 + System.out.println("\n3. 导演作品数排行:"); + Map directorCount = movies.stream() + .collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())); + directorCount.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .limit(10) + .forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue())); + + // 4. 平均评分 + double averageRating = movies.stream() + .mapToDouble(Movie::getRating) + .average() + .orElse(0); + System.out.printf("\n4. 平均评分:%.2f\n", averageRating); + } + + private static void displayResults(List movies) { + System.out.println("\n=== 电影数据列表 ==="); + System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director"); + System.out.println("-------------------------------------------------------------------------------------------------------------------------------"); + + for (Movie movie : movies) { + System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n", + truncate(movie.getTitle(), 50), + movie.getYear(), + movie.getRating(), + truncate(movie.getGenre(), 30), + truncate(movie.getDirector(), 30)); + } + } + + private static String truncate(String text, int maxLength) { + return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text; + } + + private static void generateCharts(List movies) throws IOException { + // 1. 评分分布饼图 + DefaultPieDataset ratingDataset = new DefaultPieDataset(); + Map ratingDistribution = movies.stream() + .collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); + ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count)); + + JFreeChart ratingChart = ChartFactory.createPieChart( + "电影评分分布", + ratingDataset, + true, + true, + false + ); + ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600); + + // 2. 年份与评分关系图 + DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset(); + Map yearAverageRating = movies.stream() + .collect(Collectors.groupingBy(Movie::getYear, + Collectors.averagingDouble(Movie::getRating))); + yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year))); + + JFreeChart yearRatingChart = ChartFactory.createBarChart( + "年份与平均评分关系", + "年份", + "平均评分", + yearRatingDataset + ); + ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600); + + System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png"); + } +} diff --git a/W3/jcommon-1.0.24.jar b/W3/jcommon-1.0.24.jar new file mode 100644 index 0000000..4f1015d Binary files /dev/null and b/W3/jcommon-1.0.24.jar differ diff --git a/W3/jfreechart-1.5.4.jar b/W3/jfreechart-1.5.4.jar new file mode 100644 index 0000000..ddd7c23 Binary files /dev/null and b/W3/jfreechart-1.5.4.jar differ diff --git a/W3/jsoup-1.17.2.jar b/W3/jsoup-1.17.2.jar new file mode 100644 index 0000000..52ae16d Binary files /dev/null and b/W3/jsoup-1.17.2.jar differ