5 changed files with 169 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,169 @@ |
|||||
|
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.*;
import java.util.stream.Collectors;

import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
||||
|
|
||||
|
public class Main { |
||||
|
public static void main(String[] args) { |
||||
|
try { |
||||
|
// 1. 爬取电影数据
|
||||
|
MovieCrawler crawler = new MovieCrawler(); |
||||
|
List<Movie> movies = crawler.crawlTopMovies(50); // 爬取50部电影
|
||||
|
|
||||
|
// 2. 数据清洗
|
||||
|
List<Movie> cleanedMovies = cleanData(movies); |
||||
|
|
||||
|
// 3. 数据存储
|
||||
|
saveToCSV(cleanedMovies, "movies.csv"); |
||||
|
|
||||
|
// 4. 数据分析
|
||||
|
analyzeData(cleanedMovies); |
||||
|
|
||||
|
// 5. 结果展示
|
||||
|
displayResults(cleanedMovies); |
||||
|
generateCharts(cleanedMovies); |
||||
|
|
||||
|
System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。"); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static List<Movie> cleanData(List<Movie> movies) { |
||||
|
return movies.stream() |
||||
|
.map(movie -> { |
||||
|
// 去空格
|
||||
|
movie.setTitle(movie.getTitle().trim()); |
||||
|
movie.setGenre(movie.getGenre().trim()); |
||||
|
movie.setDirector(movie.getDirector().trim()); |
||||
|
movie.setActors(movie.getActors().trim()); |
||||
|
movie.setSynopsis(movie.getSynopsis().trim()); |
||||
|
return movie; |
||||
|
}) |
||||
|
.collect(Collectors.toList()); |
||||
|
} |
||||
|
|
||||
|
private static void saveToCSV(List<Movie> movies, String fileName) throws IOException { |
||||
|
try (FileWriter writer = new FileWriter(fileName)) { |
||||
|
// 写入表头
|
||||
|
writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n"); |
||||
|
|
||||
|
// 写入数据
|
||||
|
for (Movie movie : movies) { |
||||
|
writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n", |
||||
|
escapeCSV(movie.getTitle()), |
||||
|
movie.getYear(), |
||||
|
movie.getRating(), |
||||
|
escapeCSV(movie.getGenre()), |
||||
|
escapeCSV(movie.getDirector()), |
||||
|
escapeCSV(movie.getActors()), |
||||
|
escapeCSV(movie.getSynopsis()))); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static String escapeCSV(String value) { |
||||
|
if (value == null) return ""; |
||||
|
if (value.contains(",") || value.contains("\n") || value.contains("\"")) { |
||||
|
value = value.replace("\"", "\"\""); |
||||
|
return "\"" + value + "\""; |
||||
|
} |
||||
|
return value; |
||||
|
} |
||||
|
|
||||
|
private static void analyzeData(List<Movie> movies) { |
||||
|
System.out.println("\n=== 数据分析结果 ==="); |
||||
|
|
||||
|
// 1. 评分分布
|
||||
|
System.out.println("\n1. 评分分布:"); |
||||
|
Map<Double, Long> ratingDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
||||
|
ratingDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.comparingByKey()) |
||||
|
.forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 2. 年份分布
|
||||
|
System.out.println("\n2. 年份分布:"); |
||||
|
Map<Integer, Long> yearDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); |
||||
|
yearDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.comparingByKey()) |
||||
|
.forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 3. 导演作品数排行
|
||||
|
System.out.println("\n3. 导演作品数排行:"); |
||||
|
Map<String, Long> directorCount = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())); |
||||
|
directorCount.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 4. 平均评分
|
||||
|
double averageRating = movies.stream() |
||||
|
.mapToDouble(Movie::getRating) |
||||
|
.average() |
||||
|
.orElse(0); |
||||
|
System.out.printf("\n4. 平均评分:%.2f\n", averageRating); |
||||
|
} |
||||
|
|
||||
|
private static void displayResults(List<Movie> movies) { |
||||
|
System.out.println("\n=== 电影数据列表 ==="); |
||||
|
System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director"); |
||||
|
System.out.println("-------------------------------------------------------------------------------------------------------------------------------"); |
||||
|
|
||||
|
for (Movie movie : movies) { |
||||
|
System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n", |
||||
|
truncate(movie.getTitle(), 50), |
||||
|
movie.getYear(), |
||||
|
movie.getRating(), |
||||
|
truncate(movie.getGenre(), 30), |
||||
|
truncate(movie.getDirector(), 30)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static String truncate(String text, int maxLength) { |
||||
|
return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text; |
||||
|
} |
||||
|
|
||||
|
private static void generateCharts(List<Movie> movies) throws IOException { |
||||
|
// 1. 评分分布饼图
|
||||
|
DefaultPieDataset ratingDataset = new DefaultPieDataset(); |
||||
|
Map<Double, Long> ratingDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
||||
|
ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count)); |
||||
|
|
||||
|
JFreeChart ratingChart = ChartFactory.createPieChart( |
||||
|
"电影评分分布", |
||||
|
ratingDataset, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600); |
||||
|
|
||||
|
// 2. 年份与评分关系图
|
||||
|
DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset(); |
||||
|
Map<Integer, Double> yearAverageRating = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getYear, |
||||
|
Collectors.averagingDouble(Movie::getRating))); |
||||
|
yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year))); |
||||
|
|
||||
|
JFreeChart yearRatingChart = ChartFactory.createBarChart( |
||||
|
"年份与平均评分关系", |
||||
|
"年份", |
||||
|
"平均评分", |
||||
|
yearRatingDataset |
||||
|
); |
||||
|
ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600); |
||||
|
|
||||
|
System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png"); |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue