19 changed files with 250 additions and 277 deletions
@ -1,27 +1,169 @@ |
|||||
|
import org.jfree.chart.ChartFactory; |
||||
|
import org.jfree.chart.ChartUtils; |
||||
|
import org.jfree.chart.JFreeChart; |
||||
|
import org.jfree.data.category.DefaultCategoryDataset; |
||||
|
import org.jfree.data.general.DefaultPieDataset; |
||||
|
|
||||
|
import java.io.File; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
import java.util.*; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
public class Main { |
public class Main { |
||||
public static void main(String[] args) { |
public static void main(String[] args) { |
||||
int[] voltages = {10, -5, 7, 105, 999, 89, 76, 74}; |
try { |
||||
int validCount = 0; // 有效数据个数
|
// 1. 爬取电影数据
|
||||
double validSum = 0; // 有效数据总和
|
MovieCrawler crawler = new MovieCrawler(); |
||||
|
List<Movie> movies = crawler.crawlTopMovies(50); // 爬取50部电影
|
||||
|
|
||||
|
// 2. 数据清洗
|
||||
|
List<Movie> cleanedMovies = cleanData(movies); |
||||
|
|
||||
|
// 3. 数据存储
|
||||
|
saveToCSV(cleanedMovies, "movies.csv"); |
||||
|
|
||||
|
// 4. 数据分析
|
||||
|
analyzeData(cleanedMovies); |
||||
|
|
||||
|
// 5. 结果展示
|
||||
|
displayResults(cleanedMovies); |
||||
|
generateCharts(cleanedMovies); |
||||
|
|
||||
|
System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。"); |
||||
|
} catch (Exception e) { |
||||
|
e.printStackTrace(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static List<Movie> cleanData(List<Movie> movies) { |
||||
|
return movies.stream() |
||||
|
.map(movie -> { |
||||
|
// 去空格
|
||||
|
movie.setTitle(movie.getTitle().trim()); |
||||
|
movie.setGenre(movie.getGenre().trim()); |
||||
|
movie.setDirector(movie.getDirector().trim()); |
||||
|
movie.setActors(movie.getActors().trim()); |
||||
|
movie.setSynopsis(movie.getSynopsis().trim()); |
||||
|
return movie; |
||||
|
}) |
||||
|
.collect(Collectors.toList()); |
||||
|
} |
||||
|
|
||||
|
private static void saveToCSV(List<Movie> movies, String fileName) throws IOException { |
||||
|
try (FileWriter writer = new FileWriter(fileName)) { |
||||
|
// 写入表头
|
||||
|
writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n"); |
||||
|
|
||||
for (int voltage : voltages) { |
// 写入数据
|
||||
if (voltage == 999) { |
for (Movie movie : movies) { |
||||
System.out.println("程序终止,传感器离线"); |
writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n", |
||||
break; // 遇到999,终止程序
|
escapeCSV(movie.getTitle()), |
||||
} else if (voltage < 0) { |
movie.getYear(), |
||||
System.out.println("警告:发现负数,数据已跳过"); |
movie.getRating(), |
||||
} else if (voltage >= 1 && voltage <= 100) { |
escapeCSV(movie.getGenre()), |
||||
validCount++; |
escapeCSV(movie.getDirector()), |
||||
validSum += voltage; |
escapeCSV(movie.getActors()), |
||||
|
escapeCSV(movie.getSynopsis()))); |
||||
|
} |
||||
} |
} |
||||
// 其他情况(如>100)不处理
|
|
||||
} |
} |
||||
|
|
||||
if (validCount > 0) { |
private static String escapeCSV(String value) { |
||||
double average = validSum / validCount; |
if (value == null) return ""; |
||||
System.out.printf("有效数据个数:%d,平均值:%.2f\n", validCount, average); |
if (value.contains(",") || value.contains("\n") || value.contains("\"")) { |
||||
} else { |
value = value.replace("\"", "\"\""); |
||||
System.out.println("没有收集到有效数据,打印初始状态"); |
return "\"" + value + "\""; |
||||
|
} |
||||
|
return value; |
||||
} |
} |
||||
|
|
||||
|
private static void analyzeData(List<Movie> movies) { |
||||
|
System.out.println("\n=== 数据分析结果 ==="); |
||||
|
|
||||
|
// 1. 评分分布
|
||||
|
System.out.println("\n1. 评分分布:"); |
||||
|
Map<Double, Long> ratingDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
||||
|
ratingDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.comparingByKey()) |
||||
|
.forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 2. 年份分布
|
||||
|
System.out.println("\n2. 年份分布:"); |
||||
|
Map<Integer, Long> yearDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); |
||||
|
yearDistribution.entrySet().stream() |
||||
|
.sorted(Map.Entry.comparingByKey()) |
||||
|
.forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 3. 导演作品数排行
|
||||
|
System.out.println("\n3. 导演作品数排行:"); |
||||
|
Map<String, Long> directorCount = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())); |
||||
|
directorCount.entrySet().stream() |
||||
|
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
||||
|
.limit(10) |
||||
|
.forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue())); |
||||
|
|
||||
|
// 4. 平均评分
|
||||
|
double averageRating = movies.stream() |
||||
|
.mapToDouble(Movie::getRating) |
||||
|
.average() |
||||
|
.orElse(0); |
||||
|
System.out.printf("\n4. 平均评分:%.2f\n", averageRating); |
||||
|
} |
||||
|
|
||||
|
private static void displayResults(List<Movie> movies) { |
||||
|
System.out.println("\n=== 电影数据列表 ==="); |
||||
|
System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director"); |
||||
|
System.out.println("-------------------------------------------------------------------------------------------------------------------------------"); |
||||
|
|
||||
|
for (Movie movie : movies) { |
||||
|
System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n", |
||||
|
truncate(movie.getTitle(), 50), |
||||
|
movie.getYear(), |
||||
|
movie.getRating(), |
||||
|
truncate(movie.getGenre(), 30), |
||||
|
truncate(movie.getDirector(), 30)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static String truncate(String text, int maxLength) { |
||||
|
return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text; |
||||
|
} |
||||
|
|
||||
|
private static void generateCharts(List<Movie> movies) throws IOException { |
||||
|
// 1. 评分分布饼图
|
||||
|
DefaultPieDataset ratingDataset = new DefaultPieDataset(); |
||||
|
Map<Double, Long> ratingDistribution = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
||||
|
ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count)); |
||||
|
|
||||
|
JFreeChart ratingChart = ChartFactory.createPieChart( |
||||
|
"电影评分分布", |
||||
|
ratingDataset, |
||||
|
true, |
||||
|
true, |
||||
|
false |
||||
|
); |
||||
|
ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600); |
||||
|
|
||||
|
// 2. 年份与评分关系图
|
||||
|
DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset(); |
||||
|
Map<Integer, Double> yearAverageRating = movies.stream() |
||||
|
.collect(Collectors.groupingBy(Movie::getYear, |
||||
|
Collectors.averagingDouble(Movie::getRating))); |
||||
|
yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year))); |
||||
|
|
||||
|
JFreeChart yearRatingChart = ChartFactory.createBarChart( |
||||
|
"年份与平均评分关系", |
||||
|
"年份", |
||||
|
"平均评分", |
||||
|
yearRatingDataset |
||||
|
); |
||||
|
ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600); |
||||
|
|
||||
|
System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png"); |
||||
} |
} |
||||
} |
} |
||||
|
|||||
@ -1,169 +0,0 @@ |
|||||
import org.jfree.chart.ChartFactory; |
|
||||
import org.jfree.chart.ChartUtils; |
|
||||
import org.jfree.chart.JFreeChart; |
|
||||
import org.jfree.data.category.DefaultCategoryDataset; |
|
||||
import org.jfree.data.general.DefaultPieDataset; |
|
||||
|
|
||||
import java.io.File; |
|
||||
import java.io.FileWriter; |
|
||||
import java.io.IOException; |
|
||||
import java.util.*; |
|
||||
import java.util.stream.Collectors; |
|
||||
|
|
||||
public class Main { |
|
||||
public static void main(String[] args) { |
|
||||
try { |
|
||||
// 1. 爬取电影数据
|
|
||||
MovieCrawler crawler = new MovieCrawler(); |
|
||||
List<Movie> movies = crawler.crawlTopMovies(50); // 爬取50部电影
|
|
||||
|
|
||||
// 2. 数据清洗
|
|
||||
List<Movie> cleanedMovies = cleanData(movies); |
|
||||
|
|
||||
// 3. 数据存储
|
|
||||
saveToCSV(cleanedMovies, "movies.csv"); |
|
||||
|
|
||||
// 4. 数据分析
|
|
||||
analyzeData(cleanedMovies); |
|
||||
|
|
||||
// 5. 结果展示
|
|
||||
displayResults(cleanedMovies); |
|
||||
generateCharts(cleanedMovies); |
|
||||
|
|
||||
System.out.println("爬取完成!共获取了 " + cleanedMovies.size() + " 部电影数据。"); |
|
||||
} catch (Exception e) { |
|
||||
e.printStackTrace(); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private static List<Movie> cleanData(List<Movie> movies) { |
|
||||
return movies.stream() |
|
||||
.map(movie -> { |
|
||||
// 去空格
|
|
||||
movie.setTitle(movie.getTitle().trim()); |
|
||||
movie.setGenre(movie.getGenre().trim()); |
|
||||
movie.setDirector(movie.getDirector().trim()); |
|
||||
movie.setActors(movie.getActors().trim()); |
|
||||
movie.setSynopsis(movie.getSynopsis().trim()); |
|
||||
return movie; |
|
||||
}) |
|
||||
.collect(Collectors.toList()); |
|
||||
} |
|
||||
|
|
||||
private static void saveToCSV(List<Movie> movies, String fileName) throws IOException { |
|
||||
try (FileWriter writer = new FileWriter(fileName)) { |
|
||||
// 写入表头
|
|
||||
writer.write("Title,Year,Rating,Genre,Director,Actors,Synopsis\n"); |
|
||||
|
|
||||
// 写入数据
|
|
||||
for (Movie movie : movies) { |
|
||||
writer.write(String.format("%s,%d,%.1f,%s,%s,%s,%s\n", |
|
||||
escapeCSV(movie.getTitle()), |
|
||||
movie.getYear(), |
|
||||
movie.getRating(), |
|
||||
escapeCSV(movie.getGenre()), |
|
||||
escapeCSV(movie.getDirector()), |
|
||||
escapeCSV(movie.getActors()), |
|
||||
escapeCSV(movie.getSynopsis()))); |
|
||||
} |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private static String escapeCSV(String value) { |
|
||||
if (value == null) return ""; |
|
||||
if (value.contains(",") || value.contains("\n") || value.contains("\"")) { |
|
||||
value = value.replace("\"", "\"\""); |
|
||||
return "\"" + value + "\""; |
|
||||
} |
|
||||
return value; |
|
||||
} |
|
||||
|
|
||||
private static void analyzeData(List<Movie> movies) { |
|
||||
System.out.println("\n=== 数据分析结果 ==="); |
|
||||
|
|
||||
// 1. 评分分布
|
|
||||
System.out.println("\n1. 评分分布:"); |
|
||||
Map<Double, Long> ratingDistribution = movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
|
||||
ratingDistribution.entrySet().stream() |
|
||||
.sorted(Map.Entry.comparingByKey()) |
|
||||
.forEach(entry -> System.out.printf("评分 %.1f: %d 部\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
// 2. 年份分布
|
|
||||
System.out.println("\n2. 年份分布:"); |
|
||||
Map<Integer, Long> yearDistribution = movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); |
|
||||
yearDistribution.entrySet().stream() |
|
||||
.sorted(Map.Entry.comparingByKey()) |
|
||||
.forEach(entry -> System.out.printf("年份 %d: %d 部\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
// 3. 导演作品数排行
|
|
||||
System.out.println("\n3. 导演作品数排行:"); |
|
||||
Map<String, Long> directorCount = movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getDirector, Collectors.counting())); |
|
||||
directorCount.entrySet().stream() |
|
||||
.sorted(Map.Entry.<String, Long>comparingByValue().reversed()) |
|
||||
.limit(10) |
|
||||
.forEach(entry -> System.out.printf("%s: %d 部\n", entry.getKey(), entry.getValue())); |
|
||||
|
|
||||
// 4. 平均评分
|
|
||||
double averageRating = movies.stream() |
|
||||
.mapToDouble(Movie::getRating) |
|
||||
.average() |
|
||||
.orElse(0); |
|
||||
System.out.printf("\n4. 平均评分:%.2f\n", averageRating); |
|
||||
} |
|
||||
|
|
||||
private static void displayResults(List<Movie> movies) { |
|
||||
System.out.println("\n=== 电影数据列表 ==="); |
|
||||
System.out.printf("%-50s %-10s %-10s %-30s %-30s\n", "Title", "Year", "Rating", "Genre", "Director"); |
|
||||
System.out.println("-------------------------------------------------------------------------------------------------------------------------------"); |
|
||||
|
|
||||
for (Movie movie : movies) { |
|
||||
System.out.printf("%-50s %-10d %-10.1f %-30s %-30s\n", |
|
||||
truncate(movie.getTitle(), 50), |
|
||||
movie.getYear(), |
|
||||
movie.getRating(), |
|
||||
truncate(movie.getGenre(), 30), |
|
||||
truncate(movie.getDirector(), 30)); |
|
||||
} |
|
||||
} |
|
||||
|
|
||||
private static String truncate(String text, int maxLength) { |
|
||||
return text.length() > maxLength ? text.substring(0, maxLength - 3) + "..." : text; |
|
||||
} |
|
||||
|
|
||||
private static void generateCharts(List<Movie> movies) throws IOException { |
|
||||
// 1. 评分分布饼图
|
|
||||
DefaultPieDataset ratingDataset = new DefaultPieDataset(); |
|
||||
Map<Double, Long> ratingDistribution = movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getRating, Collectors.counting())); |
|
||||
ratingDistribution.forEach((rating, count) -> ratingDataset.setValue(String.valueOf(rating), count)); |
|
||||
|
|
||||
JFreeChart ratingChart = ChartFactory.createPieChart( |
|
||||
"电影评分分布", |
|
||||
ratingDataset, |
|
||||
true, |
|
||||
true, |
|
||||
false |
|
||||
); |
|
||||
ChartUtils.saveChartAsPNG(new File("rating_distribution.png"), ratingChart, 800, 600); |
|
||||
|
|
||||
// 2. 年份与评分关系图
|
|
||||
DefaultCategoryDataset yearRatingDataset = new DefaultCategoryDataset(); |
|
||||
Map<Integer, Double> yearAverageRating = movies.stream() |
|
||||
.collect(Collectors.groupingBy(Movie::getYear, |
|
||||
Collectors.averagingDouble(Movie::getRating))); |
|
||||
yearAverageRating.forEach((year, avgRating) -> yearRatingDataset.addValue(avgRating, "评分", String.valueOf(year))); |
|
||||
|
|
||||
JFreeChart yearRatingChart = ChartFactory.createBarChart( |
|
||||
"年份与平均评分关系", |
|
||||
"年份", |
|
||||
"平均评分", |
|
||||
yearRatingDataset |
|
||||
); |
|
||||
ChartUtils.saveChartAsPNG(new File("year_rating_relation.png"), yearRatingChart, 800, 600); |
|
||||
|
|
||||
System.out.println("\n图表已生成:rating_distribution.png 和 year_rating_relation.png"); |
|
||||
} |
|
||||
} |
|
||||
|
Before Width: | Height: | Size: 191 KiB After Width: | Height: | Size: 191 KiB |
|
|
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 39 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
Loading…
Reference in new issue