package com.crawler.utils;

import com.crawler.model.Movie;

import java.io.FileWriter;
import java.io.IOException;
import java.util.List;

/**
 * Static helpers for normalizing scraped {@link Movie} records and exporting them to a CSV file.
 */
public class DataUtils {

    /** Extension searched for when inserting the timestamp into the output path. */
    private static final String CSV_EXTENSION = ".csv";

    /** Header row; the column order matches the fields written for each movie. */
    private static final String CSV_HEADER = "排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n";

    /**
     * Normalizes the text fields of a movie in place.
     *
     * <p>Title and quote are trimmed and every run of whitespace inside them is collapsed to a
     * single space. Director, actors, year, country and genre are only trimmed. Fields that are
     * {@code null} are left untouched (their setters are not called).
     *
     * @param movie the movie to clean; may be {@code null}
     * @return the same {@code movie} instance after cleaning, or {@code null} if {@code movie}
     *     was {@code null}
     */
    public static Movie cleanMovie(Movie movie) {
        if (movie == null) {
            return null;
        }

        // Title: trim and collapse inner whitespace.
        String title = movie.getTitle();
        if (title != null) {
            movie.setTitle(collapseWhitespace(title));
        }

        // Director.
        String director = movie.getDirector();
        if (director != null) {
            movie.setDirector(director.trim());
        }

        // Actors.
        String actors = movie.getActors();
        if (actors != null) {
            movie.setActors(actors.trim());
        }

        // Year.
        String year = movie.getYear();
        if (year != null) {
            movie.setYear(year.trim());
        }

        // Country / region.
        String country = movie.getCountry();
        if (country != null) {
            movie.setCountry(country.trim());
        }

        // Genre.
        String genre = movie.getGenre();
        if (genre != null) {
            movie.setGenre(genre.trim());
        }

        // Quote / short description: trim and collapse inner whitespace.
        String quote = movie.getQuote();
        if (quote != null) {
            movie.setQuote(collapseWhitespace(quote));
        }

        return movie;
    }

    /**
     * Writes the given movies to a CSV file and prints the resulting path to standard output.
     *
     * <p>To avoid clobbering an earlier export, the current time in milliseconds is inserted
     * before the last {@code .csv} in {@code filePath} (for example {@code movies.csv} becomes
     * {@code movies_1700000000000.csv}). A path that does not contain {@code .csv} is used
     * unchanged. {@code null} elements of {@code movieList} are skipped.
     *
     * <p>NOTE(review): the file is written with the platform default charset, as before;
     * consider an explicit charset if the output must be portable across machines.
     *
     * @param movieList the movies to export, one CSV row per non-null element
     * @param filePath the requested output path
     * @throws IOException if the file cannot be created or written
     * @throws NullPointerException if {@code movieList} or {@code filePath} is {@code null}
     */
    public static void writeMovieToCSV(List<Movie> movieList, String filePath) throws IOException {
        // Fail before touching the file system so a bad call does not leave an empty file behind.
        if (movieList == null) {
            throw new NullPointerException("movieList must not be null");
        }
        if (filePath == null) {
            throw new NullPointerException("filePath must not be null");
        }

        String actualFilePath = withTimestamp(filePath, System.currentTimeMillis());

        // try-with-resources closes the writer even when a write fails part-way through.
        try (FileWriter writer = new FileWriter(actualFilePath)) {
            writer.write(CSV_HEADER);
            for (Movie movie : movieList) {
                if (movie != null) {
                    writer.write(toCsvRow(movie));
                }
            }
        }

        System.out.println("数据已保存到 " + actualFilePath);
    }

    /** Trims {@code text} and replaces every run of whitespace with a single space. */
    private static String collapseWhitespace(String text) {
        return text.trim().replaceAll("\\s+", " ");
    }

    /**
     * Inserts {@code _<timestampMillis>} before the last {@code .csv} in {@code filePath}.
     *
     * <p>Only the last occurrence is touched so that a directory name containing {@code .csv}
     * is not rewritten. Returns {@code filePath} unchanged when it contains no {@code .csv}.
     */
    private static String withTimestamp(String filePath, long timestampMillis) {
        int extensionStart = filePath.lastIndexOf(CSV_EXTENSION);
        if (extensionStart < 0) {
            return filePath;
        }
        return filePath.substring(0, extensionStart)
                + "_" + timestampMillis
                + filePath.substring(extensionStart);
    }

    /** Formats one movie as a newline-terminated CSV row in the same column order as the header. */
    private static String toCsvRow(Movie movie) {
        return movie.getRank() + ","
                + escapeCsv(movie.getTitle()) + ","
                + movie.getRating() + ","
                + movie.getRatingPeople() + ","
                + escapeCsv(movie.getDirector()) + ","
                + escapeCsv(movie.getActors()) + ","
                + escapeCsv(movie.getYear()) + ","
                + escapeCsv(movie.getCountry()) + ","
                + escapeCsv(movie.getGenre()) + ","
                + escapeCsv(movie.getQuote()) + "\n";
    }

    /**
     * Escapes a value for use as a single CSV field.
     *
     * <p>A value containing a comma, a double quote, or a line break is wrapped in double quotes,
     * with embedded double quotes doubled. Any other value is returned as is.
     *
     * @param value the raw field value; may be {@code null}
     * @return the escaped field, or an empty string when {@code value} is {@code null}
     */
    private static String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.indexOf(',') >= 0
                || value.indexOf('"') >= 0
                || value.indexOf('\n') >= 0
                || value.indexOf('\r') >= 0;
        if (!needsQuoting) {
            return value;
        }
        // Literal replacement; no regex is needed to double the quotes.
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }
}