You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
91 lines
2.9 KiB
91 lines
2.9 KiB
package com.crawler.utils;
|
|
|
|
import com.crawler.model.Movie;
|
|
|
|
import java.io.FileWriter;
|
|
import java.io.IOException;
|
|
import java.util.List;
|
|
|
|
public class DataUtils {
|
|
// 清洗电影数据
|
|
public static Movie cleanMovie(Movie movie) {
|
|
if (movie == null) return null;
|
|
|
|
// 清洗标题
|
|
if (movie.getTitle() != null) {
|
|
movie.setTitle(movie.getTitle().trim().replaceAll("\\s+", " "));
|
|
}
|
|
|
|
// 清洗导演
|
|
if (movie.getDirector() != null) {
|
|
movie.setDirector(movie.getDirector().trim());
|
|
}
|
|
|
|
// 清洗演员
|
|
if (movie.getActors() != null) {
|
|
movie.setActors(movie.getActors().trim());
|
|
}
|
|
|
|
// 清洗年份
|
|
if (movie.getYear() != null) {
|
|
movie.setYear(movie.getYear().trim());
|
|
}
|
|
|
|
// 清洗国家/地区
|
|
if (movie.getCountry() != null) {
|
|
movie.setCountry(movie.getCountry().trim());
|
|
}
|
|
|
|
// 清洗类型
|
|
if (movie.getGenre() != null) {
|
|
movie.setGenre(movie.getGenre().trim());
|
|
}
|
|
|
|
// 清洗简介
|
|
if (movie.getQuote() != null) {
|
|
movie.setQuote(movie.getQuote().trim().replaceAll("\\s+", " "));
|
|
}
|
|
|
|
return movie;
|
|
}
|
|
|
|
// 写入电影数据到CSV文件
|
|
public static void writeMovieToCSV(List<Movie> movieList, String filePath) throws IOException {
|
|
// 添加时间戳避免文件冲突
|
|
String timestamp = String.valueOf(System.currentTimeMillis());
|
|
String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv");
|
|
|
|
FileWriter writer = new FileWriter(actualFilePath);
|
|
// 写入表头
|
|
writer.write("排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n");
|
|
|
|
// 写入数据
|
|
for (Movie movie : movieList) {
|
|
if (movie != null) {
|
|
writer.write(movie.getRank() + ",");
|
|
writer.write(escapeCsv(movie.getTitle()) + ",");
|
|
writer.write(movie.getRating() + ",");
|
|
writer.write(movie.getRatingPeople() + ",");
|
|
writer.write(escapeCsv(movie.getDirector()) + ",");
|
|
writer.write(escapeCsv(movie.getActors()) + ",");
|
|
writer.write(escapeCsv(movie.getYear()) + ",");
|
|
writer.write(escapeCsv(movie.getCountry()) + ",");
|
|
writer.write(escapeCsv(movie.getGenre()) + ",");
|
|
writer.write(escapeCsv(movie.getQuote()) + "\n");
|
|
}
|
|
}
|
|
|
|
writer.close();
|
|
System.out.println("数据已保存到 " + actualFilePath);
|
|
}
|
|
|
|
// 转义CSV特殊字符
|
|
private static String escapeCsv(String value) {
|
|
if (value == null) return "";
|
|
if (value.contains(",") || value.contains("\"")) {
|
|
value = value.replaceAll("\"", "\"\"");
|
|
return "\"" + value + "\"";
|
|
}
|
|
return value;
|
|
}
|
|
}
|