You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

210 lines
8.7 KiB

package com.crawler;
import com.crawler.model.Movie;
import com.crawler.strategy.impl.DoubanCrawler;
import com.crawler.strategy.impl.QuotesCrawler;
import com.crawler.strategy.impl.TencentCrawler;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;
/**
 * Command-line driver that runs the Douban, Quotes and Tencent crawlers one after another,
 * prints a per-site preview and a combined ranking to the console, and exports every
 * collected movie to a timestamped CSV file in the working directory.
 */
public class TestCrawler {

    /** Heavy horizontal rule used around the program banner and the summary. */
    private static final String HEAVY_RULE = "=".repeat(80);

    /** Light horizontal rule used around tables and per-site sections. */
    private static final String LIGHT_RULE = "-".repeat(80);

    /** Maximum number of rows shown in the combined ranking. */
    private static final int TOP_LIMIT = 50;

    /** Maximum number of rows shown in each per-site preview. */
    private static final int PREVIEW_LIMIT = 30;

    /** Suffix appended by {@link #truncate(String, int)} when text is cut. */
    private static final String ELLIPSIS = "...";

    /** Timestamp pattern embedded in the CSV file name. */
    private static final DateTimeFormatter FILE_TIMESTAMP =
            DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");

    /**
     * Runs all three crawlers, prints the results and writes the CSV export.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("\n" + HEAVY_RULE);
        System.out.println(" Movie Data Crawler - 3 Sites");
        System.out.println(HEAVY_RULE + "\n");

        List<Movie> allMovies = new ArrayList<>();
        List<Movie> doubanMovies = crawlSource("Douban", "Crawling Douban Movies...",
                () -> new DoubanCrawler().crawl(), allMovies);
        List<Movie> quotesMovies = crawlSource("Quotes", "Crawling Quotes to Movies...",
                () -> new QuotesCrawler().crawl(), allMovies);
        List<Movie> tencentMovies = crawlSource("Tencent", "Crawling Tencent Movies...",
                () -> new TencentCrawler().crawl(), allMovies);

        // Highest rating first. Movies without a rating go last instead of crashing the
        // sort with a NullPointerException.
        allMovies.sort(Comparator.comparing(Movie::getRating,
                Comparator.nullsLast(Comparator.reverseOrder())));

        printSummary(allMovies, doubanMovies, quotesMovies, tencentMovies);

        String csvFilename = "movies_" + LocalDateTime.now().format(FILE_TIMESTAMP) + ".csv";
        try {
            saveToCSV(allMovies, doubanMovies, quotesMovies, tencentMovies, csvFilename);
            System.out.println("Data saved to file: " + csvFilename);
            System.out.println(" - Contains complete data, can be opened directly in Excel");
        } catch (IOException e) {
            System.err.println("Save CSV error: " + e.getMessage());
        }

        System.out.println("\n" + HEAVY_RULE);
        System.out.println(" Program completed!");
        System.out.println(HEAVY_RULE + "\n");
    }

    /**
     * Runs one crawler, appends its movies to the combined list and prints a preview.
     * Any exception raised while crawling or printing is reported on stderr and does not
     * stop the program, so the remaining sites are still crawled.
     *
     * @param source    site name used in console messages
     * @param banner    line printed before crawling starts
     * @param crawler   action that creates the crawler and returns its movies
     * @param allMovies combined list that receives the crawled movies
     * @return the movies obtained from this site; never {@code null}, and empty when the
     *         crawl failed or produced {@code null}
     */
    private static List<Movie> crawlSource(String source, String banner,
                                           Callable<List<Movie>> crawler,
                                           List<Movie> allMovies) {
        System.out.println("\n" + LIGHT_RULE);
        System.out.println(banner);
        System.out.println(LIGHT_RULE);

        List<Movie> movies = new ArrayList<>();
        try {
            List<Movie> crawled = crawler.call();
            // A null result must not replace the empty list: the summary and the CSV
            // export later call size() and iterate over it.
            if (crawled != null) {
                movies = crawled;
            }
            allMovies.addAll(movies);
            System.out.println("\n" + source + " crawling completed! Got " + movies.size() + " movies");
            printMovieList(source, movies);
        } catch (Exception e) {
            System.err.println(source + " crawling error: " + e.getMessage());
        }
        return movies;
    }

    /**
     * Prints per-site counts followed by the first {@value #TOP_LIMIT} entries of the
     * combined list, which the caller has already sorted.
     */
    private static void printSummary(List<Movie> allMovies, List<Movie> doubanMovies,
                                     List<Movie> quotesMovies, List<Movie> tencentMovies) {
        System.out.println("\n\n" + HEAVY_RULE);
        System.out.println(" Crawling Summary");
        System.out.println(HEAVY_RULE);
        System.out.println("\nData statistics:");
        System.out.println(" - Douban: " + doubanMovies.size() + " movies");
        System.out.println(" - Quotes: " + quotesMovies.size() + " movies");
        System.out.println(" - Tencent: " + tencentMovies.size() + " movies");
        System.out.println(" - Total: " + allMovies.size() + " movies");

        System.out.println("\nTop " + TOP_LIMIT + " Movies:");
        System.out.println(LIGHT_RULE);
        System.out.printf("%-6s %-35s %-10s %-15s %-12s %-15s %-10s\n",
                "Rank", "Title", "Rating", "Source", "Year", "Director", "Actors");
        System.out.println(LIGHT_RULE);

        int rows = Math.min(TOP_LIMIT, allMovies.size());
        for (int i = 0; i < rows; i++) {
            Movie movie = allMovies.get(i);
            System.out.printf("%-6d %-35s %-10.1f %-15s %-12s %-15s %-10s\n",
                    i + 1,
                    truncate(movie.getTitle(), 35),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    movie.getSource(),
                    orUnknown(movie.getReleaseDate()),
                    truncate(movie.getDirector(), 15),
                    truncate(movie.getActors(), 10));
        }
        System.out.println(LIGHT_RULE + "\n");
    }

    /**
     * Prints a table with the first {@value #PREVIEW_LIMIT} movies of one site.
     *
     * @param source site name shown in the table title
     * @param movies movies to preview, in the order they should be listed
     */
    private static void printMovieList(String source, List<Movie> movies) {
        System.out.println("\n" + source + " Movie List (First " + PREVIEW_LIMIT + "):");
        System.out.println(LIGHT_RULE);
        System.out.printf("%-6s %-35s %-10s %-12s %-15s %-10s\n",
                "No.", "Title", "Rating", "Year", "Director", "Source");
        System.out.println(LIGHT_RULE);

        int rows = Math.min(PREVIEW_LIMIT, movies.size());
        for (int i = 0; i < rows; i++) {
            Movie movie = movies.get(i);
            System.out.printf("%-6d %-35s %-10.1f %-12s %-15s %-10s\n",
                    i + 1,
                    truncate(movie.getTitle(), 35),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    orUnknown(movie.getReleaseDate()),
                    truncate(movie.getDirector(), 15),
                    movie.getSource());
        }
        System.out.println(LIGHT_RULE);
    }

    /**
     * Writes the header row and four titled sections (all movies, then one per site) to
     * a CSV file.
     *
     * @param allMovies     combined list, already in the order it should be exported
     * @param doubanMovies  movies from Douban
     * @param quotesMovies  movies from Quotes
     * @param tencentMovies movies from Tencent
     * @param filename      path of the file to create or overwrite
     * @throws IOException if the file cannot be created or written
     */
    private static void saveToCSV(List<Movie> allMovies, List<Movie> doubanMovies,
                                  List<Movie> quotesMovies, List<Movie> tencentMovies,
                                  String filename) throws IOException {
        // Explicit UTF-8 instead of the platform default, so non-ASCII titles are written
        // the same way on every machine.
        try (BufferedWriter writer =
                     new BufferedWriter(new FileWriter(filename, StandardCharsets.UTF_8))) {
            // Byte-order mark: lets Excel recognise the file as UTF-8 when it is opened
            // directly, as the console message promises.
            writer.write('\uFEFF');
            writer.write("No.,Title,Rating,Director,Actors,Year,Source");
            writer.newLine();

            writeSection(writer, "All Movies (Sorted by Rating)", allMovies);
            writeSection(writer, "Douban Movies", doubanMovies);
            writeSection(writer, "Quotes Movies", quotesMovies);
            writeSection(writer, "Tencent Movies", tencentMovies);
        }
    }

    /** Writes a blank line, the section title on its own line, then the movie rows. */
    private static void writeSection(BufferedWriter writer, String title, List<Movie> movies)
            throws IOException {
        writer.newLine();
        writer.write(title);
        writer.newLine();
        writeMoviesToCSV(writer, movies);
    }

    /**
     * Writes one CSV row per movie, numbered from 1.
     *
     * @param writer destination; not closed by this method
     * @param movies movies to write, in order
     * @throws IOException if writing fails
     */
    private static void writeMoviesToCSV(BufferedWriter writer, List<Movie> movies) throws IOException {
        int rowNumber = 1;
        for (Movie movie : movies) {
            // Locale.ROOT keeps '.' as the decimal separator. A locale that uses ','
            // would split the unquoted rating into two columns.
            // escapeCSV already returns fully quoted fields, so the pattern adds no quotes.
            String line = String.format(Locale.ROOT, "%d,%s,%.1f,%s,%s,%s,%s",
                    rowNumber,
                    escapeCSV(movie.getTitle()),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    escapeCSV(movie.getDirector()),
                    escapeCSV(movie.getActors()),
                    escapeCSV(movie.getReleaseDate()),
                    escapeCSV(movie.getSource()));
            writer.write(line);
            writer.newLine();
            rowNumber++;
        }
    }

    /**
     * Converts a value into a complete, double-quoted CSV field. Embedded double quotes
     * are doubled; commas and line breaks are safe because the field is always quoted.
     *
     * @param str raw value, may be {@code null}
     * @return the quoted field; {@code ""} (two quote characters) for null or empty input
     */
    private static String escapeCSV(String str) {
        if (str == null || str.isEmpty()) {
            return "\"\"";
        }
        return "\"" + str.replace("\"", "\"\"") + "\"";
    }

    /**
     * Shortens text to at most {@code maxLength} characters, ending with "..." when
     * something was cut.
     *
     * @param str       text to shorten, may be {@code null}
     * @param maxLength maximum length of the result
     * @return the empty string for null or empty input, the text itself when it fits,
     *         otherwise a shortened copy
     */
    private static String truncate(String str, int maxLength) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        if (str.length() <= maxLength) {
            return str;
        }
        // Too narrow for the ellipsis: cut hard instead of passing a negative end index
        // to substring.
        if (maxLength < ELLIPSIS.length()) {
            return str.substring(0, Math.max(0, maxLength));
        }
        return str.substring(0, maxLength - ELLIPSIS.length()) + ELLIPSIS;
    }

    /** Returns the value itself, or "Unknown" when it is null or empty. */
    private static String orUnknown(String value) {
        return value != null && !value.isEmpty() ? value : "Unknown";
    }
}