package com.crawler;

import com.crawler.model.Movie;
import com.crawler.strategy.impl.DoubanCrawler;
import com.crawler.strategy.impl.QuotesCrawler;
import com.crawler.strategy.impl.TencentCrawler;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.concurrent.Callable;

/**
 * Command-line driver that runs the Douban, Quotes and Tencent crawlers one after another,
 * prints a per-site listing and a combined ranking to the console, and writes everything to a
 * timestamped CSV file in the working directory.
 *
 * <p>A failure in one crawler is reported on {@code System.err} and does not stop the others.
 */
public class TestCrawler {

    /** Width of the console separator lines. */
    private static final int RULE_WIDTH = 80;

    /** Separator used around top-level banners. */
    private static final String HEAVY_RULE = "=".repeat(RULE_WIDTH);

    /** Separator used around sections and tables. */
    private static final String LIGHT_RULE = "-".repeat(RULE_WIDTH);

    /** Maximum number of rows printed in the combined ranking. */
    private static final int SUMMARY_LIMIT = 50;

    /** Maximum number of rows printed in each per-site listing. */
    private static final int PER_SITE_LIMIT = 30;

    /** Timestamp pattern embedded in the CSV file name. */
    private static final DateTimeFormatter FILE_TIMESTAMP =
            DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");

    /**
     * Program entry point: crawls the three sites, prints the summary and saves the CSV file.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        System.out.println("\n" + HEAVY_RULE);
        System.out.println(" Movie Data Crawler - 3 Sites");
        System.out.println(HEAVY_RULE + "\n");

        List<Movie> allMovies = new ArrayList<>();
        List<Movie> doubanMovies =
                crawlSite("Douban", "Douban Movies", () -> new DoubanCrawler().crawl(), allMovies);
        List<Movie> quotesMovies =
                crawlSite("Quotes", "Quotes to Movies", () -> new QuotesCrawler().crawl(), allMovies);
        List<Movie> tencentMovies =
                crawlSite("Tencent", "Tencent Movies", () -> new TencentCrawler().crawl(), allMovies);

        // Sort by rating descending. Movies without a rating go last instead of causing an NPE
        // inside the comparator.
        allMovies.sort(Comparator.comparing(
                Movie::getRating, Comparator.nullsLast(Comparator.reverseOrder())));

        // Show summary
        System.out.println("\n\n" + HEAVY_RULE);
        System.out.println(" Crawling Summary");
        System.out.println(HEAVY_RULE);
        System.out.println("\nData statistics:");
        System.out.println(" - Douban: " + doubanMovies.size() + " movies");
        System.out.println(" - Quotes: " + quotesMovies.size() + " movies");
        System.out.println(" - Tencent: " + tencentMovies.size() + " movies");
        System.out.println(" - Total: " + allMovies.size() + " movies");

        System.out.println("\nTop " + SUMMARY_LIMIT + " Movies:");
        System.out.println(LIGHT_RULE);
        System.out.printf("%-6s %-35s %-10s %-15s %-12s %-15s %-10s\n",
                "Rank", "Title", "Rating", "Source", "Year", "Director", "Actors");
        System.out.println(LIGHT_RULE);
        int count = 0;
        for (Movie movie : allMovies) {
            if (count >= SUMMARY_LIMIT) {
                break;
            }
            System.out.printf("%-6d %-35s %-10.1f %-15s %-12s %-15s %-10s\n",
                    count + 1,
                    truncate(movie.getTitle(), 35),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    movie.getSource(),
                    releaseDateOrUnknown(movie),
                    truncate(movie.getDirector(), 15),
                    truncate(movie.getActors(), 10));
            count++;
        }
        System.out.println(LIGHT_RULE + "\n");

        // Save to CSV file
        String timestamp = LocalDateTime.now().format(FILE_TIMESTAMP);
        String csvFilename = "movies_" + timestamp + ".csv";
        try {
            saveToCSV(allMovies, doubanMovies, quotesMovies, tencentMovies, csvFilename);
            System.out.println("Data saved to file: " + csvFilename);
            System.out.println(" - Contains complete data, can be opened directly in Excel");
        } catch (IOException e) {
            System.err.println("Save CSV error: " + describe(e));
        }

        System.out.println("\n" + HEAVY_RULE);
        System.out.println(" Program completed!");
        System.out.println(HEAVY_RULE + "\n");
    }

    /**
     * Runs one crawler, appends its results to {@code allMovies} and prints the per-site listing.
     *
     * @param site      short site label used in messages, e.g. {@code "Douban"}
     * @param banner    text shown in the "Crawling ..." banner line
     * @param crawl     action that creates the crawler and returns its movies
     * @param allMovies combined list that receives the crawled movies
     * @return the movies obtained for this site; empty (never {@code null}) when the crawler
     *         failed before returning or returned {@code null}
     */
    private static List<Movie> crawlSite(
            String site, String banner, Callable<List<Movie>> crawl, List<Movie> allMovies) {
        System.out.println("\n" + LIGHT_RULE);
        System.out.println("Crawling " + banner + "...");
        System.out.println(LIGHT_RULE);

        List<Movie> movies = new ArrayList<>();
        try {
            List<Movie> crawled = crawl.call();
            if (crawled != null) {
                // A null result is treated as "no movies" so the summary can still call size().
                movies = crawled;
            }
            allMovies.addAll(movies);
            System.out.println("\n" + site + " crawling completed! Got " + movies.size() + " movies");
            printMovieList(site, movies);
        } catch (Exception e) {
            if (e instanceof InterruptedException) {
                // Preserve the interrupt status for whoever runs this thread.
                Thread.currentThread().interrupt();
            }
            System.err.println(site + " crawling error: " + describe(e));
        }
        return movies;
    }

    /**
     * Prints the first {@link #PER_SITE_LIMIT} movies of one site as a console table.
     *
     * @param source site label shown in the heading
     * @param movies movies to list
     */
    private static void printMovieList(String source, List<Movie> movies) {
        System.out.println("\n" + source + " Movie List (First " + PER_SITE_LIMIT + "):");
        System.out.println(LIGHT_RULE);
        System.out.printf("%-6s %-35s %-10s %-12s %-15s %-10s\n",
                "No.", "Title", "Rating", "Year", "Director", "Source");
        System.out.println(LIGHT_RULE);
        int index = 0;
        for (Movie movie : movies) {
            if (index >= PER_SITE_LIMIT) {
                break;
            }
            System.out.printf("%-6d %-35s %-10.1f %-12s %-15s %-10s\n",
                    index + 1,
                    truncate(movie.getTitle(), 35),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    releaseDateOrUnknown(movie),
                    truncate(movie.getDirector(), 15),
                    movie.getSource());
            index++;
        }
        System.out.println(LIGHT_RULE);
    }

    /**
     * Writes the combined list and the three per-site lists to one CSV file, each list preceded
     * by a blank line and a title line.
     *
     * <p>The file is encoded as UTF-8 with a leading byte-order mark so that spreadsheet
     * applications detect the encoding of non-ASCII titles.
     *
     * @param allMovies     all movies, already sorted by rating
     * @param doubanMovies  movies from Douban
     * @param quotesMovies  movies from Quotes
     * @param tencentMovies movies from Tencent
     * @param filename      path of the file to create or overwrite
     * @throws IOException if the file cannot be written
     */
    private static void saveToCSV(List<Movie> allMovies, List<Movie> doubanMovies,
            List<Movie> quotesMovies, List<Movie> tencentMovies, String filename)
            throws IOException {
        try (BufferedWriter writer =
                new BufferedWriter(new FileWriter(filename, StandardCharsets.UTF_8))) {
            writer.write('\uFEFF'); // UTF-8 byte-order mark
            // Write header
            writer.write("No.,Title,Rating,Director,Actors,Year,Source");
            writer.newLine();

            writeSection(writer, "All Movies (Sorted by Rating)", allMovies);
            writeSection(writer, "Douban Movies", doubanMovies);
            writeSection(writer, "Quotes Movies", quotesMovies);
            writeSection(writer, "Tencent Movies", tencentMovies);
        }
    }

    /** Writes a blank line, the section title and then one CSV row per movie. */
    private static void writeSection(BufferedWriter writer, String title, List<Movie> movies)
            throws IOException {
        writer.newLine();
        writer.write(title);
        writer.newLine();
        writeMoviesToCSV(writer, movies);
    }

    /**
     * Writes one CSV row per movie, numbered from 1.
     *
     * @param writer destination
     * @param movies movies to write
     * @throws IOException if writing fails
     */
    private static void writeMoviesToCSV(BufferedWriter writer, List<Movie> movies)
            throws IOException {
        int index = 0;
        for (Movie movie : movies) {
            // Locale.ROOT keeps '.' as the decimal separator; a locale using ',' would otherwise
            // split the unquoted rating into two columns.
            String line = String.format(Locale.ROOT, "%d,%s,%.1f,%s,%s,%s,%s",
                    index + 1,
                    escapeCSV(movie.getTitle()),
                    movie.getRating() != null ? movie.getRating() : 0.0,
                    escapeCSV(movie.getDirector()),
                    escapeCSV(movie.getActors()),
                    escapeCSV(movie.getReleaseDate()),
                    escapeCSV(movie.getSource()));
            writer.write(line);
            writer.newLine();
            index++;
        }
    }

    /**
     * Returns {@code str} as a complete quoted CSV field: wrapped in double quotes with every
     * embedded double quote doubled. Quoting unconditionally makes commas, quotes, CR and LF
     * inside the value safe.
     *
     * @param str field value; {@code null} is written as an empty quoted field
     * @return the quoted field, never {@code null}
     */
    private static String escapeCSV(String str) {
        if (str == null || str.isEmpty()) {
            return "\"\"";
        }
        return "\"" + str.replace("\"", "\"\"") + "\"";
    }

    /**
     * Shortens {@code str} to at most {@code maxLength} characters, ending it with
     * {@code "..."} when it had to be cut.
     *
     * @param str       text to shorten; {@code null} yields an empty string
     * @param maxLength maximum length of the result
     * @return the possibly shortened text, never {@code null}
     */
    private static String truncate(String str, int maxLength) {
        if (str == null || str.isEmpty()) {
            return "";
        }
        if (str.length() <= maxLength) {
            return str;
        }
        if (maxLength <= 3) {
            // No room for the ellipsis; cut hard instead of passing a negative index.
            return str.substring(0, Math.max(0, maxLength));
        }
        return str.substring(0, maxLength - 3) + "...";
    }

    /** Returns the movie's release date, or {@code "Unknown"} when it is null or empty. */
    private static String releaseDateOrUnknown(Movie movie) {
        String releaseDate = movie.getReleaseDate();
        return releaseDate != null && !releaseDate.isEmpty() ? releaseDate : "Unknown";
    }

    /** Returns the exception message, or the exception class name when there is no message. */
    private static String describe(Throwable error) {
        String message = error.getMessage();
        return message != null ? message : error.getClass().getName();
    }
}