diff --git a/project/202402050211-高耿-期末实验报告.docx b/project/202402050211-高耿-期末实验报告.docx
new file mode 100644
index 0000000..6becbb4
Binary files /dev/null and b/project/202402050211-高耿-期末实验报告.docx differ
diff --git a/project/App.java b/project/App.java
new file mode 100644
index 0000000..321361f
--- /dev/null
+++ b/project/App.java
@@ -0,0 +1,54 @@
+
+package com.crawler;
+
+import com.crawler.command.Command;
+import com.crawler.command.CrawlCommand;
+import com.crawler.controller.CrawlerController;
+import com.crawler.view.ConsoleView;
+
+import java.util.Scanner;
+
+/**
+ * Console entry point: shows a menu and dispatches the chosen option until the user quits.
+ */
+public class App {
+    /**
+     * Runs the interactive menu loop on standard input ("1" runs the crawl command, "2" exits).
+     *
+     * @param args command-line arguments (unused)
+     */
+    public static void main(String[] args) {
+        ConsoleView view = new ConsoleView();
+        CrawlerController controller = new CrawlerController(view);
+        Command crawlCommand = new CrawlCommand(controller);
+
+        view.showWelcome();
+
+        // try-with-resources closes the scanner on every exit path.
+        try (Scanner scanner = new Scanner(System.in)) {
+            boolean running = true;
+
+            while (running) {
+                view.showMenu();
+                // Stop cleanly when stdin is closed (EOF / exhausted pipe) instead of
+                // letting nextLine() throw NoSuchElementException.
+                if (!scanner.hasNextLine()) {
+                    break;
+                }
+                String input = scanner.nextLine().trim();
+
+                switch (input) {
+                    case "1":
+                        crawlCommand.execute();
+                        break;
+                    case "2":
+                        running = false;
+                        view.showMessage("程序退出");
+                        break;
+                    default:
+                        view.showError("无效选项,请重新输入");
+                }
+            }
+        }
+    }
+}
diff --git a/project/Command.java b/project/Command.java
new file mode 100644
index 0000000..e629ddb
--- /dev/null
+++ b/project/Command.java
@@ -0,0 +1,10 @@
+
+package com.crawler.command;
+
+/**
+ * An action that can be executed without arguments.
+ */
+public interface Command {
+    /** Performs the action. */
+    void execute();
+}
diff --git a/project/TestC.java b/project/TestC.java
new file mode 100644
index 0000000..5aa37ca
--- /dev/null
+++ b/project/TestC.java
@@ -0,0 +1,19 @@
+package com.crawler;
+
+import com.crawler.model.Movie;
+import com.crawler.strategy.impl.DoubanCrawler;
+import com.crawler.strategy.impl.MaoyanCrawler;
+import com.crawler.strategy.impl.TencentCrawler;
+import org.apache.poi.ss.usermodel.*;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+
+public class TestCrawler {
+    public static void main(String[]
\ No newline at end of file
diff --git a/project/TestCrawler.java b/project/TestCrawler.java
new file mode 100644
index 0000000..48f5cd1
--- /dev/null
+++ b/project/TestCrawler.java
@@ -0,0 +1,282 @@
+package com.crawler;
+
+import com.crawler.model.Movie;
+import com.crawler.strategy.impl.DoubanCrawler;
+import com.crawler.strategy.impl.QuotesCrawler;
+import com.crawler.strategy.impl.TencentCrawler;
+
+import java.io.BufferedWriter;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.List;
+import java.util.Locale;
+
+/**
+ * Stand-alone driver: runs the Douban, Quotes and Tencent crawlers, prints the results to
+ * the console and writes them to a timestamped CSV file.
+ */
+public class TestCrawler {
+    /**
+     * Runs the three crawlers in turn, prints per-site and combined listings, then exports
+     * everything to {@code movies_<timestamp>.csv}. A failure in one crawler is reported on
+     * stderr and does not stop the others.
+     *
+     * @param args command-line arguments (unused)
+     */
+    public static void main(String[] args) {
+        System.out.println("\n" + "=".repeat(80));
+        System.out.println(" Movie Data Crawler - 3 Sites");
+        System.out.println("=".repeat(80) + "\n");
+
+        List<Movie> allMovies = new ArrayList<>();
+        List<Movie> doubanMovies = new ArrayList<>();
+        List<Movie> quotesMovies = new ArrayList<>();
+        List<Movie> tencentMovies = new ArrayList<>();
+
+        // Crawl Douban
+        System.out.println("\n" + "-".repeat(80));
+        System.out.println("Crawling Douban Movies...");
+        System.out.println("-".repeat(80));
+        try {
+            DoubanCrawler doubanCrawler = new DoubanCrawler();
+            doubanMovies = doubanCrawler.crawl();
+            allMovies.addAll(doubanMovies);
+            System.out.println("\nDouban crawling completed! Got " + doubanMovies.size() + " movies");
+            printMovieList("Douban", doubanMovies);
+        } catch (Exception e) {
+            System.err.println("Douban crawling error: " + e.getMessage());
+        }
+
+        // Crawl Quotes
+        System.out.println("\n" + "-".repeat(80));
+        System.out.println("Crawling Quotes to Movies...");
+        System.out.println("-".repeat(80));
+        try {
+            QuotesCrawler quotesCrawler = new QuotesCrawler();
+            quotesMovies = quotesCrawler.crawl();
+            allMovies.addAll(quotesMovies);
+            System.out.println("\nQuotes crawling completed! Got " + quotesMovies.size() + " movies");
+            printMovieList("Quotes", quotesMovies);
+        } catch (Exception e) {
+            System.err.println("Quotes crawling error: " + e.getMessage());
+        }
+
+        // Crawl Tencent
+        System.out.println("\n" + "-".repeat(80));
+        System.out.println("Crawling Tencent Movies...");
+        System.out.println("-".repeat(80));
+        try {
+            TencentCrawler tencentCrawler = new TencentCrawler();
+            tencentMovies = tencentCrawler.crawl();
+            allMovies.addAll(tencentMovies);
+            System.out.println("\nTencent crawling completed! Got " + tencentMovies.size() + " movies");
+            printMovieList("Tencent", tencentMovies);
+        } catch (Exception e) {
+            System.err.println("Tencent crawling error: " + e.getMessage());
+        }
+
+        // Sort by rating descending. Unrated movies (null rating) sort last rather than
+        // making the comparator throw a NullPointerException.
+        allMovies.sort(Comparator.comparing(Movie::getRating,
+                Comparator.nullsLast(Comparator.reverseOrder())));
+
+        // Show summary
+        System.out.println("\n\n" + "=".repeat(80));
+        System.out.println(" Crawling Summary");
+        System.out.println("=".repeat(80));
+        System.out.println("\nData statistics:");
+        System.out.println(" - Douban: " + doubanMovies.size() + " movies");
+        System.out.println(" - Quotes: " + quotesMovies.size() + " movies");
+        System.out.println(" - Tencent: " + tencentMovies.size() + " movies");
+        System.out.println(" - Total: " + allMovies.size() + " movies");
+
+        System.out.println("\nTop 50 Movies:");
+        System.out.println("-".repeat(80));
+        System.out.printf("%-6s %-35s %-10s %-15s %-12s %-15s %-10s\n",
+                "Rank", "Title", "Rating", "Source", "Year", "Director", "Actors");
+        System.out.println("-".repeat(80));
+
+        int count = 0;
+        for (Movie movie : allMovies) {
+            if (count >= 50) break;
+            System.out.printf("%-6d %-35s %-10.1f %-15s %-12s %-15s %-10s\n",
+                    count + 1,
+                    truncate(movie.getTitle(), 35),
+                    movie.getRating() != null ? movie.getRating() : 0.0,
+                    movie.getSource(),
+                    movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown",
+                    truncate(movie.getDirector(), 15),
+                    truncate(movie.getActors(), 10));
+            count++;
+        }
+
+        System.out.println("-".repeat(80) + "\n");
+
+        // Save to CSV file
+        String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
+        String csvFilename = "movies_" + timestamp + ".csv";
+        try {
+            saveToCSV(allMovies, doubanMovies, quotesMovies, tencentMovies, csvFilename);
+            System.out.println("Data saved to file: " + csvFilename);
+            System.out.println(" - Contains complete data, can be opened directly in Excel");
+        } catch (IOException e) {
+            System.err.println("Save CSV error: " + e.getMessage());
+        }
+
+        System.out.println("\n" + "=".repeat(80));
+        System.out.println(" Program completed!");
+        System.out.println("=".repeat(80) + "\n");
+    }
+
+    /**
+     * Prints up to the first 30 movies of one source as a fixed-width table.
+     *
+     * @param source label shown in the table heading
+     * @param movies movies to list, in the order given
+     */
+    private static void printMovieList(String source, List<Movie> movies) {
+        System.out.println("\n" + source + " Movie List (First 30):");
+        System.out.println("-".repeat(80));
+        System.out.printf("%-6s %-35s %-10s %-12s %-15s %-10s\n",
+                "No.", "Title", "Rating", "Year", "Director", "Source");
+        System.out.println("-".repeat(80));
+
+        int index = 0;
+        for (Movie movie : movies) {
+            if (index >= 30) break;
+            System.out.printf("%-6d %-35s %-10.1f %-12s %-15s %-10s\n",
+                    index + 1,
+                    truncate(movie.getTitle(), 35),
+                    movie.getRating() != null ? movie.getRating() : 0.0,
+                    movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown",
+                    truncate(movie.getDirector(), 15),
+                    movie.getSource());
+            index++;
+        }
+        System.out.println("-".repeat(80));
+    }
+
+    /**
+     * Writes the combined list and the three per-site lists to one CSV file, each section
+     * preceded by a blank line and a title row.
+     *
+     * @param allMovies     every movie, in the order it should be written
+     * @param doubanMovies  movies crawled from Douban
+     * @param quotesMovies  movies crawled from Quotes
+     * @param tencentMovies movies crawled from Tencent
+     * @param filename      path of the CSV file to create or overwrite
+     * @throws IOException if the file cannot be written
+     */
+    private static void saveToCSV(List<Movie> allMovies, List<Movie> doubanMovies,
+                                  List<Movie> quotesMovies, List<Movie> tencentMovies,
+                                  String filename) throws IOException {
+        // Explicit UTF-8 instead of FileWriter's platform-default charset, so non-ASCII
+        // titles are encoded the same way on every machine.
+        try (BufferedWriter writer =
+                     Files.newBufferedWriter(Paths.get(filename), StandardCharsets.UTF_8)) {
+            // A leading byte-order mark lets Excel recognise the file as UTF-8.
+            writer.write('\uFEFF');
+
+            // Write header
+            writer.write("No.,Title,Rating,Director,Actors,Year,Source");
+            writer.newLine();
+
+            // Write all movies
+            writer.newLine();
+            writer.write("All Movies (Sorted by Rating)");
+            writer.newLine();
+            writeMoviesToCSV(writer, allMovies);
+
+            // Write Douban movies
+            writer.newLine();
+            writer.write("Douban Movies");
+            writer.newLine();
+            writeMoviesToCSV(writer, doubanMovies);
+
+            // Write Quotes movies
+            writer.newLine();
+            writer.write("Quotes Movies");
+            writer.newLine();
+            writeMoviesToCSV(writer, quotesMovies);
+
+            // Write Tencent movies
+            writer.newLine();
+            writer.write("Tencent Movies");
+            writer.newLine();
+            writeMoviesToCSV(writer, tencentMovies);
+        }
+    }
+
+    /**
+     * Writes one numbered CSV row per movie.
+     *
+     * @param writer destination; not closed by this method
+     * @param movies movies to write, in the order given
+     * @throws IOException if writing fails
+     */
+    private static void writeMoviesToCSV(BufferedWriter writer, List<Movie> movies) throws IOException {
+        int index = 0;
+        for (Movie movie : movies) {
+            // escapeCSV already adds quotes where a field needs them, so the template must not
+            // add its own (doing both produced malformed fields such as ""a,b""). Locale.ROOT
+            // keeps '.' as the decimal separator so the rating never introduces a comma.
+            String line = String.format(Locale.ROOT, "%d,%s,%.1f,%s,%s,%s,%s",
+                    index + 1,
+                    escapeCSV(movie.getTitle()),
+                    movie.getRating() != null ? movie.getRating() : 0.0,
+                    escapeCSV(movie.getDirector()),
+                    escapeCSV(movie.getActors()),
+                    escapeCSV(movie.getReleaseDate()),
+                    escapeCSV(movie.getSource()));
+            writer.write(line);
+            writer.newLine();
+            index++;
+        }
+    }
+
+    /**
+     * Returns {@code str} as a single CSV field: quoted, with embedded quotes doubled, when it
+     * contains a comma, a double quote or a line break; unchanged otherwise.
+     *
+     * @param str raw field value, may be {@code null}
+     * @return the CSV-safe field; empty string for {@code null} or empty input
+     */
+    private static String escapeCSV(String str) {
+        if (str == null || str.isEmpty()) {
+            return "";
+        }
+        if (str.contains(",") || str.contains("\"") || str.contains("\n") || str.contains("\r")) {
+            return "\"" + str.replace("\"", "\"\"") + "\"";
+        }
+        return str;
+    }
+
+    /**
+     * Shortens {@code str} to at most {@code maxLength} characters, ending with "..." when
+     * text was cut and there is room for it.
+     *
+     * @param str       text to shorten, may be {@code null}
+     * @param maxLength maximum length of the result
+     * @return the shortened text; empty string for {@code null} or empty input
+     */
+    private static String truncate(String str, int maxLength) {
+        if (str == null || str.isEmpty()) {
+            return "";
+        }
+        if (str.length() <= maxLength) {
+            return str;
+        }
+        if (maxLength < 3) {
+            // No room for the ellipsis: hard-cut instead of a negative substring index.
+            return str.substring(0, Math.max(0, maxLength));
+        }
+        return str.substring(0, maxLength - 3) + "...";
+    }
+}