5 changed files with 276 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,41 @@ |
|||||
|
|
||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.command.Command; |
||||
|
import com.crawler.command.CrawlCommand; |
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
|
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class App { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
CrawlerController controller = new CrawlerController(view); |
||||
|
Command crawlCommand = new CrawlCommand(controller); |
||||
|
|
||||
|
view.showWelcome(); |
||||
|
|
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
boolean running = true; |
||||
|
|
||||
|
while (running) { |
||||
|
view.showMenu(); |
||||
|
String input = scanner.nextLine().trim(); |
||||
|
|
||||
|
switch (input) { |
||||
|
case "1": |
||||
|
crawlCommand.execute(); |
||||
|
break; |
||||
|
case "2": |
||||
|
running = false; |
||||
|
view.showMessage("程序退出"); |
||||
|
break; |
||||
|
default: |
||||
|
view.showError("无效选项,请重新输入"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
scanner.close(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
|
||||
|
package com.crawler.command; |
||||
|
|
||||
|
/**
 * A unit of work that can be triggered without the caller knowing what it does.
 *
 * <p>Implementations carry whatever collaborators they need; the caller only
 * invokes {@link #execute()}.
 */
public interface Command {

    /**
     * Runs the action represented by this command.
     */
    void execute();
}
||||
@ -0,0 +1,19 @@ |
|||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.model.Movie; |
||||
|
import com.crawler.strategy.impl.DoubanCrawler; |
||||
|
import com.crawler.strategy.impl.MaoyanCrawler; |
||||
|
import com.crawler.strategy.impl.TencentCrawler; |
||||
|
import org.apache.poi.ss.usermodel.*; |
||||
|
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
||||
|
|
||||
|
import java.io.FileOutputStream; |
||||
|
import java.io.IOException; |
||||
|
import java.time.LocalDateTime; |
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Comparator; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class TestCrawler { |
||||
|
public static void main(String[] |
||||
@ -0,0 +1,210 @@ |
|||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.model.Movie;
import com.crawler.strategy.impl.DoubanCrawler;
import com.crawler.strategy.impl.QuotesCrawler;
import com.crawler.strategy.impl.TencentCrawler;

import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
||||
|
|
||||
|
public class TestCrawler { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.println("\n" + "=".repeat(80)); |
||||
|
System.out.println(" Movie Data Crawler - 3 Sites"); |
||||
|
System.out.println("=".repeat(80) + "\n"); |
||||
|
|
||||
|
List<Movie> allMovies = new ArrayList<>(); |
||||
|
List<Movie> doubanMovies = new ArrayList<>(); |
||||
|
List<Movie> quotesMovies = new ArrayList<>(); |
||||
|
List<Movie> tencentMovies = new ArrayList<>(); |
||||
|
|
||||
|
// Crawl Douban
|
||||
|
System.out.println("\n" + "-".repeat(80)); |
||||
|
System.out.println("Crawling Douban Movies..."); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
try { |
||||
|
DoubanCrawler doubanCrawler = new DoubanCrawler(); |
||||
|
doubanMovies = doubanCrawler.crawl(); |
||||
|
allMovies.addAll(doubanMovies); |
||||
|
System.out.println("\nDouban crawling completed! Got " + doubanMovies.size() + " movies"); |
||||
|
printMovieList("Douban", doubanMovies); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("Douban crawling error: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
// Crawl Quotes
|
||||
|
System.out.println("\n" + "-".repeat(80)); |
||||
|
System.out.println("Crawling Quotes to Movies..."); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
try { |
||||
|
QuotesCrawler quotesCrawler = new QuotesCrawler(); |
||||
|
quotesMovies = quotesCrawler.crawl(); |
||||
|
allMovies.addAll(quotesMovies); |
||||
|
System.out.println("\nQuotes crawling completed! Got " + quotesMovies.size() + " movies"); |
||||
|
printMovieList("Quotes", quotesMovies); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("Quotes crawling error: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
// Crawl Tencent
|
||||
|
System.out.println("\n" + "-".repeat(80)); |
||||
|
System.out.println("Crawling Tencent Movies..."); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
try { |
||||
|
TencentCrawler tencentCrawler = new TencentCrawler(); |
||||
|
tencentMovies = tencentCrawler.crawl(); |
||||
|
allMovies.addAll(tencentMovies); |
||||
|
System.out.println("\nTencent crawling completed! Got " + tencentMovies.size() + " movies"); |
||||
|
printMovieList("Tencent", tencentMovies); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("Tencent crawling error: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
// Sort by rating descending
|
||||
|
allMovies.sort(Comparator.comparing(Movie::getRating).reversed()); |
||||
|
|
||||
|
// Show summary
|
||||
|
System.out.println("\n\n" + "=".repeat(80)); |
||||
|
System.out.println(" Crawling Summary"); |
||||
|
System.out.println("=".repeat(80)); |
||||
|
System.out.println("\nData statistics:"); |
||||
|
System.out.println(" - Douban: " + doubanMovies.size() + " movies"); |
||||
|
System.out.println(" - Quotes: " + quotesMovies.size() + " movies"); |
||||
|
System.out.println(" - Tencent: " + tencentMovies.size() + " movies"); |
||||
|
System.out.println(" - Total: " + allMovies.size() + " movies"); |
||||
|
|
||||
|
System.out.println("\nTop 50 Movies:"); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
System.out.printf("%-6s %-35s %-10s %-15s %-12s %-15s %-10s\n", |
||||
|
"Rank", "Title", "Rating", "Source", "Year", "Director", "Actors"); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
|
||||
|
int count = 0; |
||||
|
for (Movie movie : allMovies) { |
||||
|
if (count >= 50) break; |
||||
|
System.out.printf("%-6d %-35s %-10.1f %-15s %-12s %-15s %-10s\n", |
||||
|
count + 1, |
||||
|
truncate(movie.getTitle(), 35), |
||||
|
movie.getRating() != null ? movie.getRating() : 0.0, |
||||
|
movie.getSource(), |
||||
|
movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown", |
||||
|
truncate(movie.getDirector(), 15), |
||||
|
truncate(movie.getActors(), 10)); |
||||
|
count++; |
||||
|
} |
||||
|
|
||||
|
System.out.println("-".repeat(80) + "\n"); |
||||
|
|
||||
|
// Save to CSV file
|
||||
|
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); |
||||
|
String csvFilename = "movies_" + timestamp + ".csv"; |
||||
|
try { |
||||
|
saveToCSV(allMovies, doubanMovies, quotesMovies, tencentMovies, csvFilename); |
||||
|
System.out.println("Data saved to file: " + csvFilename); |
||||
|
System.out.println(" - Contains complete data, can be opened directly in Excel"); |
||||
|
} catch (IOException e) { |
||||
|
System.err.println("Save CSV error: " + e.getMessage()); |
||||
|
} |
||||
|
|
||||
|
System.out.println("\n" + "=".repeat(80)); |
||||
|
System.out.println(" Program completed!"); |
||||
|
System.out.println("=".repeat(80) + "\n"); |
||||
|
} |
||||
|
|
||||
|
private static void printMovieList(String source, List<Movie> movies) { |
||||
|
System.out.println("\n" + source + " Movie List (First 30):"); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
System.out.printf("%-6s %-35s %-10s %-12s %-15s %-10s\n", |
||||
|
"No.", "Title", "Rating", "Year", "Director", "Source"); |
||||
|
System.out.println("-".repeat(80)); |
||||
|
|
||||
|
int index = 0; |
||||
|
for (Movie movie : movies) { |
||||
|
if (index >= 30) break; |
||||
|
System.out.printf("%-6d %-35s %-10.1f %-12s %-15s %-10s\n", |
||||
|
index + 1, |
||||
|
truncate(movie.getTitle(), 35), |
||||
|
movie.getRating() != null ? movie.getRating() : 0.0, |
||||
|
movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown", |
||||
|
truncate(movie.getDirector(), 15), |
||||
|
movie.getSource()); |
||||
|
index++; |
||||
|
} |
||||
|
System.out.println("-".repeat(80)); |
||||
|
} |
||||
|
|
||||
|
private static void saveToCSV(List<Movie> allMovies, List<Movie> doubanMovies, |
||||
|
List<Movie> quotesMovies, List<Movie> tencentMovies, |
||||
|
String filename) throws IOException { |
||||
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename))) { |
||||
|
// Write header
|
||||
|
writer.write("No.,Title,Rating,Director,Actors,Year,Source"); |
||||
|
writer.newLine(); |
||||
|
|
||||
|
// Write all movies
|
||||
|
writer.newLine(); |
||||
|
writer.write("All Movies (Sorted by Rating)"); |
||||
|
writer.newLine(); |
||||
|
writeMoviesToCSV(writer, allMovies); |
||||
|
|
||||
|
// Write Douban movies
|
||||
|
writer.newLine(); |
||||
|
writer.write("Douban Movies"); |
||||
|
writer.newLine(); |
||||
|
writeMoviesToCSV(writer, doubanMovies); |
||||
|
|
||||
|
// Write Quotes movies
|
||||
|
writer.newLine(); |
||||
|
writer.write("Quotes Movies"); |
||||
|
writer.newLine(); |
||||
|
writeMoviesToCSV(writer, quotesMovies); |
||||
|
|
||||
|
// Write Tencent movies
|
||||
|
writer.newLine(); |
||||
|
writer.write("Tencent Movies"); |
||||
|
writer.newLine(); |
||||
|
writeMoviesToCSV(writer, tencentMovies); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void writeMoviesToCSV(BufferedWriter writer, List<Movie> movies) throws IOException { |
||||
|
int index = 0; |
||||
|
for (Movie movie : movies) { |
||||
|
String line = String.format("%d,\"%s\",%.1f,\"%s\",\"%s\",\"%s\",\"%s\"", |
||||
|
index + 1, |
||||
|
escapeCSV(movie.getTitle()), |
||||
|
movie.getRating() != null ? movie.getRating() : 0.0, |
||||
|
escapeCSV(movie.getDirector()), |
||||
|
escapeCSV(movie.getActors()), |
||||
|
escapeCSV(movie.getReleaseDate()), |
||||
|
escapeCSV(movie.getSource())); |
||||
|
writer.write(line); |
||||
|
writer.newLine(); |
||||
|
index++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static String escapeCSV(String str) { |
||||
|
if (str == null || str.isEmpty()) { |
||||
|
return ""; |
||||
|
} |
||||
|
if (str.contains(",") || str.contains("\"") || str.contains("\n")) { |
||||
|
return "\"" + str.replace("\"", "\"\"") + "\""; |
||||
|
} |
||||
|
return str; |
||||
|
} |
||||
|
|
||||
|
private static String truncate(String str, int maxLength) { |
||||
|
if (str == null || str.isEmpty()) { |
||||
|
return ""; |
||||
|
} |
||||
|
return str.length() > maxLength ? str.substring(0, maxLength - 3) + "..." : str; |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue