5 changed files with 276 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,41 @@ |
|||
|
|||
package com.crawler; |
|||
|
|||
import com.crawler.command.Command; |
|||
import com.crawler.command.CrawlCommand; |
|||
import com.crawler.controller.CrawlerController; |
|||
import com.crawler.view.ConsoleView; |
|||
|
|||
import java.util.Scanner; |
|||
|
|||
public class App { |
|||
public static void main(String[] args) { |
|||
ConsoleView view = new ConsoleView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
Command crawlCommand = new CrawlCommand(controller); |
|||
|
|||
view.showWelcome(); |
|||
|
|||
Scanner scanner = new Scanner(System.in); |
|||
boolean running = true; |
|||
|
|||
while (running) { |
|||
view.showMenu(); |
|||
String input = scanner.nextLine().trim(); |
|||
|
|||
switch (input) { |
|||
case "1": |
|||
crawlCommand.execute(); |
|||
break; |
|||
case "2": |
|||
running = false; |
|||
view.showMessage("程序退出"); |
|||
break; |
|||
default: |
|||
view.showError("无效选项,请重新输入"); |
|||
} |
|||
} |
|||
|
|||
scanner.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,6 @@ |
|||
|
|||
package com.crawler.command; |
|||
|
|||
/**
 * A single executable action that takes no arguments and returns no result.
 */
public interface Command {

    /**
     * Performs the action represented by this command.
     */
    void execute();
}
|||
@ -0,0 +1,19 @@ |
|||
package com.crawler; |
|||
|
|||
import com.crawler.model.Movie; |
|||
import com.crawler.strategy.impl.DoubanCrawler; |
|||
import com.crawler.strategy.impl.MaoyanCrawler; |
|||
import com.crawler.strategy.impl.TencentCrawler; |
|||
import org.apache.poi.ss.usermodel.*; |
|||
import org.apache.poi.xssf.usermodel.XSSFWorkbook; |
|||
|
|||
import java.io.FileOutputStream; |
|||
import java.io.IOException; |
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.ArrayList; |
|||
import java.util.Comparator; |
|||
import java.util.List; |
|||
|
|||
public class TestCrawler { |
|||
public static void main(String[] |
|||
@ -0,0 +1,210 @@ |
|||
package com.crawler; |
|||
|
|||
import com.crawler.model.Movie; |
|||
import com.crawler.strategy.impl.DoubanCrawler; |
|||
import com.crawler.strategy.impl.QuotesCrawler; |
|||
import com.crawler.strategy.impl.TencentCrawler; |
|||
|
|||
import java.io.BufferedWriter; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.time.LocalDateTime; |
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.ArrayList; |
|||
import java.util.Comparator; |
|||
import java.util.List; |
|||
|
|||
public class TestCrawler { |
|||
public static void main(String[] args) { |
|||
System.out.println("\n" + "=".repeat(80)); |
|||
System.out.println(" Movie Data Crawler - 3 Sites"); |
|||
System.out.println("=".repeat(80) + "\n"); |
|||
|
|||
List<Movie> allMovies = new ArrayList<>(); |
|||
List<Movie> doubanMovies = new ArrayList<>(); |
|||
List<Movie> quotesMovies = new ArrayList<>(); |
|||
List<Movie> tencentMovies = new ArrayList<>(); |
|||
|
|||
// Crawl Douban
|
|||
System.out.println("\n" + "-".repeat(80)); |
|||
System.out.println("Crawling Douban Movies..."); |
|||
System.out.println("-".repeat(80)); |
|||
try { |
|||
DoubanCrawler doubanCrawler = new DoubanCrawler(); |
|||
doubanMovies = doubanCrawler.crawl(); |
|||
allMovies.addAll(doubanMovies); |
|||
System.out.println("\nDouban crawling completed! Got " + doubanMovies.size() + " movies"); |
|||
printMovieList("Douban", doubanMovies); |
|||
} catch (Exception e) { |
|||
System.err.println("Douban crawling error: " + e.getMessage()); |
|||
} |
|||
|
|||
// Crawl Quotes
|
|||
System.out.println("\n" + "-".repeat(80)); |
|||
System.out.println("Crawling Quotes to Movies..."); |
|||
System.out.println("-".repeat(80)); |
|||
try { |
|||
QuotesCrawler quotesCrawler = new QuotesCrawler(); |
|||
quotesMovies = quotesCrawler.crawl(); |
|||
allMovies.addAll(quotesMovies); |
|||
System.out.println("\nQuotes crawling completed! Got " + quotesMovies.size() + " movies"); |
|||
printMovieList("Quotes", quotesMovies); |
|||
} catch (Exception e) { |
|||
System.err.println("Quotes crawling error: " + e.getMessage()); |
|||
} |
|||
|
|||
// Crawl Tencent
|
|||
System.out.println("\n" + "-".repeat(80)); |
|||
System.out.println("Crawling Tencent Movies..."); |
|||
System.out.println("-".repeat(80)); |
|||
try { |
|||
TencentCrawler tencentCrawler = new TencentCrawler(); |
|||
tencentMovies = tencentCrawler.crawl(); |
|||
allMovies.addAll(tencentMovies); |
|||
System.out.println("\nTencent crawling completed! Got " + tencentMovies.size() + " movies"); |
|||
printMovieList("Tencent", tencentMovies); |
|||
} catch (Exception e) { |
|||
System.err.println("Tencent crawling error: " + e.getMessage()); |
|||
} |
|||
|
|||
// Sort by rating descending
|
|||
allMovies.sort(Comparator.comparing(Movie::getRating).reversed()); |
|||
|
|||
// Show summary
|
|||
System.out.println("\n\n" + "=".repeat(80)); |
|||
System.out.println(" Crawling Summary"); |
|||
System.out.println("=".repeat(80)); |
|||
System.out.println("\nData statistics:"); |
|||
System.out.println(" - Douban: " + doubanMovies.size() + " movies"); |
|||
System.out.println(" - Quotes: " + quotesMovies.size() + " movies"); |
|||
System.out.println(" - Tencent: " + tencentMovies.size() + " movies"); |
|||
System.out.println(" - Total: " + allMovies.size() + " movies"); |
|||
|
|||
System.out.println("\nTop 50 Movies:"); |
|||
System.out.println("-".repeat(80)); |
|||
System.out.printf("%-6s %-35s %-10s %-15s %-12s %-15s %-10s\n", |
|||
"Rank", "Title", "Rating", "Source", "Year", "Director", "Actors"); |
|||
System.out.println("-".repeat(80)); |
|||
|
|||
int count = 0; |
|||
for (Movie movie : allMovies) { |
|||
if (count >= 50) break; |
|||
System.out.printf("%-6d %-35s %-10.1f %-15s %-12s %-15s %-10s\n", |
|||
count + 1, |
|||
truncate(movie.getTitle(), 35), |
|||
movie.getRating() != null ? movie.getRating() : 0.0, |
|||
movie.getSource(), |
|||
movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown", |
|||
truncate(movie.getDirector(), 15), |
|||
truncate(movie.getActors(), 10)); |
|||
count++; |
|||
} |
|||
|
|||
System.out.println("-".repeat(80) + "\n"); |
|||
|
|||
// Save to CSV file
|
|||
String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); |
|||
String csvFilename = "movies_" + timestamp + ".csv"; |
|||
try { |
|||
saveToCSV(allMovies, doubanMovies, quotesMovies, tencentMovies, csvFilename); |
|||
System.out.println("Data saved to file: " + csvFilename); |
|||
System.out.println(" - Contains complete data, can be opened directly in Excel"); |
|||
} catch (IOException e) { |
|||
System.err.println("Save CSV error: " + e.getMessage()); |
|||
} |
|||
|
|||
System.out.println("\n" + "=".repeat(80)); |
|||
System.out.println(" Program completed!"); |
|||
System.out.println("=".repeat(80) + "\n"); |
|||
} |
|||
|
|||
private static void printMovieList(String source, List<Movie> movies) { |
|||
System.out.println("\n" + source + " Movie List (First 30):"); |
|||
System.out.println("-".repeat(80)); |
|||
System.out.printf("%-6s %-35s %-10s %-12s %-15s %-10s\n", |
|||
"No.", "Title", "Rating", "Year", "Director", "Source"); |
|||
System.out.println("-".repeat(80)); |
|||
|
|||
int index = 0; |
|||
for (Movie movie : movies) { |
|||
if (index >= 30) break; |
|||
System.out.printf("%-6d %-35s %-10.1f %-12s %-15s %-10s\n", |
|||
index + 1, |
|||
truncate(movie.getTitle(), 35), |
|||
movie.getRating() != null ? movie.getRating() : 0.0, |
|||
movie.getReleaseDate() != null && !movie.getReleaseDate().isEmpty() ? movie.getReleaseDate() : "Unknown", |
|||
truncate(movie.getDirector(), 15), |
|||
movie.getSource()); |
|||
index++; |
|||
} |
|||
System.out.println("-".repeat(80)); |
|||
} |
|||
|
|||
private static void saveToCSV(List<Movie> allMovies, List<Movie> doubanMovies, |
|||
List<Movie> quotesMovies, List<Movie> tencentMovies, |
|||
String filename) throws IOException { |
|||
try (BufferedWriter writer = new BufferedWriter(new FileWriter(filename))) { |
|||
// Write header
|
|||
writer.write("No.,Title,Rating,Director,Actors,Year,Source"); |
|||
writer.newLine(); |
|||
|
|||
// Write all movies
|
|||
writer.newLine(); |
|||
writer.write("All Movies (Sorted by Rating)"); |
|||
writer.newLine(); |
|||
writeMoviesToCSV(writer, allMovies); |
|||
|
|||
// Write Douban movies
|
|||
writer.newLine(); |
|||
writer.write("Douban Movies"); |
|||
writer.newLine(); |
|||
writeMoviesToCSV(writer, doubanMovies); |
|||
|
|||
// Write Quotes movies
|
|||
writer.newLine(); |
|||
writer.write("Quotes Movies"); |
|||
writer.newLine(); |
|||
writeMoviesToCSV(writer, quotesMovies); |
|||
|
|||
// Write Tencent movies
|
|||
writer.newLine(); |
|||
writer.write("Tencent Movies"); |
|||
writer.newLine(); |
|||
writeMoviesToCSV(writer, tencentMovies); |
|||
} |
|||
} |
|||
|
|||
private static void writeMoviesToCSV(BufferedWriter writer, List<Movie> movies) throws IOException { |
|||
int index = 0; |
|||
for (Movie movie : movies) { |
|||
String line = String.format("%d,\"%s\",%.1f,\"%s\",\"%s\",\"%s\",\"%s\"", |
|||
index + 1, |
|||
escapeCSV(movie.getTitle()), |
|||
movie.getRating() != null ? movie.getRating() : 0.0, |
|||
escapeCSV(movie.getDirector()), |
|||
escapeCSV(movie.getActors()), |
|||
escapeCSV(movie.getReleaseDate()), |
|||
escapeCSV(movie.getSource())); |
|||
writer.write(line); |
|||
writer.newLine(); |
|||
index++; |
|||
} |
|||
} |
|||
|
|||
private static String escapeCSV(String str) { |
|||
if (str == null || str.isEmpty()) { |
|||
return ""; |
|||
} |
|||
if (str.contains(",") || str.contains("\"") || str.contains("\n")) { |
|||
return "\"" + str.replace("\"", "\"\"") + "\""; |
|||
} |
|||
return str; |
|||
} |
|||
|
|||
private static String truncate(String str, int maxLength) { |
|||
if (str == null || str.isEmpty()) { |
|||
return ""; |
|||
} |
|||
return str.length() > maxLength ? str.substring(0, maxLength - 3) + "..." : str; |
|||
} |
|||
} |
|||
Loading…
Reference in new issue