You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
59 lines
2.7 KiB
59 lines
2.7 KiB
package com.yyt.moviecrawler;
|
|
|
|
import com.yyt.moviecrawler.model.Book;
|
|
import com.yyt.moviecrawler.model.Movie;
|
|
import com.yyt.moviecrawler.model.NewsArticle;
|
|
import com.yyt.moviecrawler.strategy.BookStrategy;
|
|
import com.yyt.moviecrawler.strategy.CrawlerStrategy;
|
|
import com.yyt.moviecrawler.strategy.DoubanStrategy;
|
|
import com.yyt.moviecrawler.strategy.XiaohongshuStrategy;
|
|
import com.yyt.moviecrawler.util.CrawlerContext;
|
|
import com.yyt.moviecrawler.util.ExcelExporter;
|
|
import com.yyt.moviecrawler.strategy.DoubanBookStrategy;
|
|
|
|
import java.util.List;
|
|
import java.util.Scanner;
|
|
|
|
public class Main {
|
|
public static void main(String[] args) {
|
|
CrawlerContext context = new CrawlerContext();
|
|
Scanner scanner = new Scanner(System.in);
|
|
int crawlNum = 20;
|
|
|
|
// 1. 豆瓣电影(已正常工作)
|
|
System.out.println("=====================================");
|
|
System.out.println("开始爬取【豆瓣电影】数据");
|
|
CrawlerStrategy doubanStrategy = new DoubanStrategy();
|
|
context.setStrategy(doubanStrategy);
|
|
List<Movie> doubanMovieList = context.executeStrategy(crawlNum);
|
|
ExcelExporter.exportMovies(doubanMovieList, "豆瓣电影数据.xlsx");
|
|
|
|
// 2. 小红书(修复:可爬20条,且不会被前面的异常中断)
|
|
System.out.println("\n=====================================");
|
|
System.out.println("开始爬取【小红书】数据");
|
|
System.out.println("请在弹出的浏览器中完成登录,登录完毕后按下回车键继续...");
|
|
scanner.nextLine();
|
|
CrawlerStrategy xhsStrategy = new XiaohongshuStrategy();
|
|
context.setStrategy(xhsStrategy);
|
|
List<Movie> xhsMovieList = context.executeStrategy(crawlNum);
|
|
ExcelExporter.exportMovies(xhsMovieList, "小红书数据.xlsx");
|
|
|
|
// 3. 图书网站
|
|
System.out.println("\n=====================================");
|
|
System.out.println("开始爬取【图书网站】数据");
|
|
BookStrategy bookStrategy = new BookStrategy();
|
|
List<Book> bookList = bookStrategy.crawl(crawlNum);
|
|
ExcelExporter.exportBooks(bookList, "图书数据.xlsx");
|
|
|
|
// 4. 豆瓣读书(真实爬取,非模拟)
|
|
System.out.println("\n=====================================");
|
|
System.out.println("开始爬取【豆瓣读书】数据");
|
|
DoubanBookStrategy doubanBookStrategy = new DoubanBookStrategy();
|
|
List<Book> doubanBookList = doubanBookStrategy.crawl(crawlNum);
|
|
ExcelExporter.exportBooks(doubanBookList, "豆瓣读书数据.xlsx");
|
|
|
|
System.out.println("\n=====================================");
|
|
System.out.println("🎉 所有爬虫任务执行完毕!");
|
|
scanner.close();
|
|
}
|
|
}
|