import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.ArrayList; import java.util.List; public class MovieCrawler extends BaseMovieCrawler { private static final String DOUBAN_URL = "https://movie.douban.com/top250?start=0"; private static final String XIAOHONGSHU_URL = "https://www.xiaohongshu.com"; private static final String DOUYIN_URL = "https://www.douyin.com"; @Override public List crawl(int limit) { List list = new ArrayList<>(); list.addAll(crawlByType(DOUBAN_URL, "douban", limit)); list.addAll(crawlByType(XIAOHONGSHU_URL, "xiaohongshu", limit)); list.addAll(crawlByType(DOUYIN_URL, "douyin", limit)); return list; } private List crawlByType(String url, String type, int limit) { List res = new ArrayList<>(); try { Document doc = Jsoup.connect(url) .userAgent("Mozilla/5.0") .timeout(10000) .get(); Elements items = doc.select(".item"); if (items.isEmpty()) items = doc.select("div"); int count = 0; for (Element e : items) { if (count >= limit) break; Movie m = parseMovie(e, type); if (m != null) { res.add(m); count++; } } } catch (Exception ex) { System.out.println(type + " 抓取失败(反爬保护),已跳过"); } return res; } @Override protected Movie parseMovie(Element element, String type) { try { if (type.equals("douban")) { String title = element.select(".hd .title").first().text(); double rating = Double.parseDouble(element.select(".rating_num").first().text()); return new TheatreMovie(title, 2024, rating, "电影", 49.9); } if (type.equals("xiaohongshu")) { String title = element.select("h2").first().text(); // 改成你真实的类名 Xiaohongshu return new XiaohongshuMovie(title, 2024, 9.2, "笔记", "小红书用户"); } if (type.equals("douyin")) { String title = element.select("h3").first().text(); // 改成你真实的类名 Douyin return new DouyinMovie(title, 2024, 9.5, "视频", "100w+"); } } catch (Exception e) { return null; } return null; } }