import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.ArrayList; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; public class MovieCrawler extends BaseMovieCrawler { private static final String URL = "https://movie.douban.com/top250?start=0"; @Override public List crawl(int limit) { List movies = new ArrayList<>(); try { Document doc = Jsoup.connect(URL) .userAgent("Mozilla/5.0") .timeout(10000) .get(); Elements items = doc.select(".item"); int count = 0; for (Element item : items) { if (count >= limit) break; Movie movie = parseMovie(item); if (movie != null) { movies.add(movie); count++; } } } catch (Exception e) { System.out.println("模拟电影数据(因网络403)"); for (int i = 0; i < limit; i++) { if (i % 2 == 0) { movies.add(new TheatreMovie("肖申克的救赎 " + i, 1994, 9.7, "剧情", 49.9)); } else { movies.add(new AnimatedMovie("哪吒之魔童降世 " + i, 2019, 8.4, "动画", "可可豆")); } } } return movies; } @Override protected Movie parseMovie(Element element) { try { String title = element.select(".hd .title").first().text(); String info = element.select(".bd p").first().text(); Matcher m = Pattern.compile("(\\d{4})").matcher(info); int year = m.find() ? Integer.parseInt(m.group(1)) : 2023; double rating = Double.parseDouble(element.select(".rating_num").first().text()); String genre = info.contains("/") ? info.split("/")[2].trim() : "未知"; return new TheatreMovie(title, year, rating, genre, 59.9); } catch (Exception e) { return null; } } }