package strategy;

import model.CrawlResult;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import exception.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * Crawl strategy for the film listing addressed by {@link #BASE_URL}.
 *
 * <p>Each page is fetched through the inherited {@code fetchDocument} and parsed with a
 * cascade of CSS selectors. When a page yields no parseable items, the strategy falls back
 * to the built-in {@link #BACKUP_MOVIES} table, paginated with the same page size.
 */
public class MovieStrategy extends AbstractCrawlStrategy {

    private static final Logger logger = LoggerFactory.getLogger(MovieStrategy.class);

    private static final String BASE_URL = "https://www.maoyan.com/films?offset=%d";
    private static final String SITE_NAME = "猫眼电影";

    /** Number of items per page; also the step of the {@code offset} query parameter. */
    private static final int PAGE_SIZE = 30;

    private static final Random random = new Random();

    /** Selectors tried in order to locate the movie entries; the first non-empty match wins. */
    private static final String[] ITEM_SELECTORS = {
        "div.movie-item",
        ".movie-list .movie-item",
        "dl.movie-list dd",
        "[class*=movie-item]",
        "div.card"
    };

    /** Selectors tried in order for the title; the first one yielding non-empty text wins. */
    private static final String[] TITLE_SELECTORS = {
        "p.name a", ".movie-title", "[class*=title]", "a"
    };

    /** Primary score selector; its text is used verbatim. */
    private static final String PRIMARY_SCORE_SELECTOR = "p.score i";

    /** Secondary score selectors; their text is reduced to digits and dots before use. */
    private static final String[] FALLBACK_SCORE_SELECTORS = {".score", "[class*=score]"};

    /** Selectors tried in order for the category text. */
    private static final String[] CATEGORY_SELECTORS = {"p.classify", "[class*=tag]"};

    /** Selectors tried in order for the poster image element. */
    private static final String[] IMAGE_SELECTORS = {"img.movie-poster", "img"};

    /**
     * Fallback data used when the live page yields nothing. Each row is
     * {@code {title, rating, category}}.
     *
     * <p>NOTE(review): "千与千寻" and "你的名字" each appear twice in this table; the rows are
     * kept as-is so that fallback pagination is unchanged. Confirm whether the duplicates
     * are intended.
     */
    private static final String[][] BACKUP_MOVIES = {
        {"肖申克的救赎", "9.7", "剧情/犯罪"}, {"霸王别姬", "9.6", "剧情/爱情"},
        {"阿甘正传", "9.5", "剧情/爱情"}, {"泰坦尼克号", "9.4", "剧情/爱情"},
        {"千与千寻", "9.4", "剧情/动画"}, {"盗梦空间", "9.3", "剧情/科幻"},
        {"星际穿越", "9.4", "科幻/冒险"}, {"忠犬八公", "9.4", "剧情/家庭"},
        {"海上钢琴师", "9.3", "剧情/音乐"}, {"楚门的世界", "9.4", "剧情/科幻"},
        {"三傻大闹宝莱坞", "9.2", "剧情/喜剧"}, {"机器人总动员", "9.3", "动画/冒险"},
        {"疯狂动物城", "9.2", "动画/冒险"}, {"寻梦环游记", "9.1", "动画/音乐"},
        {"飞屋环游记", "9.0", "动画/冒险"}, {"神偷奶爸", "8.6", "动画/喜剧"},
        {"超能陆战队", "8.7", "动画/动作"}, {"冰雪奇缘", "8.4", "动画/冒险"},
        {"大话西游之大圣娶亲", "9.2", "喜剧/爱情"}, {"大话西游之月光宝盒", "9.0", "喜剧/奇幻"},
        {"东成西就", "8.8", "喜剧/奇幻"}, {"唐伯虎点秋香", "8.7", "喜剧/爱情"},
        {"九品芝麻官", "8.6", "喜剧/剧情"}, {"功夫", "8.8", "动作/喜剧"},
        {"少林足球", "8.4", "喜剧/运动"}, {"无间道", "9.3", "剧情/犯罪"},
        {"活着", "9.3", "剧情/历史"}, {"我不是药神", "9.0", "剧情/喜剧"},
        {"哪吒之魔童降世", "8.8", "动画/奇幻"}, {"流浪地球", "8.0", "科幻/冒险"},
        {"疯狂的外星人", "7.5", "喜剧/科幻"}, {"飞驰人生", "7.8", "喜剧/运动"},
        {"满城尽带黄金甲", "7.2", "剧情/战争"}, {"让子弹飞", "8.8", "剧情/喜剧"},
        {"邪不压正", "7.4", "剧情/动作"}, {"阳光灿烂的日子", "8.8", "剧情/爱情"},
        {"重庆森林", "8.8", "剧情/爱情"}, {"春光乍泄", "8.9", "剧情/爱情"},
        {"花样年华", "8.7", "剧情/爱情"}, {"阿飞正传", "8.5", "剧情/爱情"},
        {"倩女幽魂", "8.7", "爱情/恐怖"}, {"青蛇", "8.6", "剧情/奇幻"},
        {"大闹天宫", "8.4", "动画/奇幻"}, {"天书奇谭", "9.0", "动画/奇幻"},
        {"哪吒闹海", "9.1", "动画/奇幻"}, {"大鱼海棠", "7.0", "动画/奇幻"},
        {"西游记之大圣归来", "8.3", "动画/奇幻"}, {"白蛇:缘起", "7.9", "动画/奇幻"},
        {"风语咒", "7.8", "动画/奇幻"}, {"大护法", "7.9", "动画/奇幻"},
        {"你的名字", "8.5", "动画/爱情"}, {"千与千寻", "9.4", "动画/奇幻"},
        {"哈尔的移动城堡", "9.1", "动画/奇幻"}, {"龙猫", "9.2", "动画/家庭"},
        {"天空之城", "9.1", "动画/奇幻"}, {"幽灵公主", "8.9", "动画/奇幻"},
        {"魔女宅急便", "8.7", "动画/奇幻"}, {"侧耳倾听", "8.9", "动画/爱情"},
        {"萤火之森", "8.9", "动画/奇幻"}, {"秒速5厘米", "8.3", "动画/爱情"},
        {"你的名字", "8.5", "动画/爱情"}, {"天气之子", "7.8", "动画/爱情"},
        {"铃芽之旅", "7.8", "动画/奇幻"}, {"刀剑神域", "8.5", "动画/动作"},
        {"进击的巨人", "9.3", "动画/动作"}, {"东京食尸鬼", "8.6", "动画/恐怖"},
        {"鬼灭之刃", "8.8", "动画/动作"}, {"一拳超人", "9.4", "动画/动作"},
        {"银魂", "9.6", "动画/喜剧"}, {"七龙珠", "9.4", "动画/动作"},
        {"海贼王", "9.5", "动画/冒险"}, {"火影忍者", "9.1", "动画/动作"},
        {"死神", "8.9", "动画/动作"}, {"灌篮高手", "9.6", "动画/运动"},
        {"网球王子", "8.7", "动画/运动"}, {"名侦探柯南", "9.2", "动画/悬疑"},
        {"蜡笔小新", "9.2", "动画/喜剧"}, {"哆啦A梦", "9.5", "动画/奇幻"},
        {"宠物小精灵", "8.8", "动画/冒险"}, {"数码宝贝", "8.9", "动画/冒险"},
        {"Transformers", "8.5", "科幻/动作"}, {"Avengers", "8.5", "科幻/动作"},
        {"Iron Man", "8.6", "科幻/动作"}, {"Spider-Man", "8.4", "科幻/动作"},
        {"Batman", "8.8", "科幻/动作"}, {"The Dark Knight", "9.2", "剧情/动作"},
        {"Inception", "9.3", "科幻/动作"}, {"Interstellar", "9.4", "科幻/冒险"},
        {"The Shawshank Redemption", "9.7", "剧情/犯罪"}, {"Forrest Gump", "9.5", "剧情/爱情"},
        {"The Godfather", "9.3", "剧情/犯罪"}, {"Pulp Fiction", "8.9", "剧情/犯罪"},
        {"Schindler's List", "9.5", "剧情/历史"}, {"The Lord of the Rings", "9.3", "奇幻/冒险"},
        {"The Hobbit", "8.9", "奇幻/冒险"}, {"Harry Potter", "8.8", "奇幻/冒险"},
        {"Fantastic Beasts", "7.5", "奇幻/冒险"}, {"Star Wars", "8.7", "科幻/冒险"},
        {"Avatar", "8.6", "科幻/冒险"}, {"Titanic", "9.4", "剧情/爱情"},
        {"The Notebook", "8.8", "剧情/爱情"}, {"La La Land", "8.6", "剧情/爱情"},
        {"The Princess Bride", "8.7", "奇幻/喜剧"}, {"Back to the Future", "8.6", "科幻/喜剧"},
        {"The Matrix", "8.7", "科幻/动作"}, {"Terminator", "8.6", "科幻/动作"},
        {"Jurassic Park", "8.2", "科幻/冒险"}, {"The Lion King", "9.0", "动画/冒险"},
        {"Beauty and the Beast", "8.5", "动画/爱情"}, {"Toy Story", "8.5", "动画/喜剧"},
        {"Finding Nemo", "8.4", "动画/冒险"}, {"Up", "9.0", "动画/冒险"},
        {"Inside Out", "8.8", "动画/喜剧"}, {"Coco", "9.1", "动画/音乐"},
        {"Soul", "8.8", "动画/喜剧"}, {"Monsters Inc", "8.8", "动画/喜剧"}
    };

    @Override
    public String getBaseUrl() {
        return BASE_URL;
    }

    @Override
    public String getSiteName() {
        return SITE_NAME;
    }

    /**
     * Fetches and parses one listing page, falling back to {@link #BACKUP_MOVIES} when the
     * page is unavailable or yields no items.
     *
     * @param page 1-based page number; values below 1 produce a negative {@code offset} in
     *             the request URL and an empty fallback result
     * @return the parsed items, or the matching slice of the fallback table; never {@code null}
     * @throws IOException    declared by the overridden method
     * @throws ParseException declared by the overridden method
     */
    @Override
    public List<CrawlResult> crawlPage(int page) throws IOException, ParseException {
        // Computed in long so an extreme page number cannot wrap around.
        long offset = ((long) page - 1) * PAGE_SIZE;
        String url = String.format(BASE_URL, offset);
        logger.info("正在爬取猫眼电影第 {} 页: {}", page, url);

        List<CrawlResult> results = new ArrayList<>();
        Document doc = fetchDocument(url);
        if (doc != null) {
            results = parseMoviePage(doc);
        }
        if (results.isEmpty()) {
            logger.info("使用备用电影数据");
            results = getBackupMovies(page);
        }

        logger.info("猫眼电影第 {} 页获取 {} 条数据", page, results.size());
        return results;
    }

    /**
     * Extracts up to {@link #PAGE_SIZE} movies from a listing document. Entries without a
     * resolvable title are skipped; a failure on one entry does not abort the others.
     */
    private List<CrawlResult> parseMoviePage(Document doc) {
        List<CrawlResult> results = new ArrayList<>();
        for (Element item : selectMovieItems(doc)) {
            try {
                String title = firstText(item, TITLE_SELECTORS);
                if (title.isEmpty()) {
                    logger.debug("跳过无法解析标题的电影项");
                    continue;
                }

                double rating = parseRating(extractScore(item));
                String category = firstText(item, CATEGORY_SELECTORS);
                String imageUrl = firstSrc(item, IMAGE_SELECTORS);

                StringBuilder info = new StringBuilder("评分: ")
                        .append(String.format("%.1f", rating));
                if (!category.isEmpty()) {
                    info.append(" | 类型: ").append(category);
                }
                info.append(" | 来源: 猫眼电影");

                results.add(toResult(title, rating, imageUrl, info.toString()));
                if (results.size() >= PAGE_SIZE) {
                    break;
                }
            } catch (Exception e) {
                // Best effort per item; the trailing throwable keeps the stack trace in the log.
                logger.debug("解析电影项失败: {}", e.getMessage(), e);
            }
        }
        return results;
    }

    /** Returns the matches of the first selector in {@link #ITEM_SELECTORS} that finds anything. */
    private static Elements selectMovieItems(Document doc) {
        Elements items = new Elements();
        for (String selector : ITEM_SELECTORS) {
            items = doc.select(selector);
            if (!items.isEmpty()) {
                break;
            }
        }
        return items;
    }

    /** Returns the text of the first selector match with non-empty text, or {@code ""}. */
    private static String firstText(Element item, String[] selectors) {
        for (String selector : selectors) {
            Element match = item.selectFirst(selector);
            if (match != null) {
                String text = match.text();
                if (!text.isEmpty()) {
                    return text;
                }
            }
        }
        return "";
    }

    /** Returns the {@code src} attribute of the first selector match that has one, or {@code ""}. */
    private static String firstSrc(Element item, String[] selectors) {
        for (String selector : selectors) {
            Element match = item.selectFirst(selector);
            if (match != null) {
                String src = match.attr("src");
                if (!src.isEmpty()) {
                    return src;
                }
            }
        }
        return "";
    }

    /**
     * Resolves the raw score string for an entry. Starts from {@code "0"}, takes the primary
     * selector's text verbatim, and consults the fallback selectors only while the score is
     * still {@code "0"} or empty.
     */
    private static String extractScore(Element item) {
        String score = "0";
        Element match = item.selectFirst(PRIMARY_SCORE_SELECTOR);
        if (match != null) {
            score = match.text();
        }
        for (String selector : FALLBACK_SCORE_SELECTORS) {
            if (!score.equals("0") && !score.isEmpty()) {
                break;
            }
            match = item.selectFirst(selector);
            if (match != null) {
                // Fallback elements may carry extra text around the number; keep digits and dots.
                score = match.text().replaceAll("[^0-9.]", "");
            }
        }
        return score;
    }

    /**
     * Parses a score string into a rating.
     *
     * <p>NOTE(review): an unparseable score is replaced by a random value in [7.0, 9.5),
     * so such entries get a fabricated, non-deterministic rating. Behavior kept from the
     * original; confirm whether this is intended.
     */
    private static double parseRating(String score) {
        try {
            return Double.parseDouble(score);
        } catch (NumberFormatException ignored) {
            return 7.0 + random.nextDouble() * 2.5;
        }
    }

    /**
     * Builds a {@link CrawlResult} whose price fields are derived from the rating:
     * price is rating x 10, original price is price x 1.1, and discount is the rating
     * rounded to one decimal place.
     */
    private static CrawlResult toResult(String title, double rating, String imageUrl, String info) {
        double price = rating * 10;
        double originalPrice = price * 1.1;
        double discount = Math.round(rating * 10) / 10.0;
        return new CrawlResult(title, price, originalPrice, discount, imageUrl, info);
    }

    /**
     * Returns the slice of {@link #BACKUP_MOVIES} for the given 1-based page, or an empty
     * list when the page lies outside the table (including pages below 1).
     */
    private List<CrawlResult> getBackupMovies(int page) {
        List<CrawlResult> results = new ArrayList<>();

        // long arithmetic plus an explicit lower bound: a non-positive page must not index the array.
        long start = ((long) page - 1) * PAGE_SIZE;
        if (start >= 0 && start < BACKUP_MOVIES.length) {
            int from = (int) start;
            int to = Math.min(from + PAGE_SIZE, BACKUP_MOVIES.length);
            for (int i = from; i < to; i++) {
                String[] movie = BACKUP_MOVIES[i];
                String title = movie[0];
                String ratingStr = movie[1];
                String category = movie[2];
                String info = "评分: " + ratingStr + " | 类型: " + category + " | 来源: 猫眼电影Top100";
                results.add(toResult(title, Double.parseDouble(ratingStr), "", info));
            }
        }

        logger.info("备用电影数据生成完成,第 {} 页获取 {} 条数据", page, results.size());
        return results;
    }

    /**
     * Not used by {@link #crawlPage(int)}, which parses entries itself.
     *
     * @return always {@code null}
     */
    @Override
    public CrawlResult parseItem(Element element) throws ParseException {
        return null;
    }

    @Override
    public int getPageSize() {
        return PAGE_SIZE;
    }
}