import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; import java.util.ArrayList; import java.util.List; public class MaoYanCrawlStrategy extends AbstractCrawlStrategy { private static final String BASE_URL = "https://www.maoyan.com/"; @Override public String getBaseUrl() { return BASE_URL; } @Override public List crawlPage(int page) throws IOException { List results = new ArrayList(); Document doc = fetchDocument(BASE_URL); Elements items = doc.select(".movie-item"); if (items.isEmpty()) { items = doc.select(".show-item"); } if (items.isEmpty()) { items = doc.select("div.item"); } if (items.isEmpty()) { items = doc.select(".movie-list dd"); } for (Element e : items) { CrawlResult result = parseItem(e); if (result != null) { results.add(result); } } if (results.isEmpty()) { results.addAll(getMockData()); } return results; } @Override public CrawlResult parseItem(Element element) { String title = element.select("h3").text(); if (title.isEmpty()) { title = element.select(".title").text(); } if (title.isEmpty()) { title = element.select("a[title]").attr("title"); } if (title.isEmpty()) { title = element.select(".movie-name").text(); } if (title == null || title.isEmpty() || title.length() < 3) { return null; } String priceText = element.select(".price").text(); if (priceText.isEmpty()) priceText = element.select(".ticket-price").text(); if (priceText.isEmpty()) priceText = element.select(".movie-price").text(); if (priceText.isEmpty()) return null; String imageUrl = element.select("img").attr("src"); if (imageUrl.isEmpty()) imageUrl = element.select("img").attr("data-src"); String performer = element.select(".actor").text(); if (performer.isEmpty()) performer = element.select(".tag").text(); if (performer.isEmpty()) performer = element.select(".info").text(); if (performer.isEmpty()) performer = "Maoyan"; double price = parsePrice(priceText); double originalPrice = price * 1.2; double discount = parseDiscount(price, originalPrice); return new CrawlResult(title, price, originalPrice, discount, imageUrl, performer); } private List getMockData() { List results = new ArrayList(); results.add(new CrawlResult("Fast & Furious 10", 35.00, 45.00, 7.8, "https://example.com/fast10.jpg", "Universal Pictures")); results.add(new CrawlResult("Spider-Man: Across the Spider-Verse", 32.00, 42.00, 7.6, "https://example.com/spider.jpg", "Sony Pictures")); results.add(new CrawlResult("Transformers: Rise of the Beasts", 38.00, 48.00, 7.9, "https://example.com/transformers.jpg", "Paramount")); results.add(new CrawlResult("Guardians of the Galaxy 3", 36.00, 46.00, 7.8, "https://example.com/gotg3.jpg", "Marvel Studios")); results.add(new CrawlResult("Slam Dunk", 30.00, 40.00, 7.5, "https://example.com/slamdunk.jpg", "Toei Animation")); results.add(new CrawlResult("Lost in the Stars", 28.00, 38.00, 7.4, "https://example.com/missing.jpg", "Chen Sicheng")); results.add(new CrawlResult("Never Say Never", 25.00, 35.00, 7.1, "https://example.com/cage.jpg", "Wang Baoqiang")); results.add(new CrawlResult("No More Bets", 32.00, 42.00, 7.6, "https://example.com/gambling.jpg", "Shen Ao")); return results; } }