import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

/**
 * Crawl strategy for a JD.com category listing.
 *
 * <p>A listing page is fetched, its product nodes are located with a series of
 * fallback CSS selectors, and each node is converted into a {@link CrawlResult}.
 * When a page yields no parsable product at all, generated placeholder data is
 * returned instead (see {@link #crawlPage(int)}).
 *
 * <p>NOTE(review): {@code fetchDocument}, {@code parsePrice} and {@code parseDiscount}
 * are not defined in this file; they are presumably inherited from
 * {@code AbstractCrawlStrategy} - confirm their contracts there.
 */
public class JDCrawlStrategy extends AbstractCrawlStrategy {

    /** Listing URL template; {@code %d} is replaced by the page number. */
    private static final String BASE_URL = "https://list.jd.com/list.html?cat=1672,3272&page=%d";

    /** Selectors tried in order to locate product nodes; the first one that matches wins. */
    private static final String[] ITEM_SELECTORS = {"li.gl-item", "div.item", "[data-sku]"};

    /** Titles shorter than this are treated as noise and the item is skipped. */
    private static final int MIN_TITLE_LENGTH = 5;

    /** Seller name used when the item markup does not expose one. */
    private static final String DEFAULT_SELLER = "JD";

    /** Number of placeholder items generated per page by {@link #getMockData(int)}. */
    private static final int MOCK_ITEMS_PER_PAGE = 15;

    private static final String[] MOCK_CATEGORIES = {"Womens", "Mens", "Shoes", "Sports", "Bags"};

    private static final String[] MOCK_BRANDS =
            {"Uniqlo", "ZARA", "HM", "Nike", "Adidas", "LiNing", "Anta", "JD"};

    /**
     * Returns the listing URL template.
     *
     * @return the template, still containing the {@code %d} page placeholder
     */
    public String getBaseUrl() {
        return BASE_URL;
    }

    /**
     * Fetches one listing page and parses every product found on it.
     *
     * <p>If no product could be parsed (no node matched, or every node was rejected
     * by {@link #parseItem(Element)}), placeholder data for that page is returned
     * instead of an empty list.
     *
     * @param page page number substituted into the URL template
     * @return the parsed products, or placeholder data when none were parsed; never empty
     * @throws IOException if fetching the page fails
     */
    public List<CrawlResult> crawlPage(int page) throws IOException {
        // Locale.ROOT keeps the page number in ASCII digits regardless of the JVM's default locale.
        String url = String.format(Locale.ROOT, BASE_URL, page);
        Document doc = fetchDocument(url);

        List<CrawlResult> results = new ArrayList<>();
        for (Element item : selectItems(doc)) {
            CrawlResult result = parseItem(item);
            if (result != null) {
                results.add(result);
            }
        }

        if (results.isEmpty()) {
            results.addAll(getMockData(page));
        }
        return results;
    }

    /**
     * Converts one product node into a {@link CrawlResult}.
     *
     * <p>Each field is looked up through several alternative selectors or attributes,
     * taking the first non-empty value. The original price falls back to the current
     * price, and the seller falls back to {@code "JD"}.
     *
     * @param element the product node
     * @return the parsed result, or {@code null} when {@code element} is {@code null},
     *         the title is shorter than five characters, or no price text is present
     */
    public CrawlResult parseItem(Element element) {
        if (element == null) {
            return null;
        }

        String title = element.select("a[title]").attr("title");
        if (title.isEmpty()) {
            title = firstNonEmptyText(element, "h3", ".name");
        }
        // An empty title has length 0, so this single check also rejects missing titles.
        if (title.length() < MIN_TITLE_LENGTH) {
            return null;
        }

        String priceText = firstNonEmptyText(element, ".price strong", ".price", "[class*=price]");
        if (priceText.isEmpty()) {
            return null;
        }

        String originalPriceText = firstNonEmptyText(element, ".origin-price", ".price del");
        if (originalPriceText.isEmpty()) {
            // No separate list price in the markup: treat the item as not discounted.
            originalPriceText = priceText;
        }

        String imageUrl = firstNonEmptyAttr(element.select("img"), "src", "data-lazy-img", "data-src");

        String seller = firstNonEmptyText(element, ".shop-name", ".store-name", ".p-shop a");
        if (seller.isEmpty()) {
            seller = DEFAULT_SELLER;
        }

        double price = parsePrice(priceText);
        double originalPrice = parsePrice(originalPriceText);
        double discount = parseDiscount(price, originalPrice);
        return new CrawlResult(title, price, originalPrice, discount, imageUrl, seller);
    }

    /** Returns the matches of the first item selector that matches anything, else an empty set. */
    private static Elements selectItems(Document doc) {
        Elements items = new Elements();
        for (String selector : ITEM_SELECTORS) {
            items = doc.select(selector);
            if (!items.isEmpty()) {
                break;
            }
        }
        return items;
    }

    /** Returns the text of the first selector that yields non-empty text, else {@code ""}. */
    private static String firstNonEmptyText(Element element, String... selectors) {
        for (String selector : selectors) {
            String text = element.select(selector).text();
            if (!text.isEmpty()) {
                return text;
            }
        }
        return "";
    }

    /** Returns the value of the first attribute that is non-empty on {@code elements}, else {@code ""}. */
    private static String firstNonEmptyAttr(Elements elements, String... attributeNames) {
        for (String attributeName : attributeNames) {
            String value = elements.attr(attributeName);
            if (!value.isEmpty()) {
                return value;
            }
        }
        return "";
    }

    /**
     * Generates placeholder results for a page.
     *
     * <p>Titles, sellers and image URLs are derived deterministically from the page
     * number; prices are random. The discount is the price/original-price ratio
     * expressed on a 0-10 scale with one decimal place.
     *
     * @param page page number; any value is accepted, including zero and negatives
     * @return a list of fifteen placeholder results
     */
    private List<CrawlResult> getMockData(int page) {
        List<CrawlResult> results = new ArrayList<>(MOCK_ITEMS_PER_PAGE);
        for (int i = 0; i < MOCK_ITEMS_PER_PAGE; i++) {
            int idx = (page - 1) * MOCK_ITEMS_PER_PAGE + i;
            // floorMod keeps the array index non-negative when page <= 0 makes idx negative;
            // a plain % would yield a negative index and throw.
            String brand = MOCK_BRANDS[Math.floorMod(idx, MOCK_BRANDS.length)];
            String category = MOCK_CATEGORIES[Math.floorMod(idx, MOCK_CATEGORIES.length)];
            String title = brand + " " + category + " Fashion " + (idx + 1);

            double price = 59 + Math.random() * 800;
            double originalPrice = price * (1.1 + Math.random() * 0.5);
            double discount = Math.round((price / originalPrice) * 100) / 10.0;
            String imageUrl = "https://img14.360buyimg.com/n1/jfs/" + idx + ".jpg";

            results.add(new CrawlResult(
                    title,
                    Math.round(price * 100) / 100.0,
                    Math.round(originalPrice * 100) / 100.0,
                    discount,
                    imageUrl,
                    brand));
        }
        return results;
    }
}