package com.yyt.moviecrawler.strategy;

import com.yyt.moviecrawler.model.Book;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;

import java.time.Duration;
import java.util.ArrayList;
import java.util.List;

/**
 * Crawls the Douban Books "小说" (fiction) tag listing with a Selenium-driven
 * Chrome browser and converts each listed entry into a {@link Book}.
 */
public class DoubanBookStrategy {

    /** Douban Books tag page for the fiction category; first page that is loaded. */
    private static final String START_URL = "https://book.douban.com/tag/小说?type=T";

    /** Category value stored on every crawled book. */
    private static final String CATEGORY = "小说";

    /** The listing exposes no price, so every book gets this placeholder. */
    private static final double DEFAULT_PRICE = 0.0;

    /** Fixed pause after each navigation to give the page time to render. */
    private static final long PAGE_LOAD_PAUSE_MILLIS = 3000L;

    /** How long element lookups wait before giving up. */
    private static final Duration IMPLICIT_WAIT = Duration.ofSeconds(5);

    /**
     * Collects up to {@code limit} books, following the "next page" link until
     * enough books were gathered or no further page is available.
     *
     * <p>Entries whose title or rating cannot be read are skipped. Failures while
     * crawling are printed and the books gathered so far are returned. If the
     * calling thread is interrupted while waiting for a page, crawling stops, the
     * interrupt flag is restored and the partial result is returned. Failures while
     * setting up or starting the browser are not caught and propagate to the caller.
     *
     * @param limit maximum number of books to return; for values {@code <= 0} no
     *              browser is started and an empty list is returned
     * @return a mutable list with at most {@code limit} books, never {@code null}
     */
    public List<Book> crawl(int limit) {
        List<Book> bookList = new ArrayList<>();

        if (limit > 0) {
            WebDriverManager.chromedriver().setup();
            WebDriver driver = new ChromeDriver(buildOptions());
            try {
                // Configured inside the try so the browser is still closed if this fails.
                driver.manage().timeouts().implicitlyWait(IMPLICIT_WAIT);
                collectBooks(driver, limit, bookList);
            } catch (InterruptedException e) {
                // Keep the interrupt visible to the caller instead of swallowing it.
                Thread.currentThread().interrupt();
            } catch (Exception e) {
                e.printStackTrace();
            } finally {
                driver.quit(); // close the browser and release its resources
            }
        }

        System.out.println("✅ 豆瓣读书真实爬取完成,拿到:" + bookList.size() + " 条数据");
        return bookList;
    }

    /** Builds Chrome options that make the automated browser look like a regular user. */
    private ChromeOptions buildOptions() {
        ChromeOptions options = new ChromeOptions();
        options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/147.0.0.0 Safari/537.36");
        options.addArguments("--disable-blink-features=AutomationControlled");
        options.addArguments("--no-sandbox");
        options.addArguments("--disable-dev-shm-usage");
        return options;
    }

    /** Loads the start page and appends parsed books to {@code sink} page by page. */
    private void collectBooks(WebDriver driver, int limit, List<Book> sink)
            throws InterruptedException {
        driver.get(START_URL);
        Thread.sleep(PAGE_LOAD_PAUSE_MILLIS); // wait for the page to finish loading

        while (sink.size() < limit) {
            List<WebElement> items = driver.findElements(By.cssSelector(".info"));
            for (WebElement item : items) {
                if (sink.size() >= limit) {
                    break;
                }
                try {
                    sink.add(parseBook(item));
                } catch (RuntimeException ignored) {
                    // Best effort: an entry with a missing element or an unparsable
                    // rating is skipped so it does not abort the whole crawl.
                }
            }

            // Still short of the limit: move on, or stop when there is no next page.
            if (sink.size() >= limit || !goToNextPage(driver)) {
                break;
            }
        }
    }

    /**
     * Reads title and rating from one listing entry and builds the {@link Book}.
     * Throws an unchecked exception when an element is missing or the rating text
     * is not a number.
     */
    private Book parseBook(WebElement item) {
        String title = item.findElement(By.cssSelector("h2 a")).getText().trim();
        // Rating text as shown on the page, e.g. "9.3".
        String ratingStr = item.findElement(By.cssSelector(".rating_nums")).getText().trim();

        int starRating = 0;
        if (!ratingStr.isEmpty()) {
            starRating = (int) Math.round(Double.parseDouble(ratingStr));
        }

        // Constructor argument order: title, price, starRating, category.
        return new Book(title, DEFAULT_PRICE, starRating, CATEGORY);
    }

    /**
     * Clicks the paginator's "next" link and waits for the following page.
     *
     * @return {@code false} when the link cannot be found or clicked, which is
     *         treated as having reached the last page
     */
    private boolean goToNextPage(WebDriver driver) throws InterruptedException {
        try {
            driver.findElement(By.cssSelector(".paginator .next a")).click();
        } catch (RuntimeException ignored) {
            // No usable next-page link: stop paging.
            return false;
        }
        Thread.sleep(PAGE_LOAD_PAUSE_MILLIS);
        return true;
    }
}