You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
38 lines
1.5 KiB
38 lines
1.5 KiB
package com.yyt.moviecrawler.strategy;
|
|
|
|
import com.yyt.moviecrawler.model.Movie;
|
|
import com.yyt.moviecrawler.model.DoubanMovie;
|
|
import org.openqa.selenium.By;
|
|
import org.openqa.selenium.WebDriver;
|
|
import org.openqa.selenium.WebElement;
|
|
import org.openqa.selenium.chrome.ChromeDriver;
|
|
|
|
import java.util.ArrayList;
|
|
import java.util.List;
|
|
|
|
public class DoubanStrategy implements CrawlerStrategy {
|
|
@Override
|
|
public List<Movie> crawl(int limit) {
|
|
WebDriver driver = new ChromeDriver();
|
|
driver.get("https://movie.douban.com/top250");
|
|
List<Movie> movies = new ArrayList<>();
|
|
try {
|
|
Thread.sleep(3000);
|
|
List<WebElement> elements = driver.findElements(By.cssSelector(".item"));
|
|
for (int i = 0; i < Math.min(limit, elements.size()); i++) {
|
|
WebElement el = elements.get(i);
|
|
String title = el.findElement(By.cssSelector(".title")).getText();
|
|
double score = Double.parseDouble(el.findElement(By.cssSelector(".rating_num")).getText());
|
|
String type = el.findElement(By.cssSelector(".bd p")).getText().split("/")[1].trim();
|
|
String author = el.findElement(By.cssSelector(".bd p")).getText().split("/")[0].trim();
|
|
|
|
movies.add(new DoubanMovie(title, score, type, author));
|
|
}
|
|
} catch (Exception e) {
|
|
e.printStackTrace();
|
|
} finally {
|
|
driver.quit();
|
|
}
|
|
return movies;
|
|
}
|
|
}
|