You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

43 lines
1.5 KiB

package com.yyt.moviecrawler.strategy;
import com.yyt.moviecrawler.model.Book;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
/**
 * Scrapes the product listing at {@code http://books.toscrape.com/} and converts each listed
 * product into a {@link Book}.
 */
public class BookStrategy {

    private static final Logger LOGGER = Logger.getLogger(BookStrategy.class.getName());

    /** Page whose product listing is scraped. */
    private static final String SOURCE_URL = "http://books.toscrape.com/";

    /** Fixed category label assigned to every book produced by this strategy. */
    private static final String CATEGORY = "Books to Scrape";

    /** Matches every character that cannot be part of a plain, unsigned decimal number. */
    private static final Pattern NON_NUMERIC = Pattern.compile("[^0-9.]");

    /**
     * Rating words looked for in the {@code class} attribute of the {@code .star-rating}
     * element; the word at index {@code i} maps to a rating of {@code i + 1}.
     */
    private static final String[] RATING_WORDS = {"One", "Two", "Three", "Four", "Five"};

    /**
     * Fetches the listing page and returns at most {@code limit} books from it, in page order.
     *
     * <p>This is a best-effort operation: if the page cannot be fetched the failure is logged
     * and an empty list is returned. A product whose price cannot be parsed is logged and
     * skipped instead of aborting the whole crawl.
     *
     * @param limit maximum number of books to return; a value of zero or less yields an empty
     *     list without contacting the site
     * @return a mutable list of the scraped books, never {@code null}
     */
    public List<Book> crawl(int limit) {
        List<Book> books = new ArrayList<>();
        if (limit <= 0) {
            // Nothing was asked for, so avoid a pointless network round trip.
            return books;
        }
        try {
            Document doc = Jsoup.connect(SOURCE_URL).get();
            Elements bookElements = doc.select("article.product_pod");
            for (Element el : bookElements) {
                if (books.size() >= limit) {
                    break;
                }
                String title = el.select("h3 a").attr("title");
                String priceText = el.select(".price_color").text();
                double price;
                try {
                    price = parsePrice(priceText);
                } catch (NumberFormatException e) {
                    // One malformed entry must not discard the books already collected.
                    LOGGER.log(
                            Level.WARNING,
                            "Skipping book with unparseable price: title=" + title
                                    + ", price=" + priceText,
                            e);
                    continue;
                }
                int star = getStarRating(el.select(".star-rating").attr("class"));
                books.add(new Book(title, price, star, CATEGORY));
            }
        } catch (IOException e) {
            LOGGER.log(Level.SEVERE, "Failed to fetch " + SOURCE_URL, e);
        }
        return books;
    }

    /**
     * Parses a displayed price such as {@code "£51.77"} into a number.
     *
     * <p>Every character other than digits and {@code '.'} is removed first, so currency
     * symbols, stray bytes from a mis-decoded symbol, whitespace and thousands separators do
     * not break parsing.
     *
     * @param priceText the price text as shown on the page
     * @return the numeric price
     * @throws NumberFormatException if no parseable number remains after stripping
     */
    private static double parsePrice(String priceText) {
        return Double.parseDouble(NON_NUMERIC.matcher(priceText).replaceAll(""));
    }

    /**
     * Maps a {@code class} attribute value to a star rating by looking for a rating word.
     *
     * <p>Words are checked in ascending order ("One" through "Five") and the first one found
     * wins.
     *
     * @param className the class attribute value to inspect
     * @return the rating from 1 to 5, or 0 if no rating word is present
     */
    private static int getStarRating(String className) {
        for (int i = 0; i < RATING_WORDS.length; i++) {
            if (className.contains(RATING_WORDS[i])) {
                return i + 1;
            }
        }
        return 0;
    }
}