You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
 
 

78 lines
2.5 KiB

package com.movieratings.crawler.strategy;
import com.movieratings.exception.CrawlerException;
import com.movieratings.model.Movie;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy that reads movie entries from the Box Office Mojo
 * "top lifetime gross" chart page.
 *
 * <p>Each table row with at least four {@code td} cells is read as
 * rank, title, box office and release year (in that cell order). The
 * remaining {@link Movie} fields are filled with fixed placeholder values,
 * and the rating is derived from the rank rather than scraped.
 */
public class BoxOfficeMojoCrawlerStrategy extends AbstractCrawlerStrategy {

    /** Address of the chart page that is fetched on every crawl. */
    private static final String URL = "https://www.boxofficemojo.com/chart/top_lifetime_gross/";

    /** A row needs at least this many {@code td} cells to be parsed. */
    private static final int REQUIRED_CELLS = 4;

    /** Rating from which the per-rank deduction is subtracted. */
    private static final double BASE_RATING = 8.8;

    /** Amount deducted from {@link #BASE_RATING} per rank position. */
    private static final double RATING_STEP = 0.01;

    /** Lowest rating {@link #estimateRating(int)} will ever return. */
    private static final double MIN_RATING = 7.0;

    /**
     * Returns the display name of the crawled site.
     *
     * @return the literal {@code "Box Office Mojo"}
     */
    @Override
    public String getSiteName() {
        return "Box Office Mojo";
    }

    /**
     * Fetches the chart page and converts its table rows into movies.
     *
     * <p>Rows are visited in document order. A row is skipped when it has
     * fewer than four {@code td} cells or when the parsed title is null or
     * blank. Collection stops as soon as {@code limit} movies were gathered.
     *
     * @param limit maximum number of movies to return
     * @return the collected movies, at most {@code limit} of them
     * @throws CrawlerException if fetching the page fails with an {@link IOException}
     */
    @Override
    public List<Movie> crawl(int limit) {
        try {
            Elements tableRows = connection(URL).get().select("tr");
            List<Movie> collected = new ArrayList<>();
            // The limit is part of the loop condition, so nothing is parsed once it is reached.
            for (int i = 0; i < tableRows.size() && collected.size() < limit; i++) {
                Elements cells = tableRows.get(i).select("td");
                if (cells.size() >= REQUIRED_CELLS) {
                    Movie candidate = parseRow(cells);
                    String title = candidate.getTitle();
                    if (title != null && !title.isBlank()) {
                        collected.add(candidate);
                    }
                }
            }
            return collected;
        } catch (IOException e) {
            throw new CrawlerException("Failed to crawl " + getSiteName(), e);
        }
    }

    /**
     * Builds a {@link Movie} from the cells of one chart row.
     *
     * @param cells the {@code td} cells of the row; the first four are read
     * @return a movie populated from the row plus fixed placeholder values
     */
    private Movie parseRow(Elements cells) {
        // The rank is parsed first because the rating estimate depends on it.
        final int chartRank = parseCount(cells.get(0).text());

        final Movie entry = new Movie();

        // Values taken from the row itself.
        entry.setRank(chartRank);
        entry.setTitle(cells.get(1).text().trim());
        entry.setBoxOffice(parseMoney(cells.get(2).text()));
        entry.setReleaseYear(parseYear(cells.get(3).text()));

        // Value derived from the rank, not read from the page.
        entry.setRating(estimateRating(chartRank));

        // Fixed placeholders for fields this method does not read from the row.
        entry.setDirector("Unknown");
        entry.setCountry("United States");
        entry.setReviewCount(0);
        entry.setPosterUrl("");
        entry.setQuote("Box Office Mojo lifetime gross chart entry");
        entry.setType("Movie");
        entry.setSourceSite(getSiteName());
        return entry;
    }

    /**
     * Converts a money string such as {@code "$1,234"} into a number.
     *
     * @param value raw cell text; may be null or blank
     * @return the parsed amount, or {@code 0.0} when the text is null, blank
     *         or not parseable as a double after removing {@code $} and {@code ,}
     */
    private double parseMoney(String value) {
        if (value == null || value.isBlank()) {
            return 0.0;
        }
        String withoutCurrency = value.replace("$", "");
        String withoutSeparators = withoutCurrency.replace(",", "");
        String candidate = withoutSeparators.trim();

        double amount;
        try {
            amount = Double.parseDouble(candidate);
        } catch (NumberFormatException unparseable) {
            // Best effort: an unreadable amount is reported as zero.
            amount = 0.0;
        }
        return amount;
    }

    /**
     * Derives a rating from the chart rank.
     *
     * @param rank chart position of the movie
     * @return {@code 8.8 - rank * 0.01}, but never less than {@code 7.0}
     */
    private double estimateRating(int rank) {
        double projected = BASE_RATING - rank * RATING_STEP;
        return projected > MIN_RATING ? projected : MIN_RATING;
    }
}