You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

59 lines
2.2 KiB

package com.example.moviecli.strategy;
import com.example.moviecli.model.Movie;
import com.example.moviecli.exception.ParseFailedException;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawl strategy that extracts movie entries from a Douban Top 250 list page.
 *
 * <p>Each element matching {@code .item} is turned into one {@link Movie}. Rank, title,
 * optional original title and score are read from dedicated elements; year and director
 * are extracted heuristically from the text of the first {@code .bd p} paragraph.
 */
public class DoubanTop250Strategy implements MovieCrawlStrategy {

    /** URL fragment identifying the pages this strategy can handle. */
    private static final String URL_MARKER = "movie.douban.com/top250";

    /** Label that precedes the director name inside the info paragraph. */
    private static final String DIRECTOR_LABEL = "导演:";

    /** Placeholder returned when a field cannot be extracted. */
    private static final String UNKNOWN = "未知";

    /**
     * Tells whether this strategy can parse the page behind the given URL.
     *
     * @param url the page URL; may be {@code null}
     * @return {@code true} if the URL contains the Douban Top 250 path, {@code false}
     *         otherwise (including for a {@code null} URL)
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains(URL_MARKER);
    }

    /**
     * Parses every {@code .item} element of the document into a {@link Movie}.
     *
     * @param doc the parsed list page
     * @return the movies in document order; empty if the page has no {@code .item} element
     * @throws ParseFailedException if any item cannot be parsed (for example a non-numeric
     *         rank or a missing title/info element); the underlying failure is the cause
     */
    @Override
    public List<Movie> parse(Document doc) throws ParseFailedException {
        try {
            List<Movie> movies = new ArrayList<>();
            for (Element item : doc.select(".item")) {
                movies.add(parseItem(item));
            }
            return movies;
        } catch (Exception e) {
            // Boundary catch: any failure while reading the page is reported uniformly.
            throw new ParseFailedException("豆瓣电影解析失败", e);
        }
    }

    /** Builds one movie from a single {@code .item} element; throws unchecked on malformed items. */
    private Movie parseItem(Element item) {
        int rank = Integer.parseInt(item.select(".pic em").text());

        Elements titles = item.select(".title");
        if (titles.isEmpty()) {
            throw new IllegalStateException("item with rank " + rank + " has no .title element");
        }
        String title = titles.first().text();
        // A second .title element, when present, holds the original title prefixed by a "/".
        String originalTitle = titles.size() > 1
                ? stripSeparators(titles.get(1).text().replace("/", ""))
                : "";

        String score = item.select(".rating_num").text();

        Element infoParagraph = item.select(".bd p").first();
        if (infoParagraph == null) {
            throw new IllegalStateException("item with rank " + rank + " has no .bd p element");
        }
        String info = infoParagraph.text();

        return new Movie(rank, title, originalTitle, score, extractYear(info), extractDirector(info));
    }

    /**
     * Finds the first token that starts with exactly four ASCII digits and returns those digits.
     *
     * <p>Tokens are delimited by any whitespace or space character (including U+00A0), so a
     * year is found whether it stands alone or is immediately followed by a non-digit suffix
     * such as a parenthesised note.
     *
     * @param info text of the info paragraph
     * @return the four-digit year, or {@code "未知"} if no such token exists
     */
    private String extractYear(String info) {
        int length = info.length();
        int pos = 0;
        while (pos < length) {
            while (pos < length && isSeparator(info.charAt(pos))) {
                pos++;
            }
            int tokenStart = pos;
            while (pos < length && !isSeparator(info.charAt(pos))) {
                pos++;
            }
            if (startsWithYear(info, tokenStart, pos)) {
                return info.substring(tokenStart, tokenStart + 4);
            }
        }
        return UNKNOWN;
    }

    /**
     * Returns the first token that follows the {@code "导演:"} label.
     *
     * <p>Separators directly after the label are skipped before the token is read; without
     * that step a space following the colon would yield an empty name. Only the first token
     * is returned, so a name written as several space-separated words is truncated to its
     * first word.
     *
     * @param info text of the info paragraph
     * @return the director token, or {@code "未知"} if the label is absent or nothing follows it
     */
    private String extractDirector(String info) {
        int labelAt = info.indexOf(DIRECTOR_LABEL);
        if (labelAt < 0) {
            return UNKNOWN;
        }
        int length = info.length();
        int start = labelAt + DIRECTOR_LABEL.length();
        while (start < length && isSeparator(info.charAt(start))) {
            start++;
        }
        int end = start;
        while (end < length && !isSeparator(info.charAt(end))) {
            end++;
        }
        return end > start ? info.substring(start, end) : UNKNOWN;
    }

    /** Tells whether {@code text[from, to)} begins with four ASCII digits not followed by a fifth digit. */
    private static boolean startsWithYear(String text, int from, int to) {
        if (to - from < 4) {
            return false;
        }
        for (int i = from; i < from + 4; i++) {
            if (!isAsciiDigit(text.charAt(i))) {
                return false;
            }
        }
        return to - from == 4 || !isAsciiDigit(text.charAt(from + 4));
    }

    /** Removes leading and trailing separator characters, including U+00A0 which {@code trim()} keeps. */
    private static String stripSeparators(String text) {
        int start = 0;
        int end = text.length();
        while (start < end && isSeparator(text.charAt(start))) {
            start++;
        }
        while (end > start && isSeparator(text.charAt(end - 1))) {
            end--;
        }
        return text.substring(start, end);
    }

    /** Whitespace in the Java sense plus Unicode space characters such as the no-break space. */
    private static boolean isSeparator(char c) {
        return Character.isWhitespace(c) || Character.isSpaceChar(c);
    }

    private static boolean isAsciiDigit(char c) {
        return c >= '0' && c <= '9';
    }
}