package com.example.moviecli.strategy; import com.example.moviecli.model.Movie; import com.example.moviecli.exception.ParseFailedException; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.ArrayList; import java.util.List; public class DoubanTop250Strategy implements MovieCrawlStrategy { @Override public boolean supports(String url) { return url.contains("movie.douban.com/top250"); } @Override public List parse(Document doc) throws ParseFailedException { try { List movies = new ArrayList<>(); Elements items = doc.select(".item"); for (Element item : items) { String rankText = item.select(".pic em").text(); int rank = Integer.parseInt(rankText); String title = item.select(".title").first().text(); String originalTitle = ""; Elements titles = item.select(".title"); if (titles.size() > 1) { originalTitle = titles.get(1).text().replace("/", "").trim(); } String score = item.select(".rating_num").text(); String info = item.select(".bd p").first().text(); String year = extractYear(info); String director = extractDirector(info); movies.add(new Movie(rank, title, originalTitle, score, year, director)); } return movies; } catch (Exception e) { throw new ParseFailedException("豆瓣电影解析失败", e); } } private String extractYear(String info) { for (String part : info.split(" ")) { if (part.matches("\\d{4}")) return part; } return "未知"; } private String extractDirector(String info) { if (info.contains("导演:")) { int start = info.indexOf("导演:") + 3; int end = info.indexOf(" ", start); if (end == -1) end = info.length(); return info.substring(start, end).trim(); } return "未知"; } }