package com.example.datacollect.strategy;

import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.util.ArrayList;
import java.util.List;

/**
 * Catch-all {@link CrawlStrategy} that accepts every URL and turns each
 * {@code h1}/{@code h2} heading of the page into an {@link Article}.
 */
public class DefaultStrategy implements CrawlStrategy {

    /**
     * Reports whether this strategy can handle the given URL.
     *
     * @param url the URL being considered; not inspected by this implementation
     * @return always {@code true}, because this is the fallback strategy
     */
    @Override
    public boolean supports(String url) {
        // Fallback strategy: accept everything.
        return true;
    }

    /**
     * Builds one {@link Article} per {@code h1} or {@code h2} element found in the document.
     *
     * @param url the page URL, passed unchanged as the second constructor argument of every article
     * @param doc the parsed page to scan for headings
     * @return a new mutable list with one article per matched heading; empty when the
     *         document contains no {@code h1}/{@code h2} element
     */
    @Override
    public List<Article> parse(String url, Document doc) {
        // Generic logic: treat every h1 or h2 element as an article title.
        Elements headings = doc.select("h1, h2");

        List<Article> articles = new ArrayList<>(headings.size());
        for (Element heading : headings) {
            // The third constructor argument is left as an empty string here;
            // NOTE(review): presumably the article body/content - confirm against Article.
            articles.add(new Article(heading.text(), url, ""));
        }
        return articles;
    }

    /**
     * Returns the priority value of this strategy.
     *
     * @return {@code -1}; intended by the original author to be the lowest priority.
     *         NOTE(review): how priorities are ordered is decided by the caller - confirm there.
     */
    @Override
    public int getPriority() {
        // Lowest priority.
        return -1;
    }
}