You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
25 lines
666 B
25 lines
666 B
package com.example.datacollect.strategy;
|
|
|
|
import com.example.datacollect.exception.ParseException;
|
|
import com.example.datacollect.model.Article;
|
|
import org.jsoup.nodes.Document;
|
|
|
|
import java.util.List;
|
|
|
|
public interface CrawlStrategy {
|
|
/**
|
|
* 解析文档并提取文章列表
|
|
* @param url 原始URL
|
|
* @param doc Jsoup文档对象
|
|
* @return 文章列表
|
|
* @throws ParseException 解析失败时抛出
|
|
*/
|
|
List<Article> parse(String url, Document doc) throws ParseException;
|
|
|
|
/**
|
|
* 判断该策略是否支持指定的URL
|
|
* @param url 目标URL
|
|
* @return 是否支持
|
|
*/
|
|
boolean supports(String url);
|
|
}
|