package strategy; import model.Article; import util.HttpUtil; import exception.SpiderException; public class HttpBinStrategy implements CrawlStrategy { @Override public String getName() { return "HttpBin"; } @Override public String getUrl() { return "https://httpbin.org/html"; } @Override public Article crawl() throws SpiderException { String html = HttpUtil.get(getUrl(), "UTF-8"); String title = HttpUtil.extractTagSafe(html, "

", "

"); String content = HttpUtil.extractTagSafe(html, "

", "

"); Article article = new Article(); article.setTitle(title); article.setContent(content); article.setUrl(getUrl()); article.setSource(getName()); return article; } }