You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

33 lines
837 B

package strategy;
import model.Article;
import util.HttpUtil;
import exception.SpiderException;
/**
 * {@link CrawlStrategy} implementation that targets the HTML sample page at
 * {@code https://httpbin.org/html}.
 *
 * <p>The strategy downloads the page, pulls out the text between the first
 * {@code <h1>}/{@code </h1>} and {@code <p>}/{@code </p>} delimiters via
 * {@code HttpUtil.extractTagSafe}, and packages the result as an {@link Article}.
 */
public class HttpBinStrategy implements CrawlStrategy {

    /** Identifier reported by {@link #getName()} and stored as the article source. */
    private static final String STRATEGY_NAME = "HttpBin";

    /** Address reported by {@link #getUrl()} and fetched by {@link #crawl()}. */
    private static final String TARGET_URL = "https://httpbin.org/html";

    /** Charset name handed to {@code HttpUtil.get} when downloading the page. */
    private static final String PAGE_CHARSET = "UTF-8";

    /**
     * Returns the name of this strategy.
     *
     * @return the fixed string {@code "HttpBin"}
     */
    @Override
    public String getName() {
        return STRATEGY_NAME;
    }

    /**
     * Returns the address this strategy crawls.
     *
     * @return the fixed string {@code "https://httpbin.org/html"}
     */
    @Override
    public String getUrl() {
        return TARGET_URL;
    }

    /**
     * Downloads the page at {@link #getUrl()} and converts it into an {@link Article}.
     *
     * <p>The title is taken from the {@code <h1>} delimiters and the content from the
     * {@code <p>} delimiters; the article's URL and source are filled in from
     * {@link #getUrl()} and {@link #getName()}.
     *
     * @return a newly created article with title, content, URL and source set
     * @throws SpiderException declared by this method; presumably raised by the
     *         {@code HttpUtil} calls when fetching fails (confirm against HttpUtil)
     */
    @Override
    public Article crawl() throws SpiderException {
        final String pageSource = HttpUtil.get(getUrl(), PAGE_CHARSET);

        // NOTE(review): exact semantics of extractTagSafe (e.g. what it yields when
        // the delimiters are absent) are defined in HttpUtil, not visible here.
        final String heading = HttpUtil.extractTagSafe(pageSource, "<h1>", "</h1>");
        final String paragraph = HttpUtil.extractTagSafe(pageSource, "<p>", "</p>");

        final Article result = new Article();
        result.setTitle(heading);
        result.setContent(paragraph);
        result.setUrl(getUrl());
        result.setSource(getName());
        return result;
    }
}