You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
33 lines
837 B
33 lines
837 B
package strategy;
|
|
|
|
import model.Article;
|
|
import util.HttpUtil;
|
|
import exception.SpiderException;
|
|
|
|
public class HttpBinStrategy implements CrawlStrategy {
|
|
@Override
|
|
public String getName() {
|
|
return "HttpBin";
|
|
}
|
|
|
|
@Override
|
|
public String getUrl() {
|
|
return "https://httpbin.org/html";
|
|
}
|
|
|
|
@Override
|
|
public Article crawl() throws SpiderException {
|
|
String html = HttpUtil.get(getUrl(), "UTF-8");
|
|
|
|
String title = HttpUtil.extractTagSafe(html, "<h1>", "</h1>");
|
|
String content = HttpUtil.extractTagSafe(html, "<p>", "</p>");
|
|
|
|
Article article = new Article();
|
|
article.setTitle(title);
|
|
article.setContent(content);
|
|
article.setUrl(getUrl());
|
|
article.setSource(getName());
|
|
|
|
return article;
|
|
}
|
|
}
|
|
|