You cannot select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
32 lines
774 B
32 lines
774 B
package strategy;
|
|
|
|
import model.Article;
|
|
import util.HttpUtil;
|
|
import exception.SpiderException;
|
|
|
|
public class BaiduStrategy implements CrawlStrategy {
|
|
@Override
|
|
public String getName() {
|
|
return "百度";
|
|
}
|
|
|
|
@Override
|
|
public String getUrl() {
|
|
return "https://www.baidu.com/";
|
|
}
|
|
|
|
@Override
|
|
public Article crawl() throws SpiderException {
|
|
String html = HttpUtil.get(getUrl(), "UTF-8");
|
|
|
|
String title = HttpUtil.extractTagSafe(html, "<title>", "</title>");
|
|
|
|
Article article = new Article();
|
|
article.setTitle(title);
|
|
article.setContent("百度首页");
|
|
article.setUrl(getUrl());
|
|
article.setSource(getName());
|
|
|
|
return article;
|
|
}
|
|
}
|
|
|