You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
30 lines
809 B
30 lines
809 B
package crawler;
|
|
|
|
import org.jsoup.nodes.Document;
|
|
|
|
public class WebPageCrawler extends BaseCrawler {
|
|
|
|
public WebPageCrawler(String url) {
|
|
super(url);
|
|
}
|
|
|
|
@Override
|
|
public void crawl() {
|
|
try {
|
|
Document doc = getDocument();
|
|
String title = doc.title();
|
|
String text = doc.body().text();
|
|
|
|
if (text.length() > 200) {
|
|
text = text.substring(0, 200) + "...";
|
|
}
|
|
|
|
System.out.println("===== 网页爬取完成 =====");
|
|
System.out.println("URL:" + url);
|
|
System.out.println("标题:" + title);
|
|
System.out.println("内容预览:" + text);
|
|
} catch (Exception e) {
|
|
System.err.println("网页爬取失败:" + e.getMessage());
|
|
}
|
|
}
|
|
}
|