You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

30 lines
809 B

package crawler;
import org.jsoup.nodes.Document;
/**
 * Crawler that fetches a single web page and prints its URL, title and a
 * short preview of the body text to standard output.
 */
public class WebPageCrawler extends BaseCrawler {

    /** Maximum number of UTF-16 code units of body text shown in the preview. */
    private static final int PREVIEW_LENGTH = 200;

    /** Marker appended to the preview when the body text was truncated. */
    private static final String ELLIPSIS = "...";

    /**
     * Creates a crawler for the given page.
     *
     * @param url address of the page to crawl; passed unchanged to the superclass
     */
    public WebPageCrawler(String url) {
        super(url);
    }

    /**
     * Loads the document via {@code getDocument()} and prints a summary
     * (URL, title, body-text preview) to {@code System.out}.
     *
     * <p>Any exception raised while loading or reading the document is caught
     * and reported on {@code System.err}; it is not propagated to the caller.
     */
    @Override
    public void crawl() {
        try {
            Document doc = getDocument();
            String title = doc.title();
            String text = truncateForPreview(doc.body().text());
            System.out.println("===== 网页爬取完成 =====");
            System.out.println("URL:" + url);
            System.out.println("标题:" + title);
            System.out.println("内容预览:" + text);
        } catch (Exception e) {
            // Best-effort crawl: report the failure and return normally.
            System.err.println("网页爬取失败:" + e.getMessage());
        }
    }

    /**
     * Shortens {@code text} to at most {@link #PREVIEW_LENGTH} code units and
     * appends {@link #ELLIPSIS} when anything was cut off.
     *
     * @param text full body text
     * @return {@code text} itself if it fits, otherwise the truncated preview
     */
    private static String truncateForPreview(String text) {
        if (text.length() <= PREVIEW_LENGTH) {
            return text;
        }
        int end = PREVIEW_LENGTH;
        // Do not cut between the two halves of a surrogate pair: that would
        // leave an unpaired high surrogate and print a malformed character.
        if (Character.isHighSurrogate(text.charAt(end - 1))
                && Character.isLowSurrogate(text.charAt(end))) {
            end--;
        }
        return text.substring(0, end) + ELLIPSIS;
    }
}