package crawler;

import org.jsoup.nodes.Document;

/**
 * Crawler for generic web pages: fetches the page via {@code BaseCrawler},
 * then prints its URL, title, and a truncated body-text preview to stdout.
 */
public class WebPageCrawler extends BaseCrawler {

    /** Maximum number of characters shown in the content preview. */
    private static final int PREVIEW_LENGTH = 200;

    /**
     * @param url the page URL to crawl; handed to the {@code BaseCrawler} superclass
     */
    public WebPageCrawler(String url) {
        super(url);
    }

    /**
     * Fetches the document and prints a summary (URL, title, first
     * {@value #PREVIEW_LENGTH} characters of body text, ellipsized).
     * Failures are reported to stderr rather than rethrown, so this method
     * never propagates an exception to the caller.
     */
    @Override
    public void crawl() {
        try {
            Document doc = getDocument();
            String title = doc.title();
            // doc.body() may be null (e.g. non-HTML response); guard so we
            // print an empty preview instead of tripping an NPE.
            String text = doc.body() == null ? "" : doc.body().text();
            if (text.length() > PREVIEW_LENGTH) {
                text = text.substring(0, PREVIEW_LENGTH) + "...";
            }
            System.out.println("===== 网页爬取完成 =====");
            System.out.println("URL:" + url);
            System.out.println("标题:" + title);
            System.out.println("内容预览:" + text);
        } catch (Exception e) {
            // Boundary handler: broad catch is intentional here, but keep the
            // full stack trace for diagnosability instead of only the message
            // (which can itself be null).
            System.err.println("网页爬取失败:" + e.getMessage());
            e.printStackTrace();
        }
    }
}