package crawler;

import org.jsoup.nodes.Document;

/**
 * Crawler for generic web pages: fetches the page via {@code BaseCrawler},
 * then prints its URL, title, and a truncated body-text preview to stdout.
 */
public class WebPageCrawler extends BaseCrawler {

    /** Maximum number of characters shown in the content preview. */
    private static final int PREVIEW_LENGTH = 200;

    /**
     * @param url the page URL to crawl; handed to the {@code BaseCrawler} superclass
     */
    public WebPageCrawler(String url) {
        super(url);
    }

    /**
     * Fetches the document and prints a summary (URL, title, first
     * {@value #PREVIEW_LENGTH} characters of body text, ellipsized).
     * Failures are reported to stderr rather than rethrown, so this method
     * never propagates an exception to the caller.
     */
    @Override
    public void crawl() {
        try {
            Document doc = getDocument();
            String title = doc.title();
            // doc.body() may be null (e.g. non-HTML response); guard so we
            // print an empty preview instead of tripping an NPE.
            String text = doc.body() == null ? "" : doc.body().text();
            if (text.length() > PREVIEW_LENGTH) {
                text = text.substring(0, PREVIEW_LENGTH) + "...";
            }
            System.out.println("===== 网页爬取完成 =====");
            System.out.println("URL:" + url);
            System.out.println("标题:" + title);
            System.out.println("内容预览:" + text);
        } catch (Exception e) {
            // Boundary handler: broad catch is intentional here, but keep the
            // full stack trace for diagnosability instead of only the message
            // (which can itself be null).
            System.err.println("网页爬取失败:" + e.getMessage());
            e.printStackTrace();
        }
    }
}