package com.cctv.news.command;

import com.cctv.news.view.OutputView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Command that downloads the CCTV world-news page and records every article
 * link it finds as a {@code "title - url"} string.
 *
 * <p>Found entries are appended to the list supplied at construction time, so
 * whoever owns that list sees them after {@link #execute(String[])} returns.
 */
public class CrawlCommand implements Command {

    /** Page that is fetched on every invocation. */
    private static final String NEWS_URL = "https://news.cctv.com/world/";

    /** User-Agent header sent with the request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";

    /** Timeout handed to jsoup, in milliseconds. */
    private static final int TIMEOUT_MILLIS = 10000;

    /** Selector tried first for article links. */
    private static final String PRIMARY_SELECTOR = "a.item-title";

    /** Selector tried when {@link #PRIMARY_SELECTOR} matches nothing. */
    private static final String SECONDARY_SELECTOR = "h1 a, h2 a, .news-title";

    /** Link text must be longer than this to count as a headline in the fallback scan. */
    private static final int FALLBACK_MIN_TITLE_LENGTH = 10;

    /** Maximum number of entries the fallback scan collects. */
    private static final int FALLBACK_MAX_ARTICLES = 10;

    private final OutputView view;
    private final List<String> articles;

    /**
     * @param view     sink for progress and error messages
     * @param articles list that receives one {@code "title - url"} entry per
     *                 crawled article; it is mutated by {@link #execute(String[])}
     */
    public CrawlCommand(OutputView view, List<String> articles) {
        this.view = view;
        this.articles = articles;
    }

    @Override
    public String getName() {
        return "crawl";
    }

    @Override
    public String getHelp() {
        return "crawl - 爬取央视新闻";
    }

    /**
     * Fetches the news page, extracts article links and appends them to the
     * shared article list. An {@link IOException} raised while fetching is
     * reported through the view instead of being propagated.
     *
     * @param args command arguments; not used by this command
     */
    @Override
    public void execute(String[] args) {
        view.showMessage("开始爬取央视新闻...");
        try {
            Document doc = Jsoup.connect(NEWS_URL)
                    .userAgent(USER_AGENT)
                    .timeout(TIMEOUT_MILLIS)
                    .get();

            List<String> newArticles = collectFromHeadlineSelectors(doc);
            if (newArticles.isEmpty()) {
                view.showMessage("未能获取到文章,尝试其他选择器...");
                newArticles = collectFromAllLinks(doc);
            }
            articles.addAll(newArticles);

            view.showMessage("成功爬取 " + newArticles.size() + " 篇文章。");
            view.showMessage("使用 list 命令可以查看已抓取的文章。");
        } catch (IOException e) {
            // getMessage() may be null; fall back to the exception type so the
            // user never sees a bare "null".
            String reason = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
            view.showError("爬取失败: " + reason);
        }
    }

    /**
     * Collects entries from the headline selectors, trying the secondary
     * selector only when the primary one matches no elements.
     */
    private List<String> collectFromHeadlineSelectors(Document doc) {
        Elements newsItems = doc.select(PRIMARY_SELECTOR);
        if (newsItems.isEmpty()) {
            newsItems = doc.select(SECONDARY_SELECTOR);
        }

        List<String> found = new ArrayList<>();
        for (Element item : newsItems) {
            String title = item.text().trim();
            // absUrl resolves relative and protocol-relative hrefs against the
            // document's base URI; the raw attribute would fail the http check.
            String url = item.absUrl("href");
            if (!title.isEmpty() && url.startsWith("http")) {
                found.add(formatArticle(title, url));
            }
        }
        return found;
    }

    /**
     * Fallback scan over every link on the page: keeps links whose text is
     * longer than {@link #FALLBACK_MIN_TITLE_LENGTH} characters and whose
     * resolved URL mentions cctv.com, up to {@link #FALLBACK_MAX_ARTICLES}.
     */
    private List<String> collectFromAllLinks(Document doc) {
        List<String> found = new ArrayList<>();
        for (Element link : doc.select("a[href]")) {
            String text = link.text().trim();
            String href = link.absUrl("href");
            if (text.length() > FALLBACK_MIN_TITLE_LENGTH && href.contains("cctv.com")) {
                found.add(formatArticle(text, href));
                if (found.size() >= FALLBACK_MAX_ARTICLES) {
                    break;
                }
            }
        }
        return found;
    }

    /** Builds the stored representation of one article. */
    private static String formatArticle(String title, String url) {
        return title + " - " + url;
    }
}