From 51b8414b091d3fcd4901371929e06076c6d9a55a Mon Sep 17 00:00:00 2001
From: peisishuang <1767255628@qq.com>
Date: Fri, 1 May 2026 19:30:45 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'=E6=96=B0=E9=97=BB=E7=BD=91=E7=88=AC=E8=99=AB'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 新闻网爬虫/Command.java      |  19 +++++
 新闻网爬虫/CrawlCommand.java | 127 +++++++++++++++++++++++++++++++
 新闻网爬虫/ExitCommand.java  |  40 ++++++++++
 新闻网爬虫/Main.java         |  47 ++++++++++++
 4 files changed, 233 insertions(+)
 create mode 100644 新闻网爬虫/Command.java
 create mode 100644 新闻网爬虫/CrawlCommand.java
 create mode 100644 新闻网爬虫/ExitCommand.java
 create mode 100644 新闻网爬虫/Main.java

diff --git a/新闻网爬虫/Command.java b/新闻网爬虫/Command.java
new file mode 100644
--- /dev/null
+++ b/新闻网爬虫/Command.java
@@ -0,0 +1,19 @@
+package com.cctv.news.command;
+
+/**
+ * A named console command that can be registered with the controller and executed.
+ */
+public interface Command {
+    /** Returns the name of this command. */
+    String getName();
+
+    /** Returns the help text for this command. */
+    String getHelp();
+
+    /**
+     * Runs the command.
+     *
+     * @param args arguments passed to the command
+     */
+    void execute(String[] args);
+}
diff --git a/新闻网爬虫/CrawlCommand.java b/新闻网爬虫/CrawlCommand.java
new file mode 100644
--- /dev/null
+++ b/新闻网爬虫/CrawlCommand.java
@@ -0,0 +1,127 @@
+package com.cctv.news.command;
+
+import com.cctv.news.view.OutputView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Command that downloads the CCTV world-news page and collects article links from it.
+ *
+ * <p>Each article is stored in the shared list as a {@code "title - url"} string.
+ */
+public class CrawlCommand implements Command {
+    private static final String NEWS_URL = "https://news.cctv.com/world/";
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
+    private static final int TIMEOUT_MILLIS = 10000;
+
+    /** Upper bound on the number of links taken by the catch-all fallback scan. */
+    private static final int MAX_FALLBACK_ARTICLES = 10;
+
+    /** The fallback scan skips links whose text is not longer than this. */
+    private static final int MIN_FALLBACK_TEXT_LENGTH = 10;
+
+    private final OutputView view;
+    private final List<String> articles;
+
+    /**
+     * Creates the command.
+     *
+     * @param view     receives progress and error messages
+     * @param articles shared list that crawled entries are appended to
+     */
+    public CrawlCommand(OutputView view, List<String> articles) {
+        this.view = view;
+        this.articles = articles;
+    }
+
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    @Override
+    public String getHelp() {
+        return "crawl - 爬取央视新闻";
+    }
+
+    /**
+     * Fetches the news page, extracts articles and appends them to the shared list.
+     *
+     * <p>An {@link IOException} from the download is reported through the view, not rethrown.
+     *
+     * @param args ignored
+     */
+    @Override
+    public void execute(String[] args) {
+        view.showMessage("开始爬取央视新闻...");
+        try {
+            Document doc = Jsoup.connect(NEWS_URL)
+                    .userAgent(USER_AGENT)
+                    .timeout(TIMEOUT_MILLIS)
+                    .get();
+
+            List<String> newArticles = collectFromTitleLinks(doc);
+            if (newArticles.isEmpty()) {
+                view.showMessage("未能获取到文章,尝试其他选择器...");
+                newArticles = collectFromAllLinks(doc);
+            }
+            articles.addAll(newArticles);
+
+            view.showMessage("成功爬取 " + newArticles.size() + " 篇文章。");
+            view.showMessage("使用 list 命令可以查看已抓取的文章。");
+        } catch (IOException e) {
+            view.showError("爬取失败: " + e.getMessage());
+        }
+    }
+
+    /** Collects headline-selector matches whose resolved URL starts with {@code http}. */
+    private static List<String> collectFromTitleLinks(Document doc) {
+        Elements newsItems = doc.select("a.item-title");
+        if (newsItems.isEmpty()) {
+            newsItems = doc.select("h1 a, h2 a, .news-title");
+        }
+
+        List<String> found = new ArrayList<>();
+        for (Element item : newsItems) {
+            String title = item.text();
+            String url = resolveHref(item);
+            if (!title.isEmpty() && url.startsWith("http")) {
+                found.add(title + " - " + url);
+            }
+        }
+        return found;
+    }
+
+    /** Fallback scan over every anchor, keeping the first long-titled links to cctv.com. */
+    private static List<String> collectFromAllLinks(Document doc) {
+        List<String> found = new ArrayList<>();
+        for (Element link : doc.select("a[href]")) {
+            String text = link.text().trim();
+            String href = resolveHref(link);
+            if (text.length() > MIN_FALLBACK_TEXT_LENGTH && href.contains("cctv.com")) {
+                found.add(text + " - " + href);
+                if (found.size() >= MAX_FALLBACK_ARTICLES) {
+                    break;
+                }
+            }
+        }
+        return found;
+    }
+
+    /**
+     * Returns the element's {@code href} resolved against the page URL, so that relative and
+     * protocol-relative links are not lost; falls back to the raw attribute when jsoup cannot
+     * build an absolute URL.
+     */
+    private static String resolveHref(Element element) {
+        String absolute = element.absUrl("href");
+        return absolute.isEmpty() ? element.attr("href") : absolute;
+    }
+}
diff --git a/新闻网爬虫/ExitCommand.java b/新闻网爬虫/ExitCommand.java
new file mode 100644
--- /dev/null
+++ b/新闻网爬虫/ExitCommand.java
@@ -0,0 +1,40 @@
+package com.cctv.news.command;
+
+import com.cctv.news.view.OutputView;
+
+/**
+ * Command that prints a farewell message and terminates the JVM.
+ */
+public class ExitCommand implements Command {
+    private final OutputView view;
+
+    /**
+     * Creates the command.
+     *
+     * @param view receives the farewell message
+     */
+    public ExitCommand(OutputView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    @Override
+    public String getHelp() {
+        return "exit - 退出程序";
+    }
+
+    /**
+     * Shows the farewell message and exits with status 0; this method does not return.
+     *
+     * @param args ignored
+     */
+    @Override
+    public void execute(String[] args) {
+        view.showMessage("感谢使用央视新闻爬虫,再见!");
+        System.exit(0);
+    }
+}
diff --git a/新闻网爬虫/Main.java b/新闻网爬虫/Main.java
new file mode 100644
--- /dev/null
+++ b/新闻网爬虫/Main.java
@@ -0,0 +1,47 @@
+package com.cctv.news;
+
+import com.cctv.news.command.*;
+import com.cctv.news.controller.CommandController;
+import com.cctv.news.view.ConsoleView;
+import com.cctv.news.view.OutputView;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Entry point of the console crawler: registers the commands and runs the input loop.
+ */
+public class Main {
+    /**
+     * Reads commands from standard input, one per line, until the input ends or a command
+     * terminates the program.
+     *
+     * @param args unused
+     */
+    public static void main(String[] args) {
+        OutputView view = new ConsoleView();
+        List<String> articles = new ArrayList<>();
+        CommandController controller = new CommandController(view);
+
+        controller.registerCommand(new HelpCommand(view));
+        controller.registerCommand(new ListCommand(view, articles));
+        controller.registerCommand(new CrawlCommand(view, articles));
+        controller.registerCommand(new ExitCommand(view));
+
+        view.showWelcome();
+        view.showMessage("输入 help 查看可用命令");
+
+        try (Scanner scanner = new Scanner(System.in)) {
+            while (true) {
+                view.showPrompt();
+                // Stop cleanly on end of input (Ctrl-D, closed pipe) instead of letting
+                // nextLine() throw NoSuchElementException.
+                if (!scanner.hasNextLine()) {
+                    break;
+                }
+                controller.executeCommand(scanner.nextLine());
+            }
+        }
+    }
+}