4 changed files with 146 additions and 0 deletions
@ -0,0 +1,7 @@ |
|||||
|
package com.cctv.news.command; |
||||
|
|
||||
|
/**
 * Contract for a single console command: a name, a one-line help text, and an action.
 */
public interface Command {

    /**
     * Returns the command's name, for example {@code "crawl"} or {@code "exit"}.
     *
     * <p>NOTE(review): presumably the keyword the controller matches user input against;
     * confirm in {@code CommandController}.
     *
     * @return the command name
     */
    String getName();

    /**
     * Returns a one-line help text for this command. The implementations that ship with this
     * interface use the form {@code "<name> - <description>"}.
     *
     * @return the help text
     */
    String getHelp();

    /**
     * Runs the command.
     *
     * @param args arguments for this invocation. NOTE(review): how the caller derives this array
     *     from the input line is not visible here; confirm before relying on its layout.
     */
    void execute(String[] args);
}
||||
@ -0,0 +1,79 @@ |
|||||
|
package com.cctv.news.command; |
||||
|
|
||||
|
import com.cctv.news.view.OutputView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private final OutputView view; |
||||
|
private final List<String> articles; |
||||
|
|
||||
|
public CrawlCommand(OutputView view, List<String> articles) { |
||||
|
this.view = view; |
||||
|
this.articles = articles; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getHelp() { |
||||
|
return "crawl - 爬取央视新闻"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
view.showMessage("开始爬取央视新闻..."); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect("https://news.cctv.com/world/") |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") |
||||
|
.timeout(10000) |
||||
|
.get(); |
||||
|
|
||||
|
Elements newsItems = doc.select("a.item-title"); |
||||
|
if (newsItems.isEmpty()) { |
||||
|
newsItems = doc.select("h1 a, h2 a, .news-title"); |
||||
|
} |
||||
|
|
||||
|
List<String> newArticles = new ArrayList<>(); |
||||
|
for (Element item : newsItems) { |
||||
|
String title = item.text(); |
||||
|
String url = item.attr("href"); |
||||
|
if (!title.isEmpty() && url.startsWith("http")) { |
||||
|
String articleInfo = title + " - " + url; |
||||
|
newArticles.add(articleInfo); |
||||
|
articles.add(articleInfo); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
if (newArticles.isEmpty()) { |
||||
|
view.showMessage("未能获取到文章,尝试其他选择器..."); |
||||
|
Elements allLinks = doc.select("a[href]"); |
||||
|
for (Element link : allLinks) { |
||||
|
String text = link.text().trim(); |
||||
|
String href = link.attr("href"); |
||||
|
if (text.length() > 10 && href.contains("cctv.com")) { |
||||
|
String articleInfo = text + " - " + href; |
||||
|
newArticles.add(articleInfo); |
||||
|
articles.add(articleInfo); |
||||
|
if (newArticles.size() >= 10) break; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
view.showMessage("成功爬取 " + newArticles.size() + " 篇文章。"); |
||||
|
view.showMessage("使用 list 命令可以查看已抓取的文章。"); |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
view.showError("爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.cctv.news.command; |
||||
|
|
||||
|
import com.cctv.news.view.OutputView; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final OutputView view; |
||||
|
|
||||
|
public ExitCommand(OutputView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getHelp() { |
||||
|
return "exit - 退出程序"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
view.showMessage("感谢使用央视新闻爬虫,再见!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,33 @@ |
|||||
|
package com.cctv.news; |
||||
|
|
||||
|
import com.cctv.news.command.*; |
||||
|
import com.cctv.news.controller.CommandController; |
||||
|
import com.cctv.news.view.ConsoleView; |
||||
|
import com.cctv.news.view.OutputView; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class Main { |
||||
|
public static void main(String[] args) { |
||||
|
OutputView view = new ConsoleView(); |
||||
|
List<String> articles = new ArrayList<>(); |
||||
|
CommandController controller = new CommandController(view); |
||||
|
|
||||
|
controller.registerCommand(new HelpCommand(view)); |
||||
|
controller.registerCommand(new ListCommand(view, articles)); |
||||
|
controller.registerCommand(new CrawlCommand(view, articles)); |
||||
|
controller.registerCommand(new ExitCommand(view)); |
||||
|
|
||||
|
view.showWelcome(); |
||||
|
view.showMessage("输入 help 查看可用命令"); |
||||
|
|
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
while (true) { |
||||
|
view.showPrompt(); |
||||
|
String input = scanner.nextLine(); |
||||
|
controller.executeCommand(input); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue