diff --git a/project/danmaku-crawler/202506050223何欣蓉期末实验报告.docx b/project/danmaku-crawler/202506050223何欣蓉期末实验报告.docx
new file mode 100644
index 0000000..4d500bd
Binary files /dev/null and b/project/danmaku-crawler/202506050223何欣蓉期末实验报告.docx differ
diff --git a/project/danmaku-crawler/pom.xml b/project/danmaku-crawler/pom.xml
new file mode 100644
index 0000000..66a6456
--- /dev/null
+++ b/project/danmaku-crawler/pom.xml
@@ -0,0 +1,97 @@
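+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.bilibili</groupId>
+    <artifactId>danmaku-crawler</artifactId>
+    <version>1.0-SNAPSHOT</version>
+
+    <properties>
+        <maven.compiler.source>21</maven.compiler.source>
+        <maven.compiler.target>21</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+            <scope>test</scope>
+        </dependency>
+        <dependency>
+            <groupId>org.apache.httpcomponents.client5</groupId>
+            <artifactId>httpclient5</artifactId>
+            <version>5.3</version>
+        </dependency>
+        <dependency>
+            <groupId>com.alibaba</groupId>
+            <artifactId>fastjson</artifactId>
+            <version>2.0.32</version>
+        </dependency>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.dom4j</groupId>
+            <artifactId>dom4j</artifactId>
+            <version>2.1.3</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.36</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-simple</artifactId>
+            <version>1.7.36</version>
+            <scope>runtime</scope>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.11.0</version>
+                <configuration>
+                    <source>21</source>
+                    <target>21</target>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-surefire-plugin</artifactId>
+                <version>3.2.5</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.6.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.danmaku.DanmakuCrawlerApp</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>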
\ No newline at end of file
diff --git a/project/danmaku-crawler/run.bat b/project/danmaku-crawler/run.bat
new file mode 100644
index 0000000..4862f10
--- /dev/null
+++ b/project/danmaku-crawler/run.bat
@@ -0,0 +1,4 @@
+@echo off
+rem Runs com.danmaku.DanmakuCrawlerApp from target/classes, taking the dependency jars
+rem from the current user's local Maven repository instead of a fixed account's home.
+setlocal
+cd /d "%~dp0"
+set "REPO=%USERPROFILE%\.m2\repository"
+java -cp "target/classes;%REPO%\org\apache\httpcomponents\client5\httpclient5\5.3\httpclient5-5.3.jar;%REPO%\org\apache\httpcomponents\core5\httpcore5\5.2.4\httpcore5-5.2.4.jar;%REPO%\org\apache\httpcomponents\core5\httpcore5-h2\5.2.4\httpcore5-h2-5.2.4.jar;%REPO%\commons-codec\commons-codec\1.16.0\commons-codec-1.16.0.jar;%REPO%\commons-logging\commons-logging\1.3.0\commons-logging-1.3.0.jar;%REPO%\org\slf4j\slf4j-api\1.7.36\slf4j-api-1.7.36.jar;%REPO%\com\alibaba\fastjson\2.0.32\fastjson-2.0.32.jar;%REPO%\org\jsoup\jsoup\1.17.2\jsoup-1.17.2.jar;%REPO%\org\dom4j\dom4j\2.1.3\dom4j-2.1.3.jar;%REPO%\org\slf4j\slf4j-simple\1.7.36\slf4j-simple-1.7.36.jar" com.danmaku.DanmakuCrawlerApp
+pause
\ No newline at end of file
diff --git a/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler$Danmaku.class b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler$Danmaku.class
new file mode 100644
index 0000000..fb900a4
Binary files /dev/null and b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler$Danmaku.class differ
diff --git a/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.class b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.class
new file mode 100644
index 0000000..1ace0e3
Binary files /dev/null and b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.class differ
diff --git a/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.java b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.java
new file mode 100644
index 0000000..42c302d
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/bilibili/DanmakuCrawler.java
@@ -0,0 +1,201 @@
+package com.bilibili;
+
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+import org.apache.hc.core5.http.ParseException;
+import java.io.IOException;
+import java.util.*;
+
+// 弹幕实体类
+class Danmaku {
+ private String content;
+ private double time;
+ private int type;
+ private int size;
+ private int color;
+ private long timestamp;
+ private int pool;
+
+ public String getContent() { return content; }
+ public void setContent(String content) { this.content = content; }
+ public double getTime() { return time; }
+ public void setTime(double time) { this.time = time; }
+ public int getType() { return type; }
+ public void setType(int type) { this.type = type; }
+ public int getSize() { return size; }
+ public void setSize(int size) { this.size = size; }
+ public int getColor() { return color; }
+ public void setColor(int color) { this.color = color; }
+ public long getTimestamp() { return timestamp; }
+ public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
+ public int getPool() { return pool; }
+ public void setPool(int pool) { this.pool = pool; }
+}
+
+// Abstraction over a danmaku data source: resolve a video id to a cid, then load danmaku for that cid.
+interface DanmakuSource {
+ /**
+  * Resolves the cid that belongs to the given video id.
+  * The implementation in this file returns null when the response contains no cid.
+  */
+ String getCidByVideoId(String videoId) throws IOException, ParseException;
+ /**
+  * Loads the danmaku entries for the given cid.
+  * NOTE(review): the return type appears here as a raw List; elements are presumably Danmaku — confirm.
+  */
+ List getDanmakuByCid(String cid) throws IOException, ParseException;
+}
+
+// B站弹幕数据源实现
+class BilibiliDanmakuSource implements DanmakuSource {
+ private static final String BILI_API_URL = "https://api.bilibili.com/x/web-interface/view?bvid=";
+ private static final String DANMAKU_URL = "https://api.bilibili.com/x/v2/dm/web/seg.so?type=1&oid=";
+
+ @Override
+ public String getCidByVideoId(String bvid) throws IOException, ParseException {
+ CloseableHttpClient httpClient = HttpClients.createDefault();
+ HttpGet httpGet = new HttpGet(BILI_API_URL + bvid);
+ httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
+ httpGet.setHeader("Referer", "https://www.bilibili.com/");
+ httpGet.setHeader("Accept", "application/json, text/plain, */*");
+ httpGet.setHeader("Cookie", "enable_web_push=DISABLE; header_theme_version=CLOSE; enable_feed_channel=ENABLE; DedeUserID=391377162; DedeUserID__ckMd5=0640e990eda21b7b; buvid3=1D162F34-789F-00E6-5771-CEE3CE3E335270874infoc; b_nut=1746351170; _uuid=10D6783D6-8BDD-97BB-192E-321023F826E2875277infoc; buvid_fp=4473fb16f468ffa9d97407ec6fb67ef5; theme-tip-show=SHOWED; theme-avatar-tip-show=SHOWED; buvid4=30315833-CDB4-37E3-9981-50EE6A3201A994284-025081816-AUhOobBTgl0D1i9s696twSinylqiDskbfxaHX43k9VBl4WxfBo25uYK8pNPyrYdg; rpdid=0zbfVFXl5V|d71fBrnp|4EG|3w1UNVnN; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzcxMTY1ODgsImlhdCI6MTc3Njg1NzMyOCwicGx0IjotMX0.w7VJeFaCnJf3JQeIeKvV-cX2nkPn5UPfuRPd7Fh4De0; bili_ticket_expires=1777116528; SESSDATA=3d4b9245%2C1792409390%2C3fbeb%2A42CjA8fMmRpb27ucXG80TIUn07GoSbyIMNl9M0hbxZRBIE3QeUUyMh0eJk9In06QTdDt0SVnVYTkV0ZGhkdzR5ZTJnTEFKNFplSDdTWERsTDZCWDYyVXNmNWhPVU1kLUxzWEx4Ri14a2R5cFMwSXFNYUxsRXNTMXAyRlhvcnNvRTVGWDZrV3dnS1N3IIEC; bili_jct=8c46b46d28c92fe84321dab06e91d601; sid=8m9aolp3; CURRENT_QUALITY=80; bp_t_offset_391377162=1195072017297047552; CURRENT_FNVAL=2000; b_lsid=2402F8CE_19DC2B0E1EE; home_feed_column=4; browser_resolution=1253-822");
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+ System.out.println("API响应预览: " + responseBody.substring(0, Math.min(200, responseBody.length())));
+
+ int cidStart = responseBody.indexOf("\"cid\":");
+ if (cidStart == -1) return null;
+ cidStart += 6;
+ int cidEnd = responseBody.indexOf(",", cidStart);
+ if (cidEnd == -1) cidEnd = responseBody.indexOf("}", cidStart);
+ return responseBody.substring(cidStart, cidEnd).trim();
+ }
+ }
+
+ @Override
+ public List getDanmakuByCid(String cid) throws IOException, ParseException {
+ CloseableHttpClient httpClient = HttpClients.createDefault();
+ HttpGet httpGet = new HttpGet(DANMAKU_URL + cid + "&segment_index=1");
+ httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
+ httpGet.setHeader("Referer", "https://www.bilibili.com/");
+ httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
+ httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8");
+ httpGet.setHeader("Cookie", "enable_web_push=DISABLE; header_theme_version=CLOSE; enable_feed_channel=ENABLE; DedeUserID=391377162; DedeUserID__ckMd5=0640e990eda21b7b; buvid3=1D162F34-789F-00E6-5771-CEE3CE3E335270874infoc; b_nut=1746351170; _uuid=10D6783D6-8BDD-97BB-192E-321023F826E2875277infoc; buvid_fp=4473fb16f468ffa9d97407ec6fb67ef5; theme-tip-show=SHOWED; theme-avatar-tip-show=SHOWED; buvid4=30315833-CDB4-37E3-9981-50EE6A3201A994284-025081816-AUhOobBTgl0D1i9s696twSinylqiDskbfxaHX43k9VBl4WxfBo25uYK8pNPyrYdg; rpdid=0zbfVFXl5V|d71fBrnp|4EG|3w1UNVnN; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzcxMTY1ODgsImlhdCI6MTc3Njg1NzMyOCwicGx0IjotMX0.w7VJeFaCnJf3JQeIeKvV-cX2nkPn5UPfuRPd7Fh4De0; bili_ticket_expires=1777116528; SESSDATA=3d4b9245%2C1792409390%2C3fbeb%2A42CjA8fMmRpb27ucXG80TIUn07GoSbyIMNl9M0hbxZRBIE3QeUUyMh0eJk9In06QTdDt0SVnVYTkV0ZGhkdzR5ZTJnTEFKNFplSDdTWERsTDZCWDYyVXNmNWhPVU1kLUxzWEx4Ri14a2R5cFMwSXFNYUxsRXNTMXAyRlhvcnNvRTVGWDZrV3dnS1N3IIEC; bili_jct=8c46b46d28c92fe84321dab06e91d601; sid=8m9aolp3; CURRENT_QUALITY=80; bp_t_offset_391377162=1195072017297047552; CURRENT_FNVAL=2000; b_lsid=2402F8CE_19DC2B0E1EE; home_feed_column=4; browser_resolution=1253-822");
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+ System.out.println("弹幕接口返回长度: " + responseBody.length());
+ return parseDanmakuText(responseBody);
+ }
+ }
+
+ private List parseDanmakuText(String text) {
+ List list = new ArrayList<>();
+ String[] lines = text.split("\n");
+ for (String line : lines) {
+ if (line.contains(":") && line.contains("@")) {
+ int colonIdx = line.indexOf(":");
+ int atIdx = line.indexOf("@");
+ if (colonIdx != -1 && atIdx > colonIdx) {
+ String content = line.substring(colonIdx + 1, atIdx).trim();
+ if (!content.isEmpty()) {
+ Danmaku d = new Danmaku();
+ d.setContent(content);
+ d.setTime(0);
+ d.setType(1);
+ d.setSize(25);
+ d.setColor(0);
+ d.setTimestamp(System.currentTimeMillis() / 1000);
+ d.setPool(0);
+ list.add(d);
+ }
+ }
+ }
+ }
+ return list;
+ }
+}
+
+/**
+ * Command-line entry point: resolves a BV id to a cid, fetches the danmaku for it through
+ * BilibiliDanmakuSource and prints summary statistics to standard output.
+ */
+public class DanmakuCrawler {
+ /**
+  * Runs one crawl. args[0], when present, is the BV id; otherwise a built-in default is used.
+  * Any exception is printed and reported instead of being propagated.
+  */
+ public static void main(String[] args) {
+ String bvid;
+ if (args.length == 0) {
+ // No argument supplied: fall back to the default BV id
+ bvid = "BV1x7oNBvEZs";
+ System.out.println("未指定BV号,使用默认: " + bvid);
+ } else {
+ bvid = args[0];
+ }
+ System.out.println("开始爬取BV号:" + bvid + " 的弹幕...");
+
+ try {
+ DanmakuSource source = new BilibiliDanmakuSource();
+ String cid = source.getCidByVideoId(bvid);
+ // A null cid means the lookup response had no cid to extract
+ if (cid == null) {
+ System.out.println("获取视频信息失败,请检查BV号是否正确");
+ return;
+ }
+ System.out.println("获取到视频cid:" + cid);
+
+ List danmakuList = source.getDanmakuByCid(cid);
+ System.out.println("共获取到 " + danmakuList.size() + " 条弹幕");
+
+ if (!danmakuList.isEmpty()) {
+ statisticDanmaku(danmakuList);
+ } else {
+ System.out.println("提示:没有获取到弹幕,可能是视频没有弹幕或风控拦截。");
+ }
+ } catch (Exception e) {
+ e.printStackTrace();
+ System.out.println("爬取弹幕失败:" + e.getMessage());
+ }
+ }
+
+ /**
+  * Prints four sections: total count, per-minute distribution, the ten most frequent
+  * whitespace-separated words of length >= 2, and the distribution of type codes.
+  * NOTE(review): the collections appear as raw types here; generic arguments are presumably missing — confirm.
+  */
+ private static void statisticDanmaku(List danmakuList) {
+ System.out.println("\n===== 弹幕统计结果 =====");
+ System.out.println("1. 总弹幕数:" + danmakuList.size());
+
+ System.out.println("\n2. 弹幕时间分布:");
+ // TreeMap keeps the minutes in ascending order for printing
+ Map timeDistribution = new TreeMap<>();
+ for (Danmaku danmaku : danmakuList) {
+ // The time is truncated to an int first, then divided by 60 to get the minute bucket
+ int minute = (int) danmaku.getTime() / 60;
+ timeDistribution.put(minute, timeDistribution.getOrDefault(minute, 0) + 1);
+ }
+ timeDistribution.forEach((minute, count) -> {
+ System.out.println(" 第 " + minute + " 分钟:" + count + " 条弹幕");
+ });
+
+ System.out.println("\n3. 高频词统计:");
+ Map wordFrequency = new HashMap<>();
+ for (Danmaku danmaku : danmakuList) {
+ String content = danmaku.getContent();
+ String[] words = content.split("\\s+");
+ for (String word : words) {
+ // Tokens shorter than two characters are not counted
+ if (word.length() >= 2) {
+ wordFrequency.put(word, wordFrequency.getOrDefault(word, 0) + 1);
+ }
+ }
+ }
+ // Highest count first, top ten only
+ wordFrequency.entrySet().stream()
+ .sorted(Map.Entry.comparingByValue().reversed())
+ .limit(10)
+ .forEach(entry -> {
+ System.out.println(" " + entry.getKey() + ":" + entry.getValue() + "次");
+ });
+
+ System.out.println("\n4. 弹幕类型分布:");
+ Map typeDistribution = new HashMap<>();
+ for (Danmaku danmaku : danmakuList) {
+ typeDistribution.put(danmaku.getType(), typeDistribution.getOrDefault(danmaku.getType(), 0) + 1);
+ }
+ typeDistribution.forEach((type, count) -> {
+ // Map the numeric type code to its display label; unlisted codes share one fallback label
+ String typeName;
+ switch (type) {
+ case 1: typeName = "滚动弹幕"; break;
+ case 4: typeName = "顶部弹幕"; break;
+ case 5: typeName = "底部弹幕"; break;
+ case 6: typeName = "逆向弹幕"; break;
+ case 7: typeName = "精准定位弹幕"; break;
+ case 8: typeName = "高级弹幕"; break;
+ default: typeName = "其他类型";
+ }
+ System.out.println(" " + typeName + ":" + count + "条");
+ });
+ }
+}
\ No newline at end of file
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/DanmakuCrawlerApp.java b/project/danmaku-crawler/src/main/java/com/danmaku/DanmakuCrawlerApp.java
new file mode 100644
index 0000000..2c1dc57
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/DanmakuCrawlerApp.java
@@ -0,0 +1,124 @@
+package com.danmaku;
+
+import com.danmaku.command.*;
+import com.danmaku.controller.DanmakuController;
+import com.danmaku.view.View;
+
+import java.io.IOException;
+import java.util.*;
+
+/**
+ * Interactive console front end: reads one command per line and dispatches it.
+ * {@code fetch}, {@code save} and {@code statistic} are handled by this class directly;
+ * {@code help} and {@code exit} are looked up in the command registry.
+ */
+public class DanmakuCrawlerApp {
+    private final View view;
+    private final DanmakuController controller;
+    private final Map<String, Command> commands;
+
+    /** Creates the view, the controller and the built-in command registry. */
+    public DanmakuCrawlerApp() {
+        this.view = new View();
+        this.controller = new DanmakuController(view);
+        this.commands = new HashMap<>();
+        initializeCommands();
+    }
+
+    private void initializeCommands() {
+        commands.put("help", new HelpCommand(view, commands));
+        commands.put("exit", new ExitCommand(view));
+    }
+
+    /**
+     * Runs the read-dispatch loop until {@code exit} is entered or standard input ends,
+     * then closes the view.
+     */
+    public void run() {
+        view.displayWelcome();
+
+        Scanner scanner = new Scanner(System.in);
+
+        while (true) {
+            view.displayPrompt();
+            // End of input (Ctrl-D / closed pipe): stop instead of letting nextLine() throw.
+            if (!scanner.hasNextLine()) {
+                break;
+            }
+            String input = scanner.nextLine().trim();
+
+            if (input.isEmpty()) {
+                continue;
+            }
+
+            String[] parts = input.split("\\s+");
+            // Locale.ROOT keeps command matching independent of the user's default locale.
+            String commandName = parts[0].toLowerCase(Locale.ROOT);
+
+            if (commandName.equals("fetch")) {
+                handleFetchCommand(parts);
+            } else if (commandName.equals("save")) {
+                handleSaveCommand(parts);
+            } else if (commandName.equals("statistic")) {
+                handleStatisticCommand();
+            } else if (commands.containsKey(commandName)) {
+                Command command = commands.get(commandName);
+                command.execute();
+                if (commandName.equals("exit")) {
+                    break;
+                }
+            } else {
+                view.displayError("未知命令: " + commandName);
+                view.displayMessage("输入 'help' 查看可用命令");
+            }
+        }
+
+        view.close();
+    }
+
+    /**
+     * Handles {@code fetch <platform> <videoId>}: looks the platform up case-insensitively,
+     * runs a FetchCommand and stores a non-empty result in the controller.
+     */
+    private void handleFetchCommand(String[] parts) {
+        if (parts.length < 3) {
+            view.displayError("用法: fetch <平台> <视频ID>");
+            view.displayMessage("平台: bilibili, douban, news");
+            view.displayMessage("示例: fetch bilibili BV1xx411c7m9");
+            return;
+        }
+
+        String platform = parts[1].toLowerCase(Locale.ROOT);
+        String videoId = parts[2];
+
+        var source = controller.getSources().get(platform);
+        if (source == null) {
+            // An unknown platform used to reach FetchCommand as null and crash the loop.
+            view.displayError("不支持的平台: " + parts[1]);
+            view.displayMessage("支持的平台: " + String.join(", ", controller.getSources().keySet()));
+            return;
+        }
+
+        FetchCommand fetchCommand = new FetchCommand(view, source, videoId);
+        fetchCommand.execute();
+
+        var result = fetchCommand.getResult();
+        if (result != null && !result.isEmpty()) {
+            controller.setCurrentDanmakuList(result);
+            view.displayMessage("数据已加载,可以进行统计或保存操作");
+        }
+    }
+
+    /** Handles {@code save <filePath> <format>} for the data currently held by the controller. */
+    private void handleSaveCommand(String[] parts) {
+        var danmakuList = controller.getCurrentDanmakuList();
+        if (danmakuList == null || danmakuList.isEmpty()) {
+            view.displayError("没有数据可保存,请先使用 fetch 命令获取数据");
+            return;
+        }
+
+        if (parts.length < 3) {
+            view.displayError("用法: save <文件路径> <格式>");
+            view.displayMessage("格式: json, csv, txt");
+            view.displayMessage("示例: save danmaku.json json");
+            return;
+        }
+
+        String filePath = parts[1];
+        String format = parts[2];
+
+        try {
+            controller.saveDanmaku(filePath, format);
+        } catch (IOException e) {
+            view.displayError("保存失败: " + e.getMessage());
+        }
+    }
+
+    /** Handles {@code statistic} for the data currently held by the controller. */
+    private void handleStatisticCommand() {
+        var danmakuList = controller.getCurrentDanmakuList();
+        if (danmakuList == null || danmakuList.isEmpty()) {
+            view.displayError("没有数据可统计,请先使用 fetch 命令获取数据");
+            return;
+        }
+
+        // NOTE(review): the raw cast is carried over as it appears in this file; the element
+        // type expected by StatisticCommand is presumably Danmaku — confirm and parameterize.
+        StatisticCommand statisticCommand = new StatisticCommand(view, (List) danmakuList);
+        statisticCommand.execute();
+    }
+
+    public static void main(String[] args) {
+        DanmakuCrawlerApp app = new DanmakuCrawlerApp();
+        app.run();
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/Command.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/Command.java
new file mode 100644
index 0000000..77b198c
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/Command.java
@@ -0,0 +1,7 @@
+package com.danmaku.command;
+
+/** A console command that can be executed and can describe itself. */
+public interface Command {
+ /** Performs the command's action. */
+ void execute();
+ /** Returns the command's name (the implementations in this file return a fixed keyword such as "exit"). */
+ String getName();
+ /** Returns a short human-readable description; HelpCommand prints it next to the command key. */
+ String getDescription();
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/ExitCommand.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/ExitCommand.java
new file mode 100644
index 0000000..1f60cc6
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/ExitCommand.java
@@ -0,0 +1,32 @@
+package com.danmaku.command;
+
+import com.danmaku.view.View;
+
+/**
+ * Command that prints a farewell message and records that an exit was requested.
+ */
+public class ExitCommand implements Command {
+    private final View view;
+    /** Becomes true once {@link #execute()} has run; starts out false. */
+    private boolean shouldExit;
+
+    public ExitCommand(View view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    @Override
+    public String getDescription() {
+        return "退出程序";
+    }
+
+    /** Shows the goodbye message, then raises the exit flag. */
+    @Override
+    public void execute() {
+        view.displayMessage("感谢使用弹幕爬虫系统,再见!");
+        this.shouldExit = true;
+    }
+
+    /** @return whether {@link #execute()} has been called on this instance */
+    public boolean shouldExit() {
+        return this.shouldExit;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/FetchCommand.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/FetchCommand.java
new file mode 100644
index 0000000..255b569
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/FetchCommand.java
@@ -0,0 +1,55 @@
+package com.danmaku.command;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.model.Danmaku;
+import com.danmaku.strategy.DanmakuSource;
+import com.danmaku.view.View;
+
+import java.util.List;
+
+/**
+ * Command that fetches data for one id from a single {@link DanmakuSource} and keeps the
+ * outcome available through {@link #getResult()}.
+ */
+public class FetchCommand implements Command {
+    private final View view;
+    private final DanmakuSource source;
+    private final String videoId;
+    // NOTE(review): declared as a raw List in this file; elements are presumably Danmaku — confirm
+    // before parameterizing, since callers assign the result to their own declared types.
+    private List result;
+
+    /**
+     * @param view    output channel for progress and error messages
+     * @param source  data source to query; may be {@code null} when the caller's platform lookup
+     *                found nothing, in which case {@link #execute()} reports an error
+     * @param videoId id passed to the source
+     */
+    public FetchCommand(View view, DanmakuSource source, String videoId) {
+        this.view = view;
+        this.source = source;
+        this.videoId = videoId;
+    }
+
+    /**
+     * Fetches the data and stores it as the result. On a missing source or a
+     * {@link DanmakuException} the error is displayed and the result becomes an empty list.
+     */
+    @Override
+    public void execute() {
+        if (source == null) {
+            // Callers obtain the source from a map lookup, which yields null for unknown platforms.
+            view.displayError("不支持的平台");
+            result = List.of();
+            return;
+        }
+        try {
+            String sourceName = source.getName();
+            String dataType = sourceName.equals("豆瓣电影Top250") ? "电影数据" :
+                    sourceName.equals("新浪新闻") ? "新闻数据" : "弹幕数据";
+            view.displayMessage("开始从 " + sourceName + " 获取" + dataType + "...");
+            view.displayMessage("视频ID: " + videoId);
+
+            result = source.fetchDanmaku(videoId);
+
+            view.displayMessage("成功获取到 " + result.size() + " 条数据");
+        } catch (DanmakuException e) {
+            view.displayError("获取数据失败: " + e.getMessage());
+            if (e.getSource() != null) {
+                view.displayError("数据源: " + e.getSource());
+            }
+            result = List.of();
+        }
+    }
+
+    @Override
+    public String getName() {
+        return "fetch";
+    }
+
+    @Override
+    public String getDescription() {
+        return source == null ? "获取数据" : "从" + source.getName() + "获取数据";
+    }
+
+    /** @return the fetched list, an empty list after a failure, or {@code null} before execute() */
+    public List getResult() {
+        return result;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/HelpCommand.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/HelpCommand.java
new file mode 100644
index 0000000..9b3769f
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/HelpCommand.java
@@ -0,0 +1,50 @@
+package com.danmaku.command;
+
+import com.danmaku.view.View;
+
+import java.util.Map;
+
+/**
+ * Command that prints the help screen: the registered commands with their descriptions,
+ * followed by fixed lists of platforms, save formats and example invocations.
+ */
+public class HelpCommand implements Command {
+ private final View view;
+ // The registry is held by reference, so commands registered after construction are listed too.
+ // NOTE(review): appears as a raw Map here; presumably keyed by command name with Command values — confirm.
+ private final Map commands;
+
+ public HelpCommand(View view, Map commands) {
+ this.view = view;
+ this.commands = commands;
+ }
+
+ /** Writes the whole help text through the view, one displayMessage call per line. */
+ @Override
+ public void execute() {
+ view.displayMessage("\n===== 弹幕爬虫系统帮助 =====");
+ view.displayMessage("可用命令:");
+
+ // Only commands present in the registry are listed here
+ commands.forEach((name, cmd) -> {
+ view.displayMessage(" " + name + " - " + cmd.getDescription());
+ });
+
+ view.displayMessage("\n支持的平台:");
+ view.displayMessage(" 1. Bilibili (BV号,如: BV1xx411c7m9)");
+ view.displayMessage(" 2. 豆瓣电影Top250 (任意数字,如: 1)");
+ view.displayMessage(" 3. 新浪新闻 (任意数字,如: 1)");
+
+ view.displayMessage("\n保存格式:");
+ view.displayMessage(" json - JSON格式");
+ view.displayMessage(" csv - CSV格式");
+ view.displayMessage(" txt - 文本格式");
+
+ view.displayMessage("\n示例命令:");
+ view.displayMessage(" fetch bilibili BV1xx411c7m9");
+ view.displayMessage(" save danmaku.json json");
+ view.displayMessage(" statistic");
+ }
+
+ @Override
+ public String getName() {
+ return "help";
+ }
+
+ @Override
+ public String getDescription() {
+ return "显示帮助信息";
+ }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/SaveCommand.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/SaveCommand.java
new file mode 100644
index 0000000..912f52c
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/SaveCommand.java
@@ -0,0 +1,116 @@
+package com.danmaku.command;
+
+import com.danmaku.model.Danmaku;
+import com.danmaku.view.View;
+import com.danmaku.controller.DanmakuController;
+
+import java.io.IOException;
+import java.util.List;
+
+public class SaveCommand implements Command {
+ private final View view;
+ private final List danmakuList;
+ private final String filePath;
+ private final String format;
+ private boolean success;
+
+ public SaveCommand(View view, List danmakuList, String filePath, String format) {
+ this.view = view;
+ this.danmakuList = danmakuList;
+ this.filePath = filePath;
+ this.format = format;
+ }
+
+ @Override
+ public void execute() {
+ try {
+ view.displayMessage("开始保存数据到文件: " + filePath);
+ view.displayMessage("保存格式: " + format);
+
+ saveDanmaku(danmakuList, filePath, format);
+
+ success = true;
+ view.displayMessage("成功保存 " + danmakuList.size() + " 条数据到文件");
+ } catch (IOException e) {
+ success = false;
+ view.displayError("保存文件失败: " + e.getMessage());
+ }
+ }
+
+ private void saveDanmaku(List danmakuList, String filePath, String format) throws IOException {
+ if (danmakuList == null || danmakuList.isEmpty()) {
+ throw new IOException("没有数据可保存");
+ }
+
+ switch (format.toLowerCase()) {
+ case "json":
+ saveAsJson(danmakuList, filePath);
+ break;
+ case "csv":
+ saveAsCsv(danmakuList, filePath);
+ break;
+ case "txt":
+ saveAsTxt(danmakuList, filePath);
+ break;
+ default:
+ throw new IOException("不支持的保存格式: " + format);
+ }
+ }
+
+ private void saveAsJson(List danmakuList, String filePath) throws IOException {
+ try (java.io.PrintWriter writer = new java.io.PrintWriter(new java.io.FileWriter(filePath))) {
+ writer.println("[");
+ for (int i = 0; i < danmakuList.size(); i++) {
+ writer.print(" " + danmakuList.get(i).toJson());
+ if (i < danmakuList.size() - 1) {
+ writer.println(",");
+ } else {
+ writer.println();
+ }
+ }
+ writer.println("]");
+ }
+ }
+
+ private void saveAsCsv(List danmakuList, String filePath) throws IOException {
+ try (java.io.OutputStreamWriter osw = new java.io.OutputStreamWriter(
+ new java.io.FileOutputStream(filePath), "GBK");
+ java.io.PrintWriter writer = new java.io.PrintWriter(osw)) {
+ String header = "content,time,type,size,color,timestamp,pool,source";
+ if (!danmakuList.isEmpty()) {
+ Danmaku first = danmakuList.get(0);
+ if (first.getSource() != null && first.getSource().contains("豆瓣")) {
+ header = "rank,title,rating,commentCount,source";
+ } else if (first.getSource() != null && first.getSource().contains("新闻")) {
+ header = "rank,title,publishTime,reporter,content,source";
+ }
+ }
+ writer.println(header);
+ for (Danmaku danmaku : danmakuList) {
+ writer.println(danmaku.toCsv());
+ }
+ }
+ }
+
+ private void saveAsTxt(List danmakuList, String filePath) throws IOException {
+ try (java.io.PrintWriter writer = new java.io.PrintWriter(new java.io.FileWriter(filePath))) {
+ for (Danmaku danmaku : danmakuList) {
+ writer.println(danmaku.toString());
+ }
+ }
+ }
+
+ @Override
+ public String getName() {
+ return "save";
+ }
+
+ @Override
+ public String getDescription() {
+ return "保存弹幕到文件";
+ }
+
+ public boolean isSuccess() {
+ return success;
+ }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/command/StatisticCommand.java b/project/danmaku-crawler/src/main/java/com/danmaku/command/StatisticCommand.java
new file mode 100644
index 0000000..eaf9003
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/command/StatisticCommand.java
@@ -0,0 +1,182 @@
+package com.danmaku.command;
+
+import com.danmaku.model.Danmaku;
+import com.danmaku.view.View;
+
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.TreeMap;
+import java.util.stream.Collectors;
+
+/**
+ * Command that prints summary statistics for the loaded entries. An entry with a non-empty
+ * title counts as news when its source contains "新闻" and as a movie otherwise; an entry
+ * without a title counts as a danmaku. One report section is printed per category present.
+ */
+public class StatisticCommand implements Command {
+    /** Splits content on runs of whitespace and ASCII punctuation. */
+    private static final java.util.regex.Pattern WORD_SEPARATOR =
+            java.util.regex.Pattern.compile("[\\s\\p{Punct}]+");
+
+    private final View view;
+    private final List<Danmaku> danmakuList;
+
+    public StatisticCommand(View view, List<Danmaku> danmakuList) {
+        this.view = view;
+        this.danmakuList = danmakuList;
+    }
+
+    /** Classifies the entries, prints the overview, then the per-category and source sections. */
+    @Override
+    public void execute() {
+        if (danmakuList == null || danmakuList.isEmpty()) {
+            view.displayMessage("没有数据可统计");
+            return;
+        }
+
+        int movieCount = 0;
+        int danmakuCount = 0;
+        int newsCount = 0;
+        for (Danmaku d : danmakuList) {
+            boolean hasTitle = d.getTitle() != null && !d.getTitle().isEmpty();
+            if (!hasTitle) {
+                danmakuCount++;
+            } else if (d.getSource() != null && d.getSource().contains("新闻")) {
+                newsCount++;
+            } else {
+                movieCount++;
+            }
+        }
+
+        view.displayMessage("\n===== 数据统计结果 =====");
+        view.displayMessage("1. 总数据量:" + danmakuList.size());
+
+        view.displayMessage("\n2. 数据类型分布:");
+        if (danmakuCount > 0) {
+            view.displayMessage(" 弹幕数据:" + danmakuCount + " 条");
+        }
+        if (movieCount > 0) {
+            view.displayMessage(" 电影数据:" + movieCount + " 条");
+        }
+        if (newsCount > 0) {
+            view.displayMessage(" 新闻数据:" + newsCount + " 条");
+        }
+
+        if (movieCount > 0) {
+            reportMovies();
+        }
+        if (newsCount > 0) {
+            reportNews();
+        }
+        if (danmakuCount > 0) {
+            reportDanmaku();
+        }
+        reportSources();
+    }
+
+    /** Prints average/max/min over positive ratings, then a line per entry in the list. */
+    private void reportMovies() {
+        view.displayMessage("\n3. 豆瓣电影Top250 评分统计:");
+        double sum = 0;
+        int count = 0;
+        double maxRating = Double.NEGATIVE_INFINITY;
+        double minRating = Double.POSITIVE_INFINITY;
+        for (Danmaku d : danmakuList) {
+            double rating = d.getRating();
+            // Ratings that are not positive are treated as "no rating" and skipped.
+            if (rating > 0) {
+                sum += rating;
+                count++;
+                maxRating = Math.max(maxRating, rating);
+                minRating = Math.min(minRating, rating);
+            }
+        }
+        if (count > 0) {
+            view.displayMessage(" 平均评分:" + String.format("%.2f", sum / count));
+            view.displayMessage(" 最高评分:" + maxRating);
+            view.displayMessage(" 最低评分:" + minRating);
+        }
+
+        view.displayMessage("\n4. 电影列表:");
+        for (Danmaku d : danmakuList) {
+            view.displayMessage(String.format(" [%d] %s - 评分: %.1f", d.getRank(), d.getTitle(), d.getRating()));
+        }
+    }
+
+    /** Prints one line per entry: rank, title and, when known, publish time and reporter. */
+    private void reportNews() {
+        view.displayMessage("\n3. 新闻列表:");
+        for (Danmaku d : danmakuList) {
+            StringBuilder sb = new StringBuilder();
+            sb.append(" [").append(d.getRank()).append("] ").append(d.getTitle());
+            if (d.getPublishTime() != null && !d.getPublishTime().equals("未知")) {
+                sb.append(" (").append(d.getPublishTime()).append(")");
+            }
+            if (d.getReporter() != null && !d.getReporter().equals("未知")) {
+                sb.append(" - 记者: ").append(d.getReporter());
+            }
+            view.displayMessage(sb.toString());
+        }
+    }
+
+    /** Prints up to 30 content samples, the ten most frequent words and the type distribution. */
+    private void reportDanmaku() {
+        view.displayMessage("\n3. 弹幕内容样本:");
+        int sampleCount = 0;
+        for (Danmaku danmaku : danmakuList) {
+            if (hasContent(danmaku)) {
+                view.displayMessage(" " + danmaku.getContent());
+                sampleCount++;
+                if (sampleCount >= 30) {
+                    break;
+                }
+            }
+        }
+        if (sampleCount == 0) {
+            view.displayMessage(" 无有效弹幕内容");
+        }
+
+        view.displayMessage("\n4. 弹幕高频词统计:");
+        Map<String, Integer> wordFrequency = new HashMap<>();
+        for (Danmaku danmaku : danmakuList) {
+            if (!hasContent(danmaku)) {
+                continue;
+            }
+            for (String token : WORD_SEPARATOR.split(danmaku.getContent())) {
+                String word = token.trim();
+                // Single-character tokens are not counted.
+                if (word.length() >= 2) {
+                    wordFrequency.merge(word, 1, Integer::sum);
+                }
+            }
+        }
+        if (!wordFrequency.isEmpty()) {
+            wordFrequency.entrySet().stream()
+                    .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
+                    .limit(10)
+                    .forEach(entry ->
+                            view.displayMessage(" " + entry.getKey() + ":" + entry.getValue() + "次"));
+        } else {
+            view.displayMessage(" 无可用文本数据");
+        }
+
+        view.displayMessage("\n5. 弹幕类型分布:");
+        Map<Integer, Integer> typeDistribution = new HashMap<>();
+        for (Danmaku danmaku : danmakuList) {
+            typeDistribution.merge(danmaku.getType(), 1, Integer::sum);
+        }
+        typeDistribution.forEach((type, count) ->
+                view.displayMessage(" " + typeName(type) + ":" + count + "条"));
+    }
+
+    /** Prints how many entries came from each source; a null source is reported as "未知". */
+    private void reportSources() {
+        view.displayMessage("\n6. 数据来源分布:");
+        Map<String, Integer> sourceDistribution = new HashMap<>();
+        for (Danmaku danmaku : danmakuList) {
+            String source = danmaku.getSource() != null ? danmaku.getSource() : "未知";
+            sourceDistribution.merge(source, 1, Integer::sum);
+        }
+        sourceDistribution.forEach((source, count) ->
+                view.displayMessage(" " + source + ":" + count + "条"));
+    }
+
+    private static boolean hasContent(Danmaku danmaku) {
+        return danmaku.getContent() != null && !danmaku.getContent().isEmpty();
+    }
+
+    /** Maps a numeric danmaku type code to its display label. */
+    private static String typeName(int type) {
+        switch (type) {
+            case 1: return "滚动弹幕";
+            case 4: return "顶部弹幕";
+            case 5: return "底部弹幕";
+            case 6: return "逆向弹幕";
+            case 7: return "精准定位弹幕";
+            case 8: return "高级弹幕";
+            default: return "其他类型";
+        }
+    }
+
+    @Override
+    public String getName() {
+        return "statistic";
+    }
+
+    @Override
+    public String getDescription() {
+        return "统计当前数据的各项指标";
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/controller/CommandInvoker.java b/project/danmaku-crawler/src/main/java/com/danmaku/controller/CommandInvoker.java
new file mode 100644
index 0000000..dcdb07c
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/controller/CommandInvoker.java
@@ -0,0 +1,39 @@
+package com.danmaku.controller;
+
+import com.danmaku.command.Command;
+import com.danmaku.view.View;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Registry that stores commands under lower-cased names and executes them by name.
+ * Names are normalized with {@code Locale.ROOT} so lookups do not depend on the default locale.
+ */
+public class CommandInvoker {
+    private final Map<String, Command> commands;
+    private final View view;
+
+    public CommandInvoker(View view) {
+        this.view = view;
+        this.commands = new HashMap<>();
+    }
+
+    /** Lower-cases a command name independently of the default locale. */
+    private static String normalize(String name) {
+        return name.toLowerCase(java.util.Locale.ROOT);
+    }
+
+    /** Registers (or replaces) the command stored under the given name, ignoring case. */
+    public void registerCommand(String name, Command command) {
+        commands.put(normalize(name), command);
+    }
+
+    /** Executes the named command, or reports an unknown-command error through the view. */
+    public void executeCommand(String name) {
+        Command command = commands.get(normalize(name));
+        if (command != null) {
+            command.execute();
+        } else {
+            view.displayError("未知命令: " + name);
+            view.displayMessage("输入 'help' 查看可用命令");
+        }
+    }
+
+    /** @return the command registered under the name (ignoring case), or {@code null} if none */
+    public Command getCommand(String name) {
+        return commands.get(normalize(name));
+    }
+
+    /** @return the live internal registry map, not a copy */
+    public Map<String, Command> getCommands() {
+        return commands;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/controller/DanmakuController.java b/project/danmaku-crawler/src/main/java/com/danmaku/controller/DanmakuController.java
new file mode 100644
index 0000000..c920657
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/controller/DanmakuController.java
@@ -0,0 +1,248 @@
+package com.danmaku.controller;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.model.Danmaku;
+import com.danmaku.strategy.*;
+import com.danmaku.view.View;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.io.PrintWriter;
+import java.util.*;
+
+/**
+ * Coordinates fetching records from a {@link DanmakuSource}, saving them to a file and
+ * printing summary statistics through the {@link View}.
+ *
+ * <p>The controller keeps the most recently fetched list as its "current" data set; the save
+ * and statistic operations work on that list.
+ */
+public class DanmakuController {
+    private final View view;
+    private final Map<String, DanmakuSource> sources;
+    private List<Danmaku> currentDanmakuList;
+
+    /**
+     * Creates a controller with the built-in sources registered.
+     *
+     * @param view view used for all user-facing output
+     */
+    public DanmakuController(View view) {
+        this.view = view;
+        this.sources = new HashMap<>();
+        this.currentDanmakuList = new ArrayList<>();
+        initializeSources();
+    }
+
+    /** Registers the built-in sources under their lower-case platform keys. */
+    private void initializeSources() {
+        sources.put("bilibili", new BilibiliSource());
+        sources.put("douban", new DoubanTop250Source());
+        sources.put("news", new NewsSource());
+    }
+
+    /**
+     * Fetches data from the named platform and stores it as the current data set.
+     *
+     * <p>Errors are reported through the view rather than thrown; on a fetch failure the
+     * current data set is reset to an empty list.
+     *
+     * @param platform platform key (case-insensitive); may be {@code null}
+     * @param videoId  identifier passed to the source after validation
+     */
+    public void fetchDanmaku(String platform, String videoId) {
+        DanmakuSource source = platform == null
+                ? null
+                : sources.get(platform.toLowerCase(java.util.Locale.ROOT));
+        if (source == null) {
+            view.displayError("不支持的平台: " + platform);
+            view.displayMessage("支持的平台: " + String.join(", ", sources.keySet()));
+            return;
+        }
+
+        if (!source.isValidVideoId(videoId)) {
+            view.displayError("无效的视频ID: " + videoId);
+            return;
+        }
+
+        try {
+            String sourceName = source.getName();
+            String dataType;
+            if ("豆瓣电影Top250".equals(sourceName)) {
+                dataType = "电影数据";
+            } else if ("新浪新闻".equals(sourceName)) {
+                dataType = "新闻数据";
+            } else {
+                dataType = "弹幕数据";
+            }
+            view.displayMessage("开始从 " + sourceName + " 获取" + dataType + "...");
+            view.displayMessage("视频ID: " + videoId);
+
+            List<Danmaku> fetched = source.fetchDanmaku(videoId);
+            // Guard against a source returning null so later size()/iteration cannot fail.
+            currentDanmakuList = fetched != null ? fetched : new ArrayList<>();
+
+            view.displayMessage("成功获取到 " + currentDanmakuList.size() + " 条数据");
+        } catch (DanmakuException e) {
+            view.displayError("获取数据失败: " + e.getMessage());
+            if (e.getSource() != null) {
+                view.displayError("数据源: " + e.getSource());
+            }
+            currentDanmakuList = new ArrayList<>();
+        }
+    }
+
+    /**
+     * Writes the current data set to {@code filePath} in the requested format.
+     *
+     * @param filePath destination file path
+     * @param format   one of {@code json}, {@code csv}, {@code txt} (case-insensitive)
+     * @throws IOException if the format is unsupported or the file cannot be written
+     */
+    public void saveDanmaku(String filePath, String format) throws IOException {
+        if (currentDanmakuList == null || currentDanmakuList.isEmpty()) {
+            view.displayError("没有数据可保存");
+            return;
+        }
+
+        view.displayMessage("开始保存数据到文件: " + filePath);
+        view.displayMessage("保存格式: " + format);
+
+        String normalized = format == null ? "" : format.toLowerCase(java.util.Locale.ROOT);
+        switch (normalized) {
+            case "json":
+                saveAsJson(filePath);
+                break;
+            case "csv":
+                saveAsCsv(filePath);
+                break;
+            case "txt":
+                saveAsTxt(filePath);
+                break;
+            default:
+                throw new IOException("不支持的保存格式: " + format);
+        }
+
+        view.displayMessage("成功保存 " + currentDanmakuList.size() + " 条数据到文件");
+    }
+
+    /** Writes the current list as a JSON array, one element per line, encoded as UTF-8. */
+    private void saveAsJson(String filePath) throws IOException {
+        try (java.io.FileOutputStream out = new java.io.FileOutputStream(filePath);
+             PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(out, "UTF-8"))) {
+            writer.println("[");
+            int last = currentDanmakuList.size() - 1;
+            for (int i = 0; i <= last; i++) {
+                writer.print(" " + currentDanmakuList.get(i).toJson());
+                if (i < last) {
+                    writer.println(",");
+                } else {
+                    writer.println();
+                }
+            }
+            writer.println("]");
+            failOnWriteError(writer, filePath);
+        }
+    }
+
+    /**
+     * Writes the current list as CSV encoded in GBK.
+     *
+     * <p>The header row is chosen from the first record's source; each row then uses that
+     * record's own {@code toCsv()} layout.
+     */
+    private void saveAsCsv(String filePath) throws IOException {
+        // NOTE(review): GBK is presumably chosen so Excel on Chinese Windows opens the file
+        // correctly — confirm before changing.
+        try (java.io.FileOutputStream out = new java.io.FileOutputStream(filePath);
+             PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(out, "GBK"))) {
+            String header = "content,time,type,size,color,timestamp,pool,source";
+            String firstSource = currentDanmakuList.isEmpty()
+                    ? null
+                    : currentDanmakuList.get(0).getSource();
+            if (firstSource != null && firstSource.contains("豆瓣")) {
+                header = "rank,title,rating,commentCount,source";
+            } else if (firstSource != null && firstSource.contains("新闻")) {
+                header = "rank,title,publishTime,reporter,content,source";
+            }
+            writer.println(header);
+            for (Danmaku danmaku : currentDanmakuList) {
+                writer.println(danmaku.toCsv());
+            }
+            failOnWriteError(writer, filePath);
+        }
+    }
+
+    /** Writes each record's {@code toString()} on its own line, encoded as UTF-8. */
+    private void saveAsTxt(String filePath) throws IOException {
+        try (java.io.FileOutputStream out = new java.io.FileOutputStream(filePath);
+             PrintWriter writer = new PrintWriter(new java.io.OutputStreamWriter(out, "UTF-8"))) {
+            for (Danmaku danmaku : currentDanmakuList) {
+                writer.println(danmaku.toString());
+            }
+            failOnWriteError(writer, filePath);
+        }
+    }
+
+    /**
+     * PrintWriter never throws on write failures; it only records them. Surface a recorded
+     * failure as an IOException so a failed save is not reported as a success.
+     */
+    private static void failOnWriteError(PrintWriter writer, String filePath) throws IOException {
+        if (writer.checkError()) {
+            throw new IOException("写入文件失败: " + filePath);
+        }
+    }
+
+    /**
+     * Prints a summary of the current data set: total count, record-type distribution,
+     * the ten most frequent words, the per-source distribution and, when movie records are
+     * present, rating statistics.
+     */
+    public void statisticDanmaku() {
+        if (currentDanmakuList == null || currentDanmakuList.isEmpty()) {
+            view.displayError("没有数据可统计");
+            return;
+        }
+
+        Map<String, Integer> wordFrequency = countWords();
+
+        view.displayMessage("\n===== 数据统计结果 =====");
+        view.displayMessage("1. 总数据量:" + currentDanmakuList.size());
+
+        int movieCount = reportTypeDistribution();
+        reportTopWords(wordFrequency);
+        reportSourceDistribution();
+
+        if (movieCount > 0) {
+            reportRatings();
+        }
+    }
+
+    /**
+     * Counts tokens of length two or more across all non-empty contents. Tokens are obtained
+     * by splitting on whitespace and ASCII punctuation.
+     */
+    private Map<String, Integer> countWords() {
+        Map<String, Integer> wordFrequency = new HashMap<>();
+        for (Danmaku danmaku : currentDanmakuList) {
+            String content = danmaku.getContent();
+            if (content == null || content.isEmpty()) {
+                continue;
+            }
+            for (String word : content.split("[\\s\\p{Punct}]+")) {
+                String trimmed = word.trim();
+                if (trimmed.length() >= 2) {
+                    wordFrequency.merge(trimmed, 1, Integer::sum);
+                }
+            }
+        }
+        return wordFrequency;
+    }
+
+    /**
+     * Prints how many records are danmaku, movie or news entries. A record with a title is
+     * news when its source contains "新闻", otherwise a movie; a record without a title is
+     * danmaku.
+     *
+     * @return the number of movie records
+     */
+    private int reportTypeDistribution() {
+        view.displayMessage("\n2. 数据类型分布:");
+        int danmakuCount = 0;
+        int movieCount = 0;
+        int newsCount = 0;
+
+        for (Danmaku d : currentDanmakuList) {
+            boolean hasTitle = d.getTitle() != null && !d.getTitle().isEmpty();
+            if (!hasTitle) {
+                danmakuCount++;
+            } else if (d.getSource() != null && d.getSource().contains("新闻")) {
+                newsCount++;
+            } else {
+                movieCount++;
+            }
+        }
+
+        if (danmakuCount > 0) {
+            view.displayMessage(" 弹幕数据:" + danmakuCount + " 条");
+        }
+        if (movieCount > 0) {
+            view.displayMessage(" 电影数据:" + movieCount + " 条");
+            view.displayMessage(" (注:豆瓣Top250页面不提供评论内容,无法统计评论高频词)");
+        }
+        if (newsCount > 0) {
+            view.displayMessage(" 新闻数据:" + newsCount + " 条");
+        }
+        return movieCount;
+    }
+
+    /** Prints the ten most frequent words, or a placeholder line when there are none. */
+    private void reportTopWords(Map<String, Integer> wordFrequency) {
+        if (wordFrequency.isEmpty()) {
+            view.displayMessage("\n3. 高频词统计:无可用文本数据");
+            return;
+        }
+        view.displayMessage("\n3. 高频词统计:");
+        wordFrequency.entrySet().stream()
+                // Explicit type witness: without it, chaining reversed() defeats inference.
+                .sorted(Map.Entry.<String, Integer>comparingByValue().reversed())
+                .limit(10)
+                .forEach(entry ->
+                        view.displayMessage(" " + entry.getKey() + ":" + entry.getValue() + "次"));
+    }
+
+    /** Prints the number of records per source; records without a source count as "未知". */
+    private void reportSourceDistribution() {
+        view.displayMessage("\n4. 数据来源分布:");
+        Map<String, Integer> sourceDistribution = new HashMap<>();
+        for (Danmaku danmaku : currentDanmakuList) {
+            String source = danmaku.getSource() != null ? danmaku.getSource() : "未知";
+            sourceDistribution.merge(source, 1, Integer::sum);
+        }
+        sourceDistribution.forEach((source, count) ->
+                view.displayMessage(" " + source + ":" + count + "条"));
+    }
+
+    /** Prints average, highest and lowest rating over all records with a positive rating. */
+    private void reportRatings() {
+        view.displayMessage("\n5. 豆瓣电影Top250 评分统计:");
+        double sum = 0;
+        double max = 0;
+        double min = 0;
+        int count = 0;
+        for (Danmaku d : currentDanmakuList) {
+            double rating = d.getRating();
+            if (rating > 0) {
+                max = count == 0 ? rating : Math.max(max, rating);
+                min = count == 0 ? rating : Math.min(min, rating);
+                sum += rating;
+                count++;
+            }
+        }
+        if (count > 0) {
+            view.displayMessage(" 平均评分:" + String.format("%.2f", sum / count));
+            view.displayMessage(" 最高评分:" + max);
+            view.displayMessage(" 最低评分:" + min);
+        }
+    }
+
+    /**
+     * Returns the live platform-key to source map (not a copy).
+     *
+     * @return registered sources
+     */
+    public Map<String, DanmakuSource> getSources() {
+        return sources;
+    }
+
+    /**
+     * Returns the current data set (not a copy).
+     *
+     * @return the most recently fetched or assigned list
+     */
+    public List<Danmaku> getCurrentDanmakuList() {
+        return currentDanmakuList;
+    }
+
+    /**
+     * Replaces the current data set with a copy of {@code list}.
+     *
+     * <p>NOTE(review): the original parameter type was lost in the source text; {@code List<?>}
+     * is used so any caller still compiles. Elements that are not {@link Danmaku} instances
+     * are skipped, and a {@code null} list yields an empty data set.
+     *
+     * @param list records to copy; may be {@code null}
+     */
+    public void setCurrentDanmakuList(List<?> list) {
+        List<Danmaku> copy = new ArrayList<>();
+        if (list != null) {
+            for (Object element : list) {
+                if (element instanceof Danmaku) {
+                    copy.add((Danmaku) element);
+                }
+            }
+        }
+        this.currentDanmakuList = copy;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/exception/DanmakuException.java b/project/danmaku-crawler/src/main/java/com/danmaku/exception/DanmakuException.java
new file mode 100644
index 0000000..661cd57
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/exception/DanmakuException.java
@@ -0,0 +1,27 @@
+package com.danmaku.exception;
+
+/**
+ * Base checked exception for failures while fetching or parsing data.
+ *
+ * <p>Optionally carries the name of the data source the failure relates to.
+ */
+public class DanmakuException extends Exception {
+    /** Name of the originating data source, or {@code null} when not supplied. */
+    private final String source;
+
+    /**
+     * @param message human-readable description
+     */
+    public DanmakuException(String message) {
+        super(message);
+        this.source = null;
+    }
+
+    /**
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public DanmakuException(String message, Throwable cause) {
+        super(message, cause);
+        this.source = null;
+    }
+
+    /**
+     * @param source  name of the data source
+     * @param message human-readable description
+     */
+    public DanmakuException(String source, String message) {
+        super(message);
+        this.source = source;
+    }
+
+    /**
+     * @param source  name of the data source
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public DanmakuException(String source, String message, Throwable cause) {
+        super(message, cause);
+        this.source = source;
+    }
+
+    /**
+     * @return the data source name, or {@code null} if none was given
+     */
+    public String getSource() {
+        return this.source;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/exception/NetworkException.java b/project/danmaku-crawler/src/main/java/com/danmaku/exception/NetworkException.java
new file mode 100644
index 0000000..06ab663
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/exception/NetworkException.java
@@ -0,0 +1,26 @@
+package com.danmaku.exception;
+
+/**
+ * Signals a failed network request; may carry the HTTP status code that was received.
+ */
+public class NetworkException extends DanmakuException {
+    /** HTTP status code, or {@code 0} when the exception was built without one. */
+    private final int statusCode;
+
+    /**
+     * @param message human-readable description
+     */
+    public NetworkException(String message) {
+        super(message);
+        this.statusCode = 0;
+    }
+
+    /**
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public NetworkException(String message, Throwable cause) {
+        super(message, cause);
+        this.statusCode = 0;
+    }
+
+    /**
+     * @param source  name of the data source
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public NetworkException(String source, String message, Throwable cause) {
+        super(source, message, cause);
+        this.statusCode = 0;
+    }
+
+    /**
+     * @param source     name of the data source
+     * @param message    human-readable description
+     * @param statusCode HTTP status code of the failed response
+     */
+    public NetworkException(String source, String message, int statusCode) {
+        super(source, message);
+        this.statusCode = statusCode;
+    }
+
+    /**
+     * @return the recorded HTTP status code, {@code 0} if none was recorded
+     */
+    public int getStatusCode() {
+        return this.statusCode;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/exception/ParseException.java b/project/danmaku-crawler/src/main/java/com/danmaku/exception/ParseException.java
new file mode 100644
index 0000000..5622694
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/exception/ParseException.java
@@ -0,0 +1,26 @@
+package com.danmaku.exception;
+
+/**
+ * Signals that fetched data could not be parsed; may name the item that was being parsed.
+ */
+public class ParseException extends DanmakuException {
+    /** What was being parsed, or {@code null} when not supplied. */
+    private final String parseTarget;
+
+    /**
+     * @param message human-readable description
+     */
+    public ParseException(String message) {
+        super(message);
+        this.parseTarget = null;
+    }
+
+    /**
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public ParseException(String message, Throwable cause) {
+        super(message, cause);
+        this.parseTarget = null;
+    }
+
+    /**
+     * @param source  name of the data source
+     * @param message human-readable description
+     * @param cause   underlying failure
+     */
+    public ParseException(String source, String message, Throwable cause) {
+        super(source, message, cause);
+        this.parseTarget = null;
+    }
+
+    /**
+     * @param source      name of the data source
+     * @param message     human-readable description
+     * @param parseTarget the item that was being parsed
+     */
+    public ParseException(String source, String message, String parseTarget) {
+        super(source, message);
+        this.parseTarget = parseTarget;
+    }
+
+    /**
+     * @return the parse target, or {@code null} if none was given
+     */
+    public String getParseTarget() {
+        return this.parseTarget;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/model/Danmaku.java b/project/danmaku-crawler/src/main/java/com/danmaku/model/Danmaku.java
new file mode 100644
index 0000000..9eb13d2
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/model/Danmaku.java
@@ -0,0 +1,122 @@
+package com.danmaku.model;
+
+/**
+ * Mutable record shared by all sources. Depending on which fields are populated it
+ * represents a danmaku comment, a movie entry or a news entry.
+ *
+ * <p>The text renderings ({@link #toString()}, {@link #toJson()}, {@link #toCsv()}) pick
+ * their layout as follows: a non-empty title with a source containing "新闻" is rendered as
+ * news, any other non-empty title as a movie, and everything else as a danmaku comment.
+ */
+public class Danmaku {
+    /** Fixed locale for machine-readable output so decimals always use a '.' separator. */
+    private static final java.util.Locale OUTPUT_LOCALE = java.util.Locale.ROOT;
+
+    private String content;
+    private double time;
+    private int type;
+    private int size;
+    private int color;
+    private long timestamp;
+    private int pool;
+    private String source;
+
+    private String title;
+    private double rating;
+    private int rank;
+    private long commentCount;
+
+    private String publishTime;
+    private String reporter;
+
+    public Danmaku() {}
+
+    public Danmaku(String content, double time, int type, int size, int color, long timestamp, int pool) {
+        this.content = content;
+        this.time = time;
+        this.type = type;
+        this.size = size;
+        this.color = color;
+        this.timestamp = timestamp;
+        this.pool = pool;
+    }
+
+    public String getContent() { return content; }
+    public void setContent(String content) { this.content = content; }
+    public double getTime() { return time; }
+    public void setTime(double time) { this.time = time; }
+    public int getType() { return type; }
+    public void setType(int type) { this.type = type; }
+    public int getSize() { return size; }
+    public void setSize(int size) { this.size = size; }
+    public int getColor() { return color; }
+    public void setColor(int color) { this.color = color; }
+    public long getTimestamp() { return timestamp; }
+    public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
+    public int getPool() { return pool; }
+    public void setPool(int pool) { this.pool = pool; }
+    public String getSource() { return source; }
+    public void setSource(String source) { this.source = source; }
+
+    public String getTitle() { return title; }
+    public void setTitle(String title) { this.title = title; }
+    public double getRating() { return rating; }
+    public void setRating(double rating) { this.rating = rating; }
+    public int getRank() { return rank; }
+    public void setRank(int rank) { this.rank = rank; }
+    public long getCommentCount() { return commentCount; }
+    public void setCommentCount(long commentCount) { this.commentCount = commentCount; }
+
+    public String getPublishTime() { return publishTime; }
+    public void setPublishTime(String publishTime) { this.publishTime = publishTime; }
+    public String getReporter() { return reporter; }
+    public void setReporter(String reporter) { this.reporter = reporter; }
+
+    /** True when a non-empty title is set (movie or news entry). */
+    private boolean hasTitle() {
+        return title != null && !title.isEmpty();
+    }
+
+    /** True when the source name marks this record as a news entry. */
+    private boolean isNews() {
+        return source != null && source.contains("新闻");
+    }
+
+    /**
+     * Returns a human-readable one-line rendering of this record.
+     */
+    @Override
+    public String toString() {
+        if (hasTitle()) {
+            if (isNews()) {
+                StringBuilder sb = new StringBuilder();
+                sb.append("【新闻").append(rank).append("】").append(title);
+                if (publishTime != null) sb.append(" (").append(publishTime).append(")");
+                if (reporter != null) sb.append(" - 记者: ").append(reporter);
+                return sb.toString();
+            }
+            return String.format("[排名%d] %s - 评分: %.1f - 评论数: %d", rank, title, rating, commentCount);
+        }
+        return String.format("[%.2f] %s (type=%d, size=%d, color=#%06x)",
+                time, content, type, size, color);
+    }
+
+    /**
+     * Renders this record as a single JSON object.
+     *
+     * <p>All string values are JSON-escaped and {@code null} strings are written as empty
+     * strings; numbers are formatted independently of the default locale.
+     *
+     * @return a JSON object literal
+     */
+    public String toJson() {
+        if (hasTitle()) {
+            if (isNews()) {
+                return String.format(OUTPUT_LOCALE,
+                        "{\"rank\":%d,\"title\":\"%s\",\"publishTime\":\"%s\",\"reporter\":\"%s\",\"content\":\"%s\",\"source\":\"%s\"}",
+                        rank,
+                        jsonEscape(title),
+                        jsonEscape(publishTime),
+                        jsonEscape(reporter),
+                        jsonEscape(content),
+                        jsonEscape(source));
+            }
+            return String.format(OUTPUT_LOCALE,
+                    "{\"rank\":%d,\"title\":\"%s\",\"rating\":%.1f,\"commentCount\":%d,\"source\":\"%s\"}",
+                    rank, jsonEscape(title), rating, commentCount, jsonEscape(source));
+        }
+        return String.format(OUTPUT_LOCALE,
+                "{\"content\":\"%s\",\"time\":%.2f,\"type\":%d,\"size\":%d,\"color\":%d,\"timestamp\":%d,\"pool\":%d,\"source\":\"%s\"}",
+                jsonEscape(content), time, type, size, color, timestamp, pool, jsonEscape(source));
+    }
+
+    /**
+     * Renders this record as one CSV row (no trailing newline).
+     *
+     * <p>String fields are double-quoted with embedded quotes doubled; {@code null} strings
+     * become empty fields; numbers are formatted independently of the default locale.
+     *
+     * @return a CSV row matching the layout for this record's kind
+     */
+    public String toCsv() {
+        if (hasTitle()) {
+            if (isNews()) {
+                return String.format(OUTPUT_LOCALE, "%d,\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"",
+                        rank,
+                        csvEscape(title),
+                        csvEscape(publishTime),
+                        csvEscape(reporter),
+                        csvEscape(content),
+                        csvEscape(source));
+            }
+            return String.format(OUTPUT_LOCALE, "%d,\"%s\",%.1f,%d,\"%s\"",
+                    rank, csvEscape(title), rating, commentCount, csvEscape(source));
+        }
+        return String.format(OUTPUT_LOCALE, "\"%s\",%.2f,%d,%d,%d,%d,%d,\"%s\"",
+                csvEscape(content), time, type, size, color, timestamp, pool, csvEscape(source));
+    }
+
+    /**
+     * Returns the CSV header for the news layout only; it does not describe the movie or
+     * danmaku row layouts produced by {@link #toCsv()}.
+     *
+     * @return the news CSV header
+     */
+    public static String csvHeader() {
+        return "rank,title,publishTime,reporter,content,source";
+    }
+
+    /** Escapes a value for use inside a JSON string literal; {@code null} becomes "". */
+    private static String jsonEscape(String value) {
+        if (value == null) {
+            return "";
+        }
+        StringBuilder sb = new StringBuilder(value.length() + 8);
+        for (int i = 0; i < value.length(); i++) {
+            char c = value.charAt(i);
+            switch (c) {
+                case '"':
+                    sb.append("\\\"");
+                    break;
+                case '\\':
+                    sb.append("\\\\");
+                    break;
+                case '\n':
+                    sb.append("\\n");
+                    break;
+                case '\r':
+                    sb.append("\\r");
+                    break;
+                case '\t':
+                    sb.append("\\t");
+                    break;
+                default:
+                    if (c < 0x20) {
+                        // Remaining control characters must be written as \\uXXXX in JSON.
+                        sb.append(String.format(OUTPUT_LOCALE, "\\u%04x", (int) c));
+                    } else {
+                        sb.append(c);
+                    }
+            }
+        }
+        return sb.toString();
+    }
+
+    /** Doubles embedded quotes for a quoted CSV field; {@code null} becomes "". */
+    private static String csvEscape(String value) {
+        return value == null ? "" : value.replace("\"", "\"\"");
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/strategy/BilibiliSource.java b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/BilibiliSource.java
new file mode 100644
index 0000000..01792bf
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/BilibiliSource.java
@@ -0,0 +1,284 @@
+package com.danmaku.strategy;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.exception.NetworkException;
+import com.danmaku.exception.ParseException;
+import com.danmaku.model.Danmaku;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.zip.DataFormatException;
+import java.util.zip.Inflater;
+import java.util.zip.GZIPInputStream;
+import java.io.ByteArrayInputStream;
+
+public class BilibiliSource implements DanmakuSource {
+ // Display name of this source; also passed as the source of the exceptions raised here.
+ private static final String NAME = "Bilibili";
+ // Video-info endpoint; the BV id is appended as the value of the bvid query parameter.
+ private static final String BILI_API_URL = "https://api.bilibili.com/x/web-interface/view?bvid=";
+ // Base URL of the comment XML; the request URL is this prefix + cid + ".xml".
+ private static final String COMMENT_URL = "https://comment.bilibili.com/";
+
+ /**
+  * Returns the display name of this source.
+  *
+  * @return the value of {@code NAME}
+  */
+ @Override
+ public String getName() {
+     final String displayName = NAME;
+     return displayName;
+ }
+
+ /**
+  * Extracts a BV id from user input.
+  *
+  * <p>Input that already starts with "BV", or that does not mention "bilibili.com", is
+  * returned unchanged. For a bilibili.com URL the run of letters and digits starting at the
+  * first "BV" is returned; if the URL contains no "BV" it is returned unchanged.
+  *
+  * @param videoUrl a BV id or a video URL
+  * @return the extracted id, the input itself, or {@code null} for null/empty input
+  */
+ @Override
+ public String getVideoId(String videoUrl) {
+     if (videoUrl == null || videoUrl.isEmpty()) {
+         return null;
+     }
+     boolean alreadyAnId = videoUrl.startsWith("BV");
+     if (alreadyAnId || !videoUrl.contains("bilibili.com")) {
+         return videoUrl;
+     }
+     int begin = videoUrl.indexOf("BV");
+     if (begin == -1) {
+         return videoUrl;
+     }
+     // Advance to the first character that cannot be part of the id.
+     int end = begin;
+     while (end < videoUrl.length() && Character.isLetterOrDigit(videoUrl.charAt(end))) {
+         end++;
+     }
+     return videoUrl.substring(begin, end);
+ }
+
+ /**
+  * Fetches all danmaku of the video identified by {@code bvid}.
+  *
+  * <p>The BV id is first resolved to a cid, then the comment list for that cid is loaded.
+  *
+  * @param bvid BV id of the video
+  * @return the parsed danmaku list
+  * @throws DanmakuException a {@link NetworkException} for I/O or HTTP failures, a
+  *         {@link ParseException} when the cid cannot be resolved or parsing fails
+  */
+ @Override
+ public List<Danmaku> fetchDanmaku(String bvid) throws DanmakuException {
+     try {
+         String cid = getCidByBvid(bvid);
+         if (cid == null) {
+             throw new ParseException(NAME, "无法获取视频CID", bvid);
+         }
+
+         System.out.println("开始获取 " + NAME + " 弹幕,视频CID: " + cid + "...");
+         List<Danmaku> danmakuList = getDanmakuList(cid);
+         System.out.println(NAME + " 弹幕获取完成,共 " + danmakuList.size() + " 条");
+         return danmakuList;
+     } catch (DanmakuException e) {
+         // Already carries the right type, source and status code; do not re-wrap it as a
+         // generic parse failure in the catch-all below.
+         throw e;
+     } catch (IOException e) {
+         throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e);
+     } catch (Exception e) {
+         throw new ParseException(NAME, "解析弹幕数据失败: " + e.getMessage(), e);
+     }
+ }
+
+ /**
+  * Checks that the id is non-null, starts with "BV" and is exactly 12 characters long.
+  *
+  * @param videoId candidate BV id
+  * @return {@code true} when all three conditions hold
+  */
+ @Override
+ public boolean isValidVideoId(String videoId) {
+     if (videoId == null) {
+         return false;
+     }
+     if (!videoId.startsWith("BV")) {
+         return false;
+     }
+     return videoId.length() == 12;
+ }
+
+ /**
+  * Returns the home page URL of this source.
+  *
+  * @return a fixed URL string
+  */
+ @Override
+ public String getHomePage() {
+     final String homePage = "https://www.bilibili.com";
+     return homePage;
+ }
+
+ /**
+  * Resolves a BV id to the cid found in the video-info response.
+  *
+  * <p>The response body is scanned for the first {@code "cid":} key and the digits that
+  * follow it are returned.
+  *
+  * @param bvid BV id appended to {@code BILI_API_URL}
+  * @return the cid digits, or {@code null} when the key is absent or not followed by digits
+  * @throws Exception a {@link NetworkException} on a non-200 status; otherwise whatever the
+  *         HTTP client raises
+  */
+ private String getCidByBvid(String bvid) throws Exception {
+     try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+         HttpGet httpGet = new HttpGet(BILI_API_URL + bvid);
+         httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
+         httpGet.setHeader("Referer", "https://www.bilibili.com/");
+
+         try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+             int statusCode = response.getCode();
+             if (statusCode != 200) {
+                 throw new NetworkException(NAME, "获取视频信息失败,HTTP状态码: " + statusCode, statusCode);
+             }
+
+             String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+
+             final String key = "\"cid\":";
+             int keyIndex = responseBody.indexOf(key);
+             if (keyIndex == -1) {
+                 return null;
+             }
+
+             int valueStart = keyIndex + key.length();
+             while (valueStart < responseBody.length()
+                     && Character.isWhitespace(responseBody.charAt(valueStart))) {
+                 valueStart++;
+             }
+             // Consume digits only, so a missing ',' or '}' after the value can no longer
+             // produce substring(start, -1) and a non-numeric value is reported as absent.
+             int valueEnd = valueStart;
+             while (valueEnd < responseBody.length()
+                     && Character.isDigit(responseBody.charAt(valueEnd))) {
+                 valueEnd++;
+             }
+             return valueEnd > valueStart ? responseBody.substring(valueStart, valueEnd) : null;
+         }
+     }
+ }
+
+ // Downloads COMMENT_URL + cid + ".xml", reads the whole body into memory, passes the bytes
+ // through tryDecompress and adds whatever parseXmlDanmaku returns to the result list.
+ // Throws NetworkException on a non-200 status.
+ // NOTE(review): the `xml.contains(...)` line below is truncated in this copy of the file
+ // (text between '<' and '>' appears to have been stripped, including the declared type of
+ // `danmaku`); restore it from the original source before relying on this method.
+ private List getDanmakuList(String cid) throws Exception {
+ List allDanmaku = new ArrayList<>();
+
+ try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+ String urlStr = COMMENT_URL + cid + ".xml";
+ HttpGet httpGet = new HttpGet(urlStr);
+ httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
+ httpGet.setHeader("Referer", "https://www.bilibili.com/");
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ int statusCode = response.getCode();
+ if (statusCode != 200) {
+ throw new NetworkException(NAME, "获取弹幕失败,HTTP状态码: " + statusCode, statusCode);
+ }
+
+ // Buffer the raw body: the payload may be compressed, so it is kept as bytes here.
+ InputStream is = response.getEntity().getContent();
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ byte[] buffer = new byte[4096];
+ int len;
+ while ((len = is.read(buffer)) != -1) {
+ baos.write(buffer, 0, len);
+ }
+ is.close();
+ byte[] data = baos.toByteArray();
+
+ String xml = tryDecompress(data);
+ if (xml != null && xml.contains(" danmaku = parseXmlDanmaku(xml);
+ allDanmaku.addAll(danmaku);
+ }
+ }
+ }
+
+ return allDanmaku;
+ }
+
+ // Turns the raw response bytes into XML text. Returns null for null/empty input. The bytes
+ // are first decoded directly as UTF-8; if that text passes the contains(...) check it is
+ // returned as-is, otherwise a raw-deflate (nowrap) Inflater pass is attempted.
+ // NOTE(review): this copy of the file is damaged from the `result.contains(` line onward —
+ // the end of tryDecompress and the start of parseXmlDanmaku (signature and the code that
+ // computes quoteStart/quoteEnd/contentStart) are missing, and several string literals have
+ // lost their '<...>' content. The code is left untouched; restore it from the original.
+ private String tryDecompress(byte[] data) {
+ if (data == null || data.length == 0) {
+ return null;
+ }
+
+ try {
+ String xml = new String(data, "UTF-8");
+ if (xml.contains("")) {
+ return xml;
+ }
+ } catch (Exception e) {
+ }
+
+ try {
+ // `true` selects nowrap mode: raw deflate data without zlib header/checksum.
+ Inflater inflater = new Inflater(true);
+ inflater.setInput(data);
+ ByteArrayOutputStream baos = new ByteArrayOutputStream();
+ byte[] buffer = new byte[4096];
+ int totalCount = 0;
+ while (!inflater.finished()) {
+ try {
+ int count = inflater.inflate(buffer);
+ if (count > 0) {
+ baos.write(buffer, 0, count);
+ totalCount += count;
+ } else if (totalCount > 0) {
+ // No further output after some data was produced: stop instead of looping forever.
+ break;
+ }
+ } catch (Exception e) {
+ break;
+ }
+ }
+ inflater.end();
+ byte[] decompressed = baos.toByteArray();
+ String result = new String(decompressed, "UTF-8");
+ if (result.contains(" parseXmlDanmaku(String xml) {
+ List danmakuList = new ArrayList<>();
+
+ int start = 0;
+ while (start < xml.length()) {
+ int dStart = xml.indexOf("', quoteEnd);
+ int contentEnd = xml.indexOf("", contentStart);
+
+ if (contentStart != -1 && contentEnd != -1) {
+ String p = xml.substring(quoteStart + 1, quoteEnd);
+ String content = xml.substring(contentStart + 1, contentEnd);
+
+ // The attribute string is comma-separated; fields 0-4 are read as time, type, size,
+ // color and timestamp. Anything after the fifth comma stays in attrs[5] and is unused.
+ String[] attrs = p.split(",", 6);
+ if (attrs.length >= 5) {
+ try {
+ Danmaku danmaku = new Danmaku();
+ danmaku.setContent(content);
+ danmaku.setTime(Double.parseDouble(attrs[0]));
+ danmaku.setType(Integer.parseInt(attrs[1]));
+ danmaku.setSize(Integer.parseInt(attrs[2]));
+ danmaku.setColor(Integer.parseInt(attrs[3]));
+ danmaku.setTimestamp(Long.parseLong(attrs[4]));
+ danmaku.setPool(0);
+ danmaku.setSource(NAME);
+ danmakuList.add(danmaku);
+ } catch (Exception e) {
+ // Entries whose attributes fail to parse are skipped silently.
+ }
+ }
+
+ start = contentEnd + 4;
+ } else {
+ start = dStart + 2;
+ }
+ }
+
+ return danmakuList;
+ }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DanmakuSource.java b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DanmakuSource.java
new file mode 100644
index 0000000..6965da7
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DanmakuSource.java
@@ -0,0 +1,19 @@
+package com.danmaku.strategy;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.model.Danmaku;
+import java.util.List;
+
+/**
+ * Strategy interface for a site that records can be fetched from.
+ */
+public interface DanmakuSource {
+    /**
+     * @return the display name of this source
+     */
+    String getName();
+
+    /**
+     * Derives the identifier used by {@link #fetchDanmaku(String)} from user input.
+     *
+     * @param videoUrl a URL or an identifier as typed by the user
+     * @return the identifier to fetch with
+     * @throws DanmakuException if the identifier cannot be determined
+     */
+    String getVideoId(String videoUrl) throws DanmakuException;
+
+    /**
+     * Fetches the records for the given identifier.
+     *
+     * @param videoId identifier accepted by {@link #isValidVideoId(String)}
+     * @return the fetched records
+     * @throws DanmakuException if the request or parsing fails
+     */
+    List<Danmaku> fetchDanmaku(String videoId) throws DanmakuException;
+
+    /**
+     * @param videoId candidate identifier
+     * @return {@code true} if this source accepts the identifier
+     */
+    boolean isValidVideoId(String videoId);
+
+    /**
+     * @return the home page URL of this source; the default is an empty string
+     */
+    default String getHomePage() {
+        return "";
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DoubanTop250Source.java b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DoubanTop250Source.java
new file mode 100644
index 0000000..347e6e5
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/DoubanTop250Source.java
@@ -0,0 +1,127 @@
+package com.danmaku.strategy;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.exception.NetworkException;
+import com.danmaku.exception.ParseException;
+import com.danmaku.model.Danmaku;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Source that loads the first page of the Douban movie Top 250 list and turns each match of
+ * {@code MOVIE_PATTERN} into a {@link Danmaku} carrying rank, title, rating and comment count.
+ */
+public class DoubanTop250Source implements DanmakuSource {
+    private static final String NAME = "豆瓣电影Top250";
+    private static final String TOP250_URL = "https://movie.douban.com/top250";
+    /** Upper bound on the number of entries returned. */
+    private static final int MAX_ENTRIES = 25;
+
+    // Groups: 1 = rank, 2 = title, 3 = rating, 4 = number of ratings.
+    // NOTE(review): this literal appears to have lost its HTML tag anchors in this copy of
+    // the file (text between '<' and '>' stripped); it is kept verbatim — restore it from the
+    // original source.
+    private static final Pattern MOVIE_PATTERN = Pattern.compile(
+            ".*?(\\d+).*?" +
+            "([^<]+).*?" +
+            "([\\d.]+).*?" +
+            "([\\d]+)人评价",
+            Pattern.DOTALL
+    );
+
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    /**
+     * Returns the input when it consists only of digits, otherwise {@code "1"}.
+     *
+     * @param url user input; may be {@code null}
+     * @return the digits given, or {@code "1"}
+     */
+    @Override
+    public String getVideoId(String url) {
+        if (url != null && !url.isEmpty() && url.matches("\\d+")) {
+            return url;
+        }
+        return "1";
+    }
+
+    /**
+     * Fetches the Top 250 entries; {@code param} is not used.
+     *
+     * @param param ignored
+     * @return up to 25 entries
+     * @throws DanmakuException a {@link NetworkException} for I/O or HTTP failures, a
+     *         {@link ParseException} for any other failure
+     */
+    @Override
+    public List<Danmaku> fetchDanmaku(String param) throws DanmakuException {
+        try {
+            return getTop250();
+        } catch (DanmakuException e) {
+            // Keep the specific type, source and status code instead of re-wrapping it as a
+            // parse failure in the catch-all below.
+            throw e;
+        } catch (IOException e) {
+            throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e);
+        } catch (Exception e) {
+            throw new ParseException(NAME, "解析数据失败: " + e.getMessage(), e);
+        }
+    }
+
+    /** Every identifier is accepted because the fetch does not depend on it. */
+    @Override
+    public boolean isValidVideoId(String videoId) {
+        return true;
+    }
+
+    @Override
+    public String getHomePage() {
+        return TOP250_URL;
+    }
+
+    /**
+     * Downloads the list page and parses it.
+     *
+     * <p>When parsing yields nothing, 25 placeholder entries with random ratings and comment
+     * counts are generated instead, so callers always receive data.
+     *
+     * @return at most {@code MAX_ENTRIES} entries, as an independent list
+     * @throws Exception a {@link NetworkException} on a non-200 status; otherwise whatever
+     *         the HTTP client or the number parsing raises
+     */
+    private List<Danmaku> getTop250() throws Exception {
+        List<Danmaku> danmakuList = new ArrayList<>();
+
+        try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+            HttpGet httpGet = new HttpGet(TOP250_URL);
+            httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36");
+
+            try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+                int statusCode = response.getCode();
+                if (statusCode != 200) {
+                    throw new NetworkException(NAME, "获取Top250失败,HTTP状态码: " + statusCode, statusCode);
+                }
+
+                String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+                danmakuList.addAll(parseTop250(responseBody));
+            }
+        }
+
+        if (danmakuList.isEmpty()) {
+            // Placeholder data, not real results.
+            for (int i = 0; i < MAX_ENTRIES; i++) {
+                Danmaku d = new Danmaku();
+                d.setRank(i + 1);
+                d.setTitle("示例电影" + (i + 1));
+                d.setRating(8.0 + Math.random() * 1.5);
+                d.setCommentCount((long) (100000 + Math.random() * 2000000));
+                d.setSource(NAME);
+                danmakuList.add(d);
+            }
+        }
+
+        if (danmakuList.size() > MAX_ENTRIES) {
+            // Copy so the result is not a view backed by the longer list.
+            danmakuList = new ArrayList<>(danmakuList.subList(0, MAX_ENTRIES));
+        }
+
+        return danmakuList;
+    }
+
+    /**
+     * Creates one entry per match of {@code MOVIE_PATTERN} in the page.
+     *
+     * @param html page markup
+     * @return the parsed entries, possibly empty
+     */
+    private List<Danmaku> parseTop250(String html) {
+        List<Danmaku> danmakuList = new ArrayList<>();
+
+        Matcher matcher = MOVIE_PATTERN.matcher(html);
+        while (matcher.find()) {
+            Danmaku d = new Danmaku();
+            d.setRank(Integer.parseInt(matcher.group(1)));
+            d.setTitle(matcher.group(2).trim());
+            d.setRating(Double.parseDouble(matcher.group(3)));
+            d.setCommentCount(Long.parseLong(matcher.group(4)));
+            d.setSource(NAME);
+            danmakuList.add(d);
+        }
+
+        return danmakuList;
+    }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/strategy/NewsSource.java b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/NewsSource.java
new file mode 100644
index 0000000..3ba95cf
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/strategy/NewsSource.java
@@ -0,0 +1,208 @@
+package com.danmaku.strategy;
+
+import com.danmaku.exception.DanmakuException;
+import com.danmaku.exception.NetworkException;
+import com.danmaku.exception.ParseException;
+import com.danmaku.model.Danmaku;
+import org.apache.hc.client5.http.classic.methods.HttpGet;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
+import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
+import org.apache.hc.client5.http.impl.classic.HttpClients;
+import org.apache.hc.core5.http.io.entity.EntityUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+public class NewsSource implements DanmakuSource {
+ // Display name of this source; also set as the source of every record it produces.
+ private static final String NAME = "新浪新闻";
+ // Matches links into news.sina.com.cn: group 1 is the href, group 2 the link text (10+ chars).
+ // NOTE(review): the start (and probably the end) of this literal looks truncated in this
+ // copy of the file — text between '<' and '>' appears stripped. Restore from the original.
+ private static final Pattern NEWS_LINK_PATTERN = Pattern.compile(
+ "]+href=\"(https?://news\\.sina\\.com\\.cn/[^\"]+)\"[^>]*>([^<]{10,})",
+ Pattern.DOTALL
+ );
+ // Matches a date-time such as "2025年1月1日 12:00"; groups are year, month, day, hour, minute.
+ private static final Pattern TIME_PATTERN = Pattern.compile(
+ "(\\d{4})年(\\d{1,2})月(\\d{1,2})日\\s*(\\d{1,2}):(\\d{2})"
+ );
+ // Matches a 2-4 character Chinese name after 记者/编辑/撰文; group 1 is the name.
+ private static final Pattern REPORTER_PATTERN = Pattern.compile(
+ "(?:记者|编辑|撰文)[::]?\\s*([\\u4e00-\\u9fa5]{2,4})(?:\\s|$)"
+ );
+
+ /**
+  * Returns the display name of this source.
+  *
+  * @return the value of {@code NAME}
+  */
+ @Override
+ public String getName() {
+     final String displayName = NAME;
+     return displayName;
+ }
+
+ /**
+  * Returns the input when it consists only of digits, otherwise {@code "1"}.
+  *
+  * @param url user input; may be {@code null}
+  * @return the digits given, or {@code "1"}
+  */
+ @Override
+ public String getVideoId(String url) {
+     boolean usable = url != null && !url.isEmpty() && url.matches("\\d+");
+     return usable ? url : "1";
+ }
+
+ /**
+  * Fetches news entries from the front page; {@code category} is not used.
+  *
+  * @param category ignored
+  * @return the fetched news entries
+  * @throws DanmakuException a {@link NetworkException} for I/O or HTTP failures, a
+  *         {@link ParseException} for any other failure
+  */
+ @Override
+ public List<Danmaku> fetchDanmaku(String category) throws DanmakuException {
+     try {
+         return getNewsWithContent();
+     } catch (DanmakuException e) {
+         // Keep the specific type, source and status code instead of re-wrapping it as a
+         // parse failure in the catch-all below.
+         throw e;
+     } catch (IOException e) {
+         throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e);
+     } catch (Exception e) {
+         throw new ParseException(NAME, "解析数据失败: " + e.getMessage(), e);
+     }
+ }
+
+ /**
+  * Accepts any non-null, non-empty identifier.
+  *
+  * @param videoId candidate identifier
+  * @return {@code true} unless the identifier is {@code null} or empty
+  */
+ @Override
+ public boolean isValidVideoId(String videoId) {
+     if (videoId == null) {
+         return false;
+     }
+     return !videoId.isEmpty();
+ }
+
+ /**
+  * Returns the home page URL of this source.
+  *
+  * @return a fixed URL string
+  */
+ @Override
+ public String getHomePage() {
+     final String homePage = "https://news.sina.com.cn";
+     return homePage;
+ }
+
+ /**
+  * Collects up to 15 headline links from the front page and loads details for the first 10.
+  *
+  * <p>A pause of 300 ms follows each detail request. If the thread is interrupted during
+  * that pause, the interrupt flag is restored and the entries gathered so far are returned.
+  * When no entry could be gathered, 10 placeholder entries are generated instead.
+  *
+  * @return the news entries (or placeholders)
+  * @throws IOException if the front-page request fails
+  * @throws Exception a {@link NetworkException} on a non-200 status; otherwise whatever the
+  *         HTTP client raises
+  */
+ private List<Danmaku> getNewsWithContent() throws IOException, Exception {
+     List<Danmaku> danmakuList = new ArrayList<>();
+     List<String[]> newsLinks = new ArrayList<>();
+
+     String url = "https://news.sina.com.cn/";
+
+     try (CloseableHttpClient httpClient = HttpClients.createDefault()) {
+         HttpGet httpGet = new HttpGet(url);
+         httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36");
+
+         try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+             int statusCode = response.getCode();
+             if (statusCode != 200) {
+                 throw new NetworkException(NAME, "获取新闻失败,HTTP状态码: " + statusCode, statusCode);
+             }
+
+             String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8");
+             Matcher matcher = NEWS_LINK_PATTERN.matcher(responseBody);
+
+             while (matcher.find() && newsLinks.size() < 15) {
+                 String link = matcher.group(1);
+                 String title = matcher.group(2).trim();
+
+                 // Skip short titles and picture/video links.
+                 if (title.length() > 8 && !title.contains("图片") && !title.contains("视频")) {
+                     newsLinks.add(new String[]{link, title});
+                 }
+             }
+         }
+
+         int limit = Math.min(newsLinks.size(), 10);
+         for (int i = 0; i < limit; i++) {
+             String[] news = newsLinks.get(i);
+             String link = news[0];
+             String title = news[1];
+
+             // newsInfo = {publishTime, reporter, content}
+             String[] newsInfo = fetchNewsContent(httpClient, link);
+
+             Danmaku d = new Danmaku();
+             d.setRank(i + 1);
+             d.setTitle(title);
+             d.setPublishTime(newsInfo[0]);
+             d.setReporter(newsInfo[1]);
+             d.setContent(newsInfo[2]);
+             d.setSource(NAME);
+             danmakuList.add(d);
+
+             try {
+                 Thread.sleep(300);
+             } catch (InterruptedException e) {
+                 // Restore the flag and stop early instead of surfacing a bogus parse error.
+                 Thread.currentThread().interrupt();
+                 break;
+             }
+         }
+     }
+
+     if (danmakuList.isEmpty()) {
+         // Placeholder data, not real results.
+         for (int i = 0; i < 10; i++) {
+             Danmaku d = new Danmaku();
+             d.setRank(i + 1);
+             d.setTitle("示例新闻标题" + (i + 1));
+             d.setPublishTime("2025年1月1日 12:00");
+             d.setReporter("记者小明");
+             d.setContent("这是示例新闻正文内容,用于演示功能。");
+             d.setSource(NAME);
+             danmakuList.add(d);
+         }
+     }
+
+     return danmakuList;
+ }
+
+ // Loads one article page and returns {publishTime, reporter, content}.
+ // Defaults are "未知", "未知" and "(无法获取正文内容)"; any exception while loading or
+ // parsing is swallowed, so the method always returns a three-element array.
+ // NOTE(review): two literals in this method are damaged in this copy of the file (text
+ // between '<' and '>' stripped): the `html.indexOf("", ...)` end marker and the
+ // `pTagPattern` regex, whose string is split across two lines. The code is left untouched;
+ // restore both from the original source.
+ private String[] fetchNewsContent(CloseableHttpClient httpClient, String url) {
+ String publishTime = "未知";
+ String reporter = "未知";
+ String content = "";
+
+ try {
+ HttpGet httpGet = new HttpGet(url);
+ httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36");
+
+ try (CloseableHttpResponse response = httpClient.execute(httpGet)) {
+ if (response.getCode() == 200) {
+ String html = EntityUtils.toString(response.getEntity(), "UTF-8");
+
+ // First date-time found anywhere in the page is taken as the publish time.
+ Matcher timeMatcher = TIME_PATTERN.matcher(html);
+ if (timeMatcher.find()) {
+ publishTime = timeMatcher.group(1) + "年" + timeMatcher.group(2) + "月" +
+ timeMatcher.group(3) + "日 " + timeMatcher.group(4) + ":" + timeMatcher.group(5);
+ }
+
+ Matcher reporterMatcher = REPORTER_PATTERN.matcher(html);
+ if (reporterMatcher.find()) {
+ reporter = reporterMatcher.group(1);
+ }
+
+ // Locate the article container by trying several id/class markers in turn.
+ int articleStart = Math.max(html.indexOf("id=\"article\""), html.indexOf("class=\"article\""));
+ if (articleStart == -1) {
+ articleStart = html.indexOf("id=\"cont_article\"");
+ }
+ if (articleStart == -1) {
+ articleStart = html.indexOf("class=\"content\"");
+ }
+
+ if (articleStart != -1) {
+ int articleEnd = html.indexOf("", articleStart + 500);
+ if (articleEnd == -1) {
+ articleEnd = Math.min(articleStart + 5000, html.length());
+ }
+ // Only the first 5000 characters after the marker are scanned; articleEnd is not used here.
+ String articleSection = html.substring(articleStart, Math.min(articleStart + 5000, html.length()));
+
+ StringBuilder contentBuilder = new StringBuilder();
+ Pattern pTagPattern = Pattern.compile("]*>([^<]{20,})
");
+ Matcher matcher = pTagPattern.matcher(articleSection);
+
+ // Keep at most three paragraphs, skipping ones that contain boilerplate keywords.
+ int count = 0;
+ while (matcher.find() && count < 3) {
+ String paragraph = matcher.group(1).trim();
+ if (!paragraph.contains("编辑") && !paragraph.contains("Copyright") &&
+ !paragraph.contains("举报") && !paragraph.contains("来源:")) {
+ if (contentBuilder.length() > 0) {
+ contentBuilder.append(" ");
+ }
+ contentBuilder.append(paragraph);
+ count++;
+ }
+ }
+
+ if (contentBuilder.length() > 0) {
+ content = contentBuilder.toString();
+ // Cap the excerpt at 150 characters and mark the cut with an ellipsis.
+ if (content.length() > 150) {
+ content = content.substring(0, 150) + "...";
+ }
+ }
+ }
+ }
+ }
+ } catch (Exception e) {
+ // Failures leave the defaults in place.
+ }
+
+ if (content.isEmpty()) {
+ content = "(无法获取正文内容)";
+ }
+
+ return new String[]{publishTime, reporter, content};
+ }
+}
diff --git a/project/danmaku-crawler/src/main/java/com/danmaku/view/View.java b/project/danmaku-crawler/src/main/java/com/danmaku/view/View.java
new file mode 100644
index 0000000..da91160
--- /dev/null
+++ b/project/danmaku-crawler/src/main/java/com/danmaku/view/View.java
@@ -0,0 +1,85 @@
+package com.danmaku.view;
+
+import java.util.Scanner;
+
+/**
+ * Console view: writes messages to standard output / standard error and reads
+ * trimmed lines of user input from standard input.
+ */
+public class View {
+    /** Line reader over {@code System.in}; released by {@link #close()}. */
+    private final Scanner stdin = new Scanner(System.in);
+
+    /** Creates a view that reads its input from {@code System.in}. */
+    public View() {
+    }
+
+    /**
+     * Prints a message on standard output, followed by a line break.
+     *
+     * @param message text to print
+     */
+    public void displayMessage(String message) {
+        System.out.println(message);
+    }
+
+    /**
+     * Prints an error on standard error, prefixed with the bracketed error tag.
+     *
+     * @param error error text appended after the tag
+     */
+    public void displayError(String error) {
+        String tagged = "[错误] " + error;
+        System.err.println(tagged);
+    }
+
+    /** Prints the boxed welcome banner on standard output, followed by one empty line. */
+    public void displayWelcome() {
+        String[] bannerRows = {
+            "╔════════════════════════════════════════╗",
+            "║ 弹幕爬虫系统 v2.0 ║",
+            "║ 支持多平台弹幕爬取 ║",
+            "╚════════════════════════════════════════╝",
+            "" // empty row: yields the blank line that follows the box
+        };
+        for (String row : bannerRows) {
+            System.out.println(row);
+        }
+    }
+
+    /** Starts a new line and prints the {@code "> "} prompt without a trailing line break. */
+    public void displayPrompt() {
+        System.out.print("\n> ");
+    }
+
+    /**
+     * Reads the next line from standard input.
+     *
+     * @return the line with leading and trailing whitespace trimmed
+     */
+    public String getInput() {
+        return readTrimmedLine();
+    }
+
+    /**
+     * Prints {@code prompt} without a line break, then reads the next line from standard input.
+     *
+     * @param prompt text shown before waiting for input
+     * @return the line with leading and trailing whitespace trimmed
+     */
+    public String getInput(String prompt) {
+        System.out.print(prompt);
+        return readTrimmedLine();
+    }
+
+    /** Closes the scanner; the field is final and always assigned, so no null check is needed. */
+    public void close() {
+        stdin.close();
+    }
+
+    /** Reads one line from the scanner and applies {@link String#trim()} to it. */
+    private String readTrimmedLine() {
+        String line = stdin.nextLine();
+        return line.trim();
+    }
+}
diff --git a/project/danmaku-crawler/src/test/java/com/bilibili/DanmakuCrawlerTest.java b/project/danmaku-crawler/src/test/java/com/bilibili/DanmakuCrawlerTest.java
new file mode 100644
index 0000000..e69de29
diff --git a/project/danmaku-crawler/target/classes/com/bilibili/BilibiliDanmakuSource.class b/project/danmaku-crawler/target/classes/com/bilibili/BilibiliDanmakuSource.class
new file mode 100644
index 0000000..ffd5ac3
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/bilibili/BilibiliDanmakuSource.class differ
diff --git a/project/danmaku-crawler/target/classes/com/bilibili/Danmaku.class b/project/danmaku-crawler/target/classes/com/bilibili/Danmaku.class
new file mode 100644
index 0000000..802a4c7
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/bilibili/Danmaku.class differ
diff --git a/project/danmaku-crawler/target/classes/com/bilibili/DanmakuCrawler.class b/project/danmaku-crawler/target/classes/com/bilibili/DanmakuCrawler.class
new file mode 100644
index 0000000..f4f2373
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/bilibili/DanmakuCrawler.class differ
diff --git a/project/danmaku-crawler/target/classes/com/bilibili/DanmakuSource.class b/project/danmaku-crawler/target/classes/com/bilibili/DanmakuSource.class
new file mode 100644
index 0000000..99c3397
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/bilibili/DanmakuSource.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/DanmakuCrawlerApp.class b/project/danmaku-crawler/target/classes/com/danmaku/DanmakuCrawlerApp.class
new file mode 100644
index 0000000..99da55c
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/DanmakuCrawlerApp.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/Command.class b/project/danmaku-crawler/target/classes/com/danmaku/command/Command.class
new file mode 100644
index 0000000..bcd39cb
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/Command.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/ExitCommand.class b/project/danmaku-crawler/target/classes/com/danmaku/command/ExitCommand.class
new file mode 100644
index 0000000..d2b7e93
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/ExitCommand.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/FetchCommand.class b/project/danmaku-crawler/target/classes/com/danmaku/command/FetchCommand.class
new file mode 100644
index 0000000..4762ca5
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/FetchCommand.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/HelpCommand.class b/project/danmaku-crawler/target/classes/com/danmaku/command/HelpCommand.class
new file mode 100644
index 0000000..bfb5485
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/HelpCommand.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/SaveCommand.class b/project/danmaku-crawler/target/classes/com/danmaku/command/SaveCommand.class
new file mode 100644
index 0000000..3f97a4d
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/SaveCommand.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/command/StatisticCommand.class b/project/danmaku-crawler/target/classes/com/danmaku/command/StatisticCommand.class
new file mode 100644
index 0000000..5e80653
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/command/StatisticCommand.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/controller/CommandInvoker.class b/project/danmaku-crawler/target/classes/com/danmaku/controller/CommandInvoker.class
new file mode 100644
index 0000000..d917435
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/controller/CommandInvoker.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/controller/DanmakuController.class b/project/danmaku-crawler/target/classes/com/danmaku/controller/DanmakuController.class
new file mode 100644
index 0000000..6965a84
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/controller/DanmakuController.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/exception/DanmakuException.class b/project/danmaku-crawler/target/classes/com/danmaku/exception/DanmakuException.class
new file mode 100644
index 0000000..694f254
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/exception/DanmakuException.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/exception/NetworkException.class b/project/danmaku-crawler/target/classes/com/danmaku/exception/NetworkException.class
new file mode 100644
index 0000000..01d890d
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/exception/NetworkException.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/exception/ParseException.class b/project/danmaku-crawler/target/classes/com/danmaku/exception/ParseException.class
new file mode 100644
index 0000000..a32a7b2
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/exception/ParseException.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/model/Danmaku.class b/project/danmaku-crawler/target/classes/com/danmaku/model/Danmaku.class
new file mode 100644
index 0000000..bc6ce3a
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/model/Danmaku.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/strategy/BilibiliSource.class b/project/danmaku-crawler/target/classes/com/danmaku/strategy/BilibiliSource.class
new file mode 100644
index 0000000..86d37d3
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/strategy/BilibiliSource.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/strategy/DanmakuSource.class b/project/danmaku-crawler/target/classes/com/danmaku/strategy/DanmakuSource.class
new file mode 100644
index 0000000..7947eb9
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/strategy/DanmakuSource.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/strategy/DoubanTop250Source.class b/project/danmaku-crawler/target/classes/com/danmaku/strategy/DoubanTop250Source.class
new file mode 100644
index 0000000..230b623
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/strategy/DoubanTop250Source.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/strategy/NewsSource.class b/project/danmaku-crawler/target/classes/com/danmaku/strategy/NewsSource.class
new file mode 100644
index 0000000..6077231
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/strategy/NewsSource.class differ
diff --git a/project/danmaku-crawler/target/classes/com/danmaku/view/View.class b/project/danmaku-crawler/target/classes/com/danmaku/view/View.class
new file mode 100644
index 0000000..e04ee4b
Binary files /dev/null and b/project/danmaku-crawler/target/classes/com/danmaku/view/View.class differ