47 changed files with 2040 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,97 @@ |
|||
<!--
  Maven build for the danmaku crawler.
  Produces the regular jar plus an executable "jar-with-dependencies" during the package phase.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.bilibili</groupId>
    <artifactId>danmaku-crawler</artifactId>
    <version>1.0-SNAPSHOT</version>

    <properties>
        <!-- Single place where the Java level is configured; maven-compiler-plugin reads these properties. -->
        <maven.compiler.source>21</maven.compiler.source>
        <maven.compiler.target>21</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- Test only -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.13.2</version>
            <scope>test</scope>
        </dependency>

        <!-- HTTP access -->
        <dependency>
            <groupId>org.apache.httpcomponents.client5</groupId>
            <artifactId>httpclient5</artifactId>
            <version>5.3</version>
        </dependency>

        <!-- JSON / HTML / XML parsing -->
        <dependency>
            <groupId>com.alibaba</groupId>
            <artifactId>fastjson</artifactId>
            <version>2.0.32</version>
        </dependency>
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.17.2</version>
        </dependency>
        <dependency>
            <groupId>org.dom4j</groupId>
            <artifactId>dom4j</artifactId>
            <version>2.1.3</version>
        </dependency>

        <!-- Logging: API at compile time, simple binding only at runtime -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.36</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-simple</artifactId>
            <version>1.7.36</version>
            <scope>runtime</scope>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <!-- Source/target level comes from the maven.compiler.* properties above. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>3.2.5</version>
            </plugin>
            <!-- Builds the self-contained executable jar whose entry point is the interactive app. -->
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-assembly-plugin</artifactId>
                <version>3.6.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.danmaku.DanmakuCrawlerApp</mainClass>
                        </manifest>
                    </archive>
                    <descriptorRefs>
                        <descriptorRef>jar-with-dependencies</descriptorRef>
                    </descriptorRefs>
                </configuration>
                <executions>
                    <execution>
                        <id>make-assembly</id>
                        <phase>package</phase>
                        <goals>
                            <goal>single</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|||
@ -0,0 +1,4 @@ |
|||
@echo off
rem Starts the interactive crawler from the compiled classes plus the jars
rem stored in the current user's local Maven repository.
setlocal
cd /d "%~dp0"

rem Resolve the repository from the user profile instead of a fixed account name,
rem so the script works for any Windows user.
set "REPO=%USERPROFILE%\.m2\repository"

set "CP=target/classes"
set "CP=%CP%;%REPO%\org\apache\httpcomponents\client5\httpclient5\5.3\httpclient5-5.3.jar"
set "CP=%CP%;%REPO%\org\apache\httpcomponents\core5\httpcore5\5.2.4\httpcore5-5.2.4.jar"
set "CP=%CP%;%REPO%\org\apache\httpcomponents\core5\httpcore5-h2\5.2.4\httpcore5-h2-5.2.4.jar"
set "CP=%CP%;%REPO%\commons-codec\commons-codec\1.16.0\commons-codec-1.16.0.jar"
set "CP=%CP%;%REPO%\commons-logging\commons-logging\1.3.0\commons-logging-1.3.0.jar"
set "CP=%CP%;%REPO%\org\slf4j\slf4j-api\1.7.36\slf4j-api-1.7.36.jar"
set "CP=%CP%;%REPO%\com\alibaba\fastjson\2.0.32\fastjson-2.0.32.jar"
set "CP=%CP%;%REPO%\org\jsoup\jsoup\1.17.2\jsoup-1.17.2.jar"
set "CP=%CP%;%REPO%\org\dom4j\dom4j\2.1.3\dom4j-2.1.3.jar"
set "CP=%CP%;%REPO%\org\slf4j\slf4j-simple\1.7.36\slf4j-simple-1.7.36.jar"

java -cp "%CP%" com.danmaku.DanmakuCrawlerApp
pause
endlocal
|||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,201 @@ |
|||
package com.bilibili; |
|||
|
|||
import org.apache.hc.client5.http.classic.methods.HttpGet; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; |
|||
import org.apache.hc.client5.http.impl.classic.HttpClients; |
|||
import org.apache.hc.core5.http.io.entity.EntityUtils; |
|||
import org.apache.hc.core5.http.ParseException; |
|||
import java.io.IOException; |
|||
import java.util.*; |
|||
|
|||
/**
 * Danmaku entity: a plain mutable holder for one bullet comment and its attributes.
 *
 * <p>All fields start at their Java defaults ({@code null} / {@code 0}) until a setter is called.
 */
class Danmaku {

    private String content;
    private double time;
    private int type;
    private int size;
    private int color;
    private long timestamp;
    private int pool;

    // ---------------------------------------------------------------- readers

    /** @return the comment text, or {@code null} if never set */
    public String getContent() {
        return this.content;
    }

    /** @return the time value of this comment (callers in this file divide it by 60 to bucket by minute) */
    public double getTime() {
        return this.time;
    }

    /** @return the numeric type code */
    public int getType() {
        return this.type;
    }

    /** @return the size value */
    public int getSize() {
        return this.size;
    }

    /** @return the color value */
    public int getColor() {
        return this.color;
    }

    /** @return the timestamp value */
    public long getTimestamp() {
        return this.timestamp;
    }

    /** @return the pool value */
    public int getPool() {
        return this.pool;
    }

    // ---------------------------------------------------------------- writers

    /** @param content the comment text */
    public void setContent(String content) {
        this.content = content;
    }

    /** @param time the time value of this comment */
    public void setTime(double time) {
        this.time = time;
    }

    /** @param type the numeric type code */
    public void setType(int type) {
        this.type = type;
    }

    /** @param size the size value */
    public void setSize(int size) {
        this.size = size;
    }

    /** @param color the color value */
    public void setColor(int color) {
        this.color = color;
    }

    /** @param timestamp the timestamp value */
    public void setTimestamp(long timestamp) {
        this.timestamp = timestamp;
    }

    /** @param pool the pool value */
    public void setPool(int pool) {
        this.pool = pool;
    }
}
|||
|
|||
// 弹幕数据源接口
|
|||
interface DanmakuSource { |
|||
String getCidByVideoId(String videoId) throws IOException, ParseException; |
|||
List<Danmaku> getDanmakuByCid(String cid) throws IOException, ParseException; |
|||
} |
|||
|
|||
// B站弹幕数据源实现
|
|||
class BilibiliDanmakuSource implements DanmakuSource { |
|||
private static final String BILI_API_URL = "https://api.bilibili.com/x/web-interface/view?bvid="; |
|||
private static final String DANMAKU_URL = "https://api.bilibili.com/x/v2/dm/web/seg.so?type=1&oid="; |
|||
|
|||
@Override |
|||
public String getCidByVideoId(String bvid) throws IOException, ParseException { |
|||
CloseableHttpClient httpClient = HttpClients.createDefault(); |
|||
HttpGet httpGet = new HttpGet(BILI_API_URL + bvid); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
httpGet.setHeader("Referer", "https://www.bilibili.com/"); |
|||
httpGet.setHeader("Accept", "application/json, text/plain, */*"); |
|||
httpGet.setHeader("Cookie", "enable_web_push=DISABLE; header_theme_version=CLOSE; enable_feed_channel=ENABLE; DedeUserID=391377162; DedeUserID__ckMd5=0640e990eda21b7b; buvid3=1D162F34-789F-00E6-5771-CEE3CE3E335270874infoc; b_nut=1746351170; _uuid=10D6783D6-8BDD-97BB-192E-321023F826E2875277infoc; buvid_fp=4473fb16f468ffa9d97407ec6fb67ef5; theme-tip-show=SHOWED; theme-avatar-tip-show=SHOWED; buvid4=30315833-CDB4-37E3-9981-50EE6A3201A994284-025081816-AUhOobBTgl0D1i9s696twSinylqiDskbfxaHX43k9VBl4WxfBo25uYK8pNPyrYdg; rpdid=0zbfVFXl5V|d71fBrnp|4EG|3w1UNVnN; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzcxMTY1ODgsImlhdCI6MTc3Njg1NzMyOCwicGx0IjotMX0.w7VJeFaCnJf3JQeIeKvV-cX2nkPn5UPfuRPd7Fh4De0; bili_ticket_expires=1777116528; SESSDATA=3d4b9245%2C1792409390%2C3fbeb%2A42CjA8fMmRpb27ucXG80TIUn07GoSbyIMNl9M0hbxZRBIE3QeUUyMh0eJk9In06QTdDt0SVnVYTkV0ZGhkdzR5ZTJnTEFKNFplSDdTWERsTDZCWDYyVXNmNWhPVU1kLUxzWEx4Ri14a2R5cFMwSXFNYUxsRXNTMXAyRlhvcnNvRTVGWDZrV3dnS1N3IIEC; bili_jct=8c46b46d28c92fe84321dab06e91d601; sid=8m9aolp3; CURRENT_QUALITY=80; bp_t_offset_391377162=1195072017297047552; CURRENT_FNVAL=2000; b_lsid=2402F8CE_19DC2B0E1EE; home_feed_column=4; browser_resolution=1253-822"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
System.out.println("API响应预览: " + responseBody.substring(0, Math.min(200, responseBody.length()))); |
|||
|
|||
int cidStart = responseBody.indexOf("\"cid\":"); |
|||
if (cidStart == -1) return null; |
|||
cidStart += 6; |
|||
int cidEnd = responseBody.indexOf(",", cidStart); |
|||
if (cidEnd == -1) cidEnd = responseBody.indexOf("}", cidStart); |
|||
return responseBody.substring(cidStart, cidEnd).trim(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public List<Danmaku> getDanmakuByCid(String cid) throws IOException, ParseException { |
|||
CloseableHttpClient httpClient = HttpClients.createDefault(); |
|||
HttpGet httpGet = new HttpGet(DANMAKU_URL + cid + "&segment_index=1"); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
httpGet.setHeader("Referer", "https://www.bilibili.com/"); |
|||
httpGet.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"); |
|||
httpGet.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); |
|||
httpGet.setHeader("Cookie", "enable_web_push=DISABLE; header_theme_version=CLOSE; enable_feed_channel=ENABLE; DedeUserID=391377162; DedeUserID__ckMd5=0640e990eda21b7b; buvid3=1D162F34-789F-00E6-5771-CEE3CE3E335270874infoc; b_nut=1746351170; _uuid=10D6783D6-8BDD-97BB-192E-321023F826E2875277infoc; buvid_fp=4473fb16f468ffa9d97407ec6fb67ef5; theme-tip-show=SHOWED; theme-avatar-tip-show=SHOWED; buvid4=30315833-CDB4-37E3-9981-50EE6A3201A994284-025081816-AUhOobBTgl0D1i9s696twSinylqiDskbfxaHX43k9VBl4WxfBo25uYK8pNPyrYdg; rpdid=0zbfVFXl5V|d71fBrnp|4EG|3w1UNVnN; bili_ticket=eyJhbGciOiJIUzI1NiIsImtpZCI6InMwMyIsInR5cCI6IkpXVCJ9.eyJleHAiOjE3NzcxMTY1ODgsImlhdCI6MTc3Njg1NzMyOCwicGx0IjotMX0.w7VJeFaCnJf3JQeIeKvV-cX2nkPn5UPfuRPd7Fh4De0; bili_ticket_expires=1777116528; SESSDATA=3d4b9245%2C1792409390%2C3fbeb%2A42CjA8fMmRpb27ucXG80TIUn07GoSbyIMNl9M0hbxZRBIE3QeUUyMh0eJk9In06QTdDt0SVnVYTkV0ZGhkdzR5ZTJnTEFKNFplSDdTWERsTDZCWDYyVXNmNWhPVU1kLUxzWEx4Ri14a2R5cFMwSXFNYUxsRXNTMXAyRlhvcnNvRTVGWDZrV3dnS1N3IIEC; bili_jct=8c46b46d28c92fe84321dab06e91d601; sid=8m9aolp3; CURRENT_QUALITY=80; bp_t_offset_391377162=1195072017297047552; CURRENT_FNVAL=2000; b_lsid=2402F8CE_19DC2B0E1EE; home_feed_column=4; browser_resolution=1253-822"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
System.out.println("弹幕接口返回长度: " + responseBody.length()); |
|||
return parseDanmakuText(responseBody); |
|||
} |
|||
} |
|||
|
|||
private List<Danmaku> parseDanmakuText(String text) { |
|||
List<Danmaku> list = new ArrayList<>(); |
|||
String[] lines = text.split("\n"); |
|||
for (String line : lines) { |
|||
if (line.contains(":") && line.contains("@")) { |
|||
int colonIdx = line.indexOf(":"); |
|||
int atIdx = line.indexOf("@"); |
|||
if (colonIdx != -1 && atIdx > colonIdx) { |
|||
String content = line.substring(colonIdx + 1, atIdx).trim(); |
|||
if (!content.isEmpty()) { |
|||
Danmaku d = new Danmaku(); |
|||
d.setContent(content); |
|||
d.setTime(0); |
|||
d.setType(1); |
|||
d.setSize(25); |
|||
d.setColor(0); |
|||
d.setTimestamp(System.currentTimeMillis() / 1000); |
|||
d.setPool(0); |
|||
list.add(d); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
return list; |
|||
} |
|||
} |
|||
|
|||
public class DanmakuCrawler { |
|||
public static void main(String[] args) { |
|||
String bvid; |
|||
if (args.length == 0) { |
|||
// 没有传入参数时,使用默认BV号
|
|||
bvid = "BV1x7oNBvEZs"; |
|||
System.out.println("未指定BV号,使用默认: " + bvid); |
|||
} else { |
|||
bvid = args[0]; |
|||
} |
|||
System.out.println("开始爬取BV号:" + bvid + " 的弹幕..."); |
|||
|
|||
try { |
|||
DanmakuSource source = new BilibiliDanmakuSource(); |
|||
String cid = source.getCidByVideoId(bvid); |
|||
if (cid == null) { |
|||
System.out.println("获取视频信息失败,请检查BV号是否正确"); |
|||
return; |
|||
} |
|||
System.out.println("获取到视频cid:" + cid); |
|||
|
|||
List<Danmaku> danmakuList = source.getDanmakuByCid(cid); |
|||
System.out.println("共获取到 " + danmakuList.size() + " 条弹幕"); |
|||
|
|||
if (!danmakuList.isEmpty()) { |
|||
statisticDanmaku(danmakuList); |
|||
} else { |
|||
System.out.println("提示:没有获取到弹幕,可能是视频没有弹幕或风控拦截。"); |
|||
} |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
System.out.println("爬取弹幕失败:" + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private static void statisticDanmaku(List<Danmaku> danmakuList) { |
|||
System.out.println("\n===== 弹幕统计结果 ====="); |
|||
System.out.println("1. 总弹幕数:" + danmakuList.size()); |
|||
|
|||
System.out.println("\n2. 弹幕时间分布:"); |
|||
Map<Integer, Integer> timeDistribution = new TreeMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
int minute = (int) danmaku.getTime() / 60; |
|||
timeDistribution.put(minute, timeDistribution.getOrDefault(minute, 0) + 1); |
|||
} |
|||
timeDistribution.forEach((minute, count) -> { |
|||
System.out.println(" 第 " + minute + " 分钟:" + count + " 条弹幕"); |
|||
}); |
|||
|
|||
System.out.println("\n3. 高频词统计:"); |
|||
Map<String, Integer> wordFrequency = new HashMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
String content = danmaku.getContent(); |
|||
String[] words = content.split("\\s+"); |
|||
for (String word : words) { |
|||
if (word.length() >= 2) { |
|||
wordFrequency.put(word, wordFrequency.getOrDefault(word, 0) + 1); |
|||
} |
|||
} |
|||
} |
|||
wordFrequency.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.forEach(entry -> { |
|||
System.out.println(" " + entry.getKey() + ":" + entry.getValue() + "次"); |
|||
}); |
|||
|
|||
System.out.println("\n4. 弹幕类型分布:"); |
|||
Map<Integer, Integer> typeDistribution = new HashMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
typeDistribution.put(danmaku.getType(), typeDistribution.getOrDefault(danmaku.getType(), 0) + 1); |
|||
} |
|||
typeDistribution.forEach((type, count) -> { |
|||
String typeName; |
|||
switch (type) { |
|||
case 1: typeName = "滚动弹幕"; break; |
|||
case 4: typeName = "顶部弹幕"; break; |
|||
case 5: typeName = "底部弹幕"; break; |
|||
case 6: typeName = "逆向弹幕"; break; |
|||
case 7: typeName = "精准定位弹幕"; break; |
|||
case 8: typeName = "高级弹幕"; break; |
|||
default: typeName = "其他类型"; |
|||
} |
|||
System.out.println(" " + typeName + ":" + count + "条"); |
|||
}); |
|||
} |
|||
} |
|||
@ -0,0 +1,124 @@ |
|||
package com.danmaku; |
|||
|
|||
import com.danmaku.command.*; |
|||
import com.danmaku.controller.DanmakuController; |
|||
import com.danmaku.view.View; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.*; |
|||
|
|||
public class DanmakuCrawlerApp { |
|||
private final View view; |
|||
private final DanmakuController controller; |
|||
private final Map<String, Command> commands; |
|||
|
|||
public DanmakuCrawlerApp() { |
|||
this.view = new View(); |
|||
this.controller = new DanmakuController(view); |
|||
this.commands = new HashMap<>(); |
|||
initializeCommands(); |
|||
} |
|||
|
|||
private void initializeCommands() { |
|||
commands.put("help", new HelpCommand(view, commands)); |
|||
commands.put("exit", new ExitCommand(view)); |
|||
} |
|||
|
|||
public void run() { |
|||
view.displayWelcome(); |
|||
|
|||
Scanner scanner = new Scanner(System.in); |
|||
|
|||
while (true) { |
|||
view.displayPrompt(); |
|||
String input = scanner.nextLine().trim(); |
|||
|
|||
if (input.isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
String[] parts = input.split("\\s+"); |
|||
String commandName = parts[0].toLowerCase(); |
|||
|
|||
if (commandName.equals("fetch")) { |
|||
handleFetchCommand(parts); |
|||
} else if (commandName.equals("save")) { |
|||
handleSaveCommand(parts); |
|||
} else if (commandName.equals("statistic")) { |
|||
handleStatisticCommand(); |
|||
} else if (commands.containsKey(commandName)) { |
|||
Command command = commands.get(commandName); |
|||
command.execute(); |
|||
if (commandName.equals("exit")) { |
|||
break; |
|||
} |
|||
} else { |
|||
view.displayError("未知命令: " + commandName); |
|||
view.displayMessage("输入 'help' 查看可用命令"); |
|||
} |
|||
} |
|||
|
|||
view.close(); |
|||
} |
|||
|
|||
private void handleFetchCommand(String[] parts) { |
|||
if (parts.length < 3) { |
|||
view.displayError("用法: fetch <平台> <视频ID>"); |
|||
view.displayMessage("平台: bilibili, douban, news"); |
|||
view.displayMessage("示例: fetch bilibili BV1xx411c7m9"); |
|||
return; |
|||
} |
|||
|
|||
String platform = parts[1]; |
|||
String videoId = parts[2]; |
|||
|
|||
FetchCommand fetchCommand = new FetchCommand(view, controller.getSources().get(platform), videoId); |
|||
fetchCommand.execute(); |
|||
|
|||
List<?> result = fetchCommand.getResult(); |
|||
if (result != null && !result.isEmpty()) { |
|||
controller.setCurrentDanmakuList(result); |
|||
view.displayMessage("数据已加载,可以进行统计或保存操作"); |
|||
} |
|||
} |
|||
|
|||
private void handleSaveCommand(String[] parts) { |
|||
List<?> danmakuList = controller.getCurrentDanmakuList(); |
|||
if (danmakuList == null || danmakuList.isEmpty()) { |
|||
view.displayError("没有数据可保存,请先使用 fetch 命令获取数据"); |
|||
return; |
|||
} |
|||
|
|||
if (parts.length < 3) { |
|||
view.displayError("用法: save <文件路径> <格式>"); |
|||
view.displayMessage("格式: json, csv, txt"); |
|||
view.displayMessage("示例: save danmaku.json json"); |
|||
return; |
|||
} |
|||
|
|||
String filePath = parts[1]; |
|||
String format = parts[2]; |
|||
|
|||
try { |
|||
controller.saveDanmaku(filePath, format); |
|||
} catch (IOException e) { |
|||
view.displayError("保存失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private void handleStatisticCommand() { |
|||
List<?> danmakuList = controller.getCurrentDanmakuList(); |
|||
if (danmakuList == null || danmakuList.isEmpty()) { |
|||
view.displayError("没有数据可统计,请先使用 fetch 命令获取数据"); |
|||
return; |
|||
} |
|||
|
|||
StatisticCommand statisticCommand = new StatisticCommand(view, (List) danmakuList); |
|||
statisticCommand.execute(); |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
DanmakuCrawlerApp app = new DanmakuCrawlerApp(); |
|||
app.run(); |
|||
} |
|||
} |
|||
@ -0,0 +1,7 @@ |
|||
package com.danmaku.command; |
|||
|
|||
/**
 * A named, self-describing action that the console application can run.
 */
public interface Command {

    /** @return the keyword that identifies this command */
    String getName();

    /** @return a short human-readable description of this command */
    String getDescription();

    /** Performs the action. */
    void execute();
}
|||
@ -0,0 +1,32 @@ |
|||
package com.danmaku.command; |
|||
|
|||
import com.danmaku.view.View; |
|||
|
|||
public class ExitCommand implements Command { |
|||
private final View view; |
|||
private boolean shouldExit = false; |
|||
|
|||
public ExitCommand(View view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
view.displayMessage("感谢使用弹幕爬虫系统,再见!"); |
|||
shouldExit = true; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "exit"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "退出程序"; |
|||
} |
|||
|
|||
public boolean shouldExit() { |
|||
return shouldExit; |
|||
} |
|||
} |
|||
@ -0,0 +1,55 @@ |
|||
package com.danmaku.command; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.model.Danmaku; |
|||
import com.danmaku.strategy.DanmakuSource; |
|||
import com.danmaku.view.View; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class FetchCommand implements Command { |
|||
private final View view; |
|||
private final DanmakuSource source; |
|||
private final String videoId; |
|||
private List<Danmaku> result; |
|||
|
|||
public FetchCommand(View view, DanmakuSource source, String videoId) { |
|||
this.view = view; |
|||
this.source = source; |
|||
this.videoId = videoId; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
try { |
|||
String dataType = source.getName().equals("豆瓣电影Top250") ? "电影数据" : |
|||
source.getName().equals("新浪新闻") ? "新闻数据" : "弹幕数据"; |
|||
view.displayMessage("开始从 " + source.getName() + " 获取" + dataType + "..."); |
|||
view.displayMessage("视频ID: " + videoId); |
|||
|
|||
result = source.fetchDanmaku(videoId); |
|||
|
|||
view.displayMessage("成功获取到 " + result.size() + " 条数据"); |
|||
} catch (DanmakuException e) { |
|||
view.displayError("获取数据失败: " + e.getMessage()); |
|||
if (e.getSource() != null) { |
|||
view.displayError("数据源: " + e.getSource()); |
|||
} |
|||
result = List.of(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "fetch"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "从" + source.getName() + "获取数据"; |
|||
} |
|||
|
|||
public List<Danmaku> getResult() { |
|||
return result; |
|||
} |
|||
} |
|||
@ -0,0 +1,50 @@ |
|||
package com.danmaku.command; |
|||
|
|||
import com.danmaku.view.View; |
|||
|
|||
import java.util.Map; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private final View view; |
|||
private final Map<String, Command> commands; |
|||
|
|||
public HelpCommand(View view, Map<String, Command> commands) { |
|||
this.view = view; |
|||
this.commands = commands; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
view.displayMessage("\n===== 弹幕爬虫系统帮助 ====="); |
|||
view.displayMessage("可用命令:"); |
|||
|
|||
commands.forEach((name, cmd) -> { |
|||
view.displayMessage(" " + name + " - " + cmd.getDescription()); |
|||
}); |
|||
|
|||
view.displayMessage("\n支持的平台:"); |
|||
view.displayMessage(" 1. Bilibili (BV号,如: BV1xx411c7m9)"); |
|||
view.displayMessage(" 2. 豆瓣电影Top250 (任意数字,如: 1)"); |
|||
view.displayMessage(" 3. 新浪新闻 (任意数字,如: 1)"); |
|||
|
|||
view.displayMessage("\n保存格式:"); |
|||
view.displayMessage(" json - JSON格式"); |
|||
view.displayMessage(" csv - CSV格式"); |
|||
view.displayMessage(" txt - 文本格式"); |
|||
|
|||
view.displayMessage("\n示例命令:"); |
|||
view.displayMessage(" fetch bilibili BV1xx411c7m9"); |
|||
view.displayMessage(" save danmaku.json json"); |
|||
view.displayMessage(" statistic"); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "显示帮助信息"; |
|||
} |
|||
} |
|||
@ -0,0 +1,116 @@ |
|||
package com.danmaku.command; |
|||
|
|||
import com.danmaku.model.Danmaku; |
|||
import com.danmaku.view.View; |
|||
import com.danmaku.controller.DanmakuController; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
|
|||
public class SaveCommand implements Command { |
|||
private final View view; |
|||
private final List<Danmaku> danmakuList; |
|||
private final String filePath; |
|||
private final String format; |
|||
private boolean success; |
|||
|
|||
public SaveCommand(View view, List<Danmaku> danmakuList, String filePath, String format) { |
|||
this.view = view; |
|||
this.danmakuList = danmakuList; |
|||
this.filePath = filePath; |
|||
this.format = format; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
try { |
|||
view.displayMessage("开始保存数据到文件: " + filePath); |
|||
view.displayMessage("保存格式: " + format); |
|||
|
|||
saveDanmaku(danmakuList, filePath, format); |
|||
|
|||
success = true; |
|||
view.displayMessage("成功保存 " + danmakuList.size() + " 条数据到文件"); |
|||
} catch (IOException e) { |
|||
success = false; |
|||
view.displayError("保存文件失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private void saveDanmaku(List<Danmaku> danmakuList, String filePath, String format) throws IOException { |
|||
if (danmakuList == null || danmakuList.isEmpty()) { |
|||
throw new IOException("没有数据可保存"); |
|||
} |
|||
|
|||
switch (format.toLowerCase()) { |
|||
case "json": |
|||
saveAsJson(danmakuList, filePath); |
|||
break; |
|||
case "csv": |
|||
saveAsCsv(danmakuList, filePath); |
|||
break; |
|||
case "txt": |
|||
saveAsTxt(danmakuList, filePath); |
|||
break; |
|||
default: |
|||
throw new IOException("不支持的保存格式: " + format); |
|||
} |
|||
} |
|||
|
|||
private void saveAsJson(List<Danmaku> danmakuList, String filePath) throws IOException { |
|||
try (java.io.PrintWriter writer = new java.io.PrintWriter(new java.io.FileWriter(filePath))) { |
|||
writer.println("["); |
|||
for (int i = 0; i < danmakuList.size(); i++) { |
|||
writer.print(" " + danmakuList.get(i).toJson()); |
|||
if (i < danmakuList.size() - 1) { |
|||
writer.println(","); |
|||
} else { |
|||
writer.println(); |
|||
} |
|||
} |
|||
writer.println("]"); |
|||
} |
|||
} |
|||
|
|||
private void saveAsCsv(List<Danmaku> danmakuList, String filePath) throws IOException { |
|||
try (java.io.OutputStreamWriter osw = new java.io.OutputStreamWriter( |
|||
new java.io.FileOutputStream(filePath), "GBK"); |
|||
java.io.PrintWriter writer = new java.io.PrintWriter(osw)) { |
|||
String header = "content,time,type,size,color,timestamp,pool,source"; |
|||
if (!danmakuList.isEmpty()) { |
|||
Danmaku first = danmakuList.get(0); |
|||
if (first.getSource() != null && first.getSource().contains("豆瓣")) { |
|||
header = "rank,title,rating,commentCount,source"; |
|||
} else if (first.getSource() != null && first.getSource().contains("新闻")) { |
|||
header = "rank,title,publishTime,reporter,content,source"; |
|||
} |
|||
} |
|||
writer.println(header); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
writer.println(danmaku.toCsv()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void saveAsTxt(List<Danmaku> danmakuList, String filePath) throws IOException { |
|||
try (java.io.PrintWriter writer = new java.io.PrintWriter(new java.io.FileWriter(filePath))) { |
|||
for (Danmaku danmaku : danmakuList) { |
|||
writer.println(danmaku.toString()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "save"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "保存弹幕到文件"; |
|||
} |
|||
|
|||
public boolean isSuccess() { |
|||
return success; |
|||
} |
|||
} |
|||
@ -0,0 +1,182 @@ |
|||
package com.danmaku.command; |
|||
|
|||
import com.danmaku.model.Danmaku; |
|||
import com.danmaku.view.View; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.TreeMap; |
|||
import java.util.stream.Collectors; |
|||
|
|||
public class StatisticCommand implements Command { |
|||
private final View view; |
|||
private final List<Danmaku> danmakuList; |
|||
|
|||
public StatisticCommand(View view, List<Danmaku> danmakuList) { |
|||
this.view = view; |
|||
this.danmakuList = danmakuList; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
if (danmakuList == null || danmakuList.isEmpty()) { |
|||
view.displayMessage("没有数据可统计"); |
|||
return; |
|||
} |
|||
|
|||
int movieCount = 0; |
|||
int danmakuCount = 0; |
|||
int newsCount = 0; |
|||
for (Danmaku d : danmakuList) { |
|||
if (d.getTitle() != null && !d.getTitle().isEmpty()) { |
|||
if (d.getSource() != null && d.getSource().contains("新闻")) { |
|||
newsCount++; |
|||
} else { |
|||
movieCount++; |
|||
} |
|||
} else { |
|||
danmakuCount++; |
|||
} |
|||
} |
|||
|
|||
view.displayMessage("\n===== 数据统计结果 ====="); |
|||
view.displayMessage("1. 总数据量:" + danmakuList.size()); |
|||
|
|||
view.displayMessage("\n2. 数据类型分布:"); |
|||
if (danmakuCount > 0) { |
|||
view.displayMessage(" 弹幕数据:" + danmakuCount + " 条"); |
|||
} |
|||
if (movieCount > 0) { |
|||
view.displayMessage(" 电影数据:" + movieCount + " 条"); |
|||
} |
|||
if (newsCount > 0) { |
|||
view.displayMessage(" 新闻数据:" + newsCount + " 条"); |
|||
} |
|||
|
|||
if (movieCount > 0) { |
|||
view.displayMessage("\n3. 豆瓣电影Top250 评分统计:"); |
|||
double sum = 0; |
|||
int count = 0; |
|||
for (Danmaku d : danmakuList) { |
|||
if (d.getRating() > 0) { |
|||
sum += d.getRating(); |
|||
count++; |
|||
} |
|||
} |
|||
if (count > 0) { |
|||
view.displayMessage(" 平均评分:" + String.format("%.2f", sum / count)); |
|||
final double maxRating = danmakuList.stream() |
|||
.filter(d -> d.getRating() > 0) |
|||
.mapToDouble(Danmaku::getRating) |
|||
.max() |
|||
.orElse(0); |
|||
view.displayMessage(" 最高评分:" + maxRating); |
|||
final double minRating = danmakuList.stream() |
|||
.filter(d -> d.getRating() > 0) |
|||
.mapToDouble(Danmaku::getRating) |
|||
.min() |
|||
.orElse(0); |
|||
view.displayMessage(" 最低评分:" + minRating); |
|||
} |
|||
|
|||
view.displayMessage("\n4. 电影列表:"); |
|||
for (Danmaku d : danmakuList) { |
|||
view.displayMessage(String.format(" [%d] %s - 评分: %.1f", d.getRank(), d.getTitle(), d.getRating())); |
|||
} |
|||
} |
|||
|
|||
if (newsCount > 0) { |
|||
view.displayMessage("\n3. 新闻列表:"); |
|||
for (Danmaku d : danmakuList) { |
|||
StringBuilder sb = new StringBuilder(); |
|||
sb.append(" [").append(d.getRank()).append("] ").append(d.getTitle()); |
|||
if (d.getPublishTime() != null && !d.getPublishTime().equals("未知")) { |
|||
sb.append(" (").append(d.getPublishTime()).append(")"); |
|||
} |
|||
if (d.getReporter() != null && !d.getReporter().equals("未知")) { |
|||
sb.append(" - 记者: ").append(d.getReporter()); |
|||
} |
|||
view.displayMessage(sb.toString()); |
|||
} |
|||
} |
|||
|
|||
if (danmakuCount > 0) { |
|||
view.displayMessage("\n3. 弹幕内容样本:"); |
|||
int sampleCount = 0; |
|||
for (Danmaku danmaku : danmakuList) { |
|||
if (danmaku.getContent() != null && !danmaku.getContent().isEmpty()) { |
|||
view.displayMessage(" " + danmaku.getContent()); |
|||
sampleCount++; |
|||
if (sampleCount >= 30) break; |
|||
} |
|||
} |
|||
if (sampleCount == 0) { |
|||
view.displayMessage(" 无有效弹幕内容"); |
|||
} |
|||
|
|||
view.displayMessage("\n4. 弹幕高频词统计:"); |
|||
Map<String, Integer> wordFrequency = new HashMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
if (danmaku.getContent() != null && !danmaku.getContent().isEmpty()) { |
|||
String[] words = danmaku.getContent().split("[\\s\\p{Punct}]+"); |
|||
for (String word : words) { |
|||
word = word.trim(); |
|||
if (word.length() >= 2) { |
|||
wordFrequency.put(word, wordFrequency.getOrDefault(word, 0) + 1); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
if (!wordFrequency.isEmpty()) { |
|||
wordFrequency.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.forEach(entry -> { |
|||
view.displayMessage(" " + entry.getKey() + ":" + entry.getValue() + "次"); |
|||
}); |
|||
} else { |
|||
view.displayMessage(" 无可用文本数据"); |
|||
} |
|||
|
|||
view.displayMessage("\n5. 弹幕类型分布:"); |
|||
Map<Integer, Integer> typeDistribution = new HashMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
typeDistribution.put(danmaku.getType(), typeDistribution.getOrDefault(danmaku.getType(), 0) + 1); |
|||
} |
|||
typeDistribution.forEach((type, count) -> { |
|||
String typeName; |
|||
switch (type) { |
|||
case 1: typeName = "滚动弹幕"; break; |
|||
case 4: typeName = "顶部弹幕"; break; |
|||
case 5: typeName = "底部弹幕"; break; |
|||
case 6: typeName = "逆向弹幕"; break; |
|||
case 7: typeName = "精准定位弹幕"; break; |
|||
case 8: typeName = "高级弹幕"; break; |
|||
default: typeName = "其他类型"; |
|||
} |
|||
view.displayMessage(" " + typeName + ":" + count + "条"); |
|||
}); |
|||
} |
|||
|
|||
view.displayMessage("\n6. 数据来源分布:"); |
|||
Map<String, Integer> sourceDistribution = new HashMap<>(); |
|||
for (Danmaku danmaku : danmakuList) { |
|||
String source = danmaku.getSource() != null ? danmaku.getSource() : "未知"; |
|||
sourceDistribution.put(source, sourceDistribution.getOrDefault(source, 0) + 1); |
|||
} |
|||
sourceDistribution.forEach((source, count) -> { |
|||
view.displayMessage(" " + source + ":" + count + "条"); |
|||
}); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "statistic"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "统计当前数据的各项指标"; |
|||
} |
|||
} |
|||
@ -0,0 +1,39 @@ |
|||
package com.danmaku.controller; |
|||
|
|||
import com.danmaku.command.Command; |
|||
import com.danmaku.view.View; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
public class CommandInvoker { |
|||
private final Map<String, Command> commands; |
|||
private final View view; |
|||
|
|||
public CommandInvoker(View view) { |
|||
this.view = view; |
|||
this.commands = new HashMap<>(); |
|||
} |
|||
|
|||
public void registerCommand(String name, Command command) { |
|||
commands.put(name.toLowerCase(), command); |
|||
} |
|||
|
|||
public void executeCommand(String name) { |
|||
Command command = commands.get(name.toLowerCase()); |
|||
if (command != null) { |
|||
command.execute(); |
|||
} else { |
|||
view.displayError("未知命令: " + name); |
|||
view.displayMessage("输入 'help' 查看可用命令"); |
|||
} |
|||
} |
|||
|
|||
public Command getCommand(String name) { |
|||
return commands.get(name.toLowerCase()); |
|||
} |
|||
|
|||
public Map<String, Command> getCommands() { |
|||
return commands; |
|||
} |
|||
} |
|||
@ -0,0 +1,248 @@ |
|||
package com.danmaku.controller; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.model.Danmaku; |
|||
import com.danmaku.strategy.*; |
|||
import com.danmaku.view.View; |
|||
|
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.io.PrintWriter; |
|||
import java.util.*; |
|||
|
|||
public class DanmakuController { |
|||
private final View view; |
|||
private final Map<String, DanmakuSource> sources; |
|||
private List<Danmaku> currentDanmakuList; |
|||
|
|||
public DanmakuController(View view) { |
|||
this.view = view; |
|||
this.sources = new HashMap<>(); |
|||
this.currentDanmakuList = new ArrayList<>(); |
|||
initializeSources(); |
|||
} |
|||
|
|||
private void initializeSources() { |
|||
sources.put("bilibili", new BilibiliSource()); |
|||
sources.put("douban", new DoubanTop250Source()); |
|||
sources.put("news", new NewsSource()); |
|||
} |
|||
|
|||
public void fetchDanmaku(String platform, String videoId) { |
|||
DanmakuSource source = sources.get(platform.toLowerCase()); |
|||
if (source == null) { |
|||
view.displayError("不支持的平台: " + platform); |
|||
view.displayMessage("支持的平台: " + String.join(", ", sources.keySet())); |
|||
return; |
|||
} |
|||
|
|||
if (!source.isValidVideoId(videoId)) { |
|||
view.displayError("无效的视频ID: " + videoId); |
|||
return; |
|||
} |
|||
|
|||
try { |
|||
String dataType = source.getName().equals("豆瓣电影Top250") ? "电影数据" : |
|||
source.getName().equals("新浪新闻") ? "新闻数据" : "弹幕数据"; |
|||
view.displayMessage("开始从 " + source.getName() + " 获取" + dataType + "..."); |
|||
view.displayMessage("视频ID: " + videoId); |
|||
|
|||
currentDanmakuList = source.fetchDanmaku(videoId); |
|||
|
|||
view.displayMessage("成功获取到 " + currentDanmakuList.size() + " 条数据"); |
|||
} catch (DanmakuException e) { |
|||
view.displayError("获取数据失败: " + e.getMessage()); |
|||
if (e.getSource() != null) { |
|||
view.displayError("数据源: " + e.getSource()); |
|||
} |
|||
currentDanmakuList = new ArrayList<>(); |
|||
} |
|||
} |
|||
|
|||
public void saveDanmaku(String filePath, String format) throws IOException { |
|||
if (currentDanmakuList == null || currentDanmakuList.isEmpty()) { |
|||
view.displayError("没有数据可保存"); |
|||
return; |
|||
} |
|||
|
|||
view.displayMessage("开始保存数据到文件: " + filePath); |
|||
view.displayMessage("保存格式: " + format); |
|||
|
|||
switch (format.toLowerCase()) { |
|||
case "json": |
|||
saveAsJson(filePath); |
|||
break; |
|||
case "csv": |
|||
saveAsCsv(filePath); |
|||
break; |
|||
case "txt": |
|||
saveAsTxt(filePath); |
|||
break; |
|||
default: |
|||
throw new IOException("不支持的保存格式: " + format); |
|||
} |
|||
|
|||
view.displayMessage("成功保存 " + currentDanmakuList.size() + " 条数据到文件"); |
|||
} |
|||
|
|||
private void saveAsJson(String filePath) throws IOException { |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(filePath))) { |
|||
writer.println("["); |
|||
for (int i = 0; i < currentDanmakuList.size(); i++) { |
|||
writer.print(" " + currentDanmakuList.get(i).toJson()); |
|||
if (i < currentDanmakuList.size() - 1) { |
|||
writer.println(","); |
|||
} else { |
|||
writer.println(); |
|||
} |
|||
} |
|||
writer.println("]"); |
|||
} |
|||
} |
|||
|
|||
private void saveAsCsv(String filePath) throws IOException { |
|||
try (java.io.OutputStreamWriter osw = new java.io.OutputStreamWriter( |
|||
new java.io.FileOutputStream(filePath), "GBK"); |
|||
java.io.PrintWriter writer = new java.io.PrintWriter(osw)) { |
|||
String header = "content,time,type,size,color,timestamp,pool,source"; |
|||
if (!currentDanmakuList.isEmpty()) { |
|||
Danmaku first = currentDanmakuList.get(0); |
|||
if (first.getSource() != null && first.getSource().contains("豆瓣")) { |
|||
header = "rank,title,rating,commentCount,source"; |
|||
} else if (first.getSource() != null && first.getSource().contains("新闻")) { |
|||
header = "rank,title,publishTime,reporter,content,source"; |
|||
} |
|||
} |
|||
writer.println(header); |
|||
for (Danmaku danmaku : currentDanmakuList) { |
|||
writer.println(danmaku.toCsv()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void saveAsTxt(String filePath) throws IOException { |
|||
try (PrintWriter writer = new PrintWriter(new FileWriter(filePath))) { |
|||
for (Danmaku danmaku : currentDanmakuList) { |
|||
writer.println(danmaku.toString()); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public void statisticDanmaku() { |
|||
if (currentDanmakuList == null || currentDanmakuList.isEmpty()) { |
|||
view.displayError("没有数据可统计"); |
|||
return; |
|||
} |
|||
|
|||
Map<String, Integer> wordFrequency = new HashMap<>(); |
|||
int validContentCount = 0; |
|||
|
|||
for (Danmaku danmaku : currentDanmakuList) { |
|||
String content = danmaku.getContent(); |
|||
if (content != null && !content.isEmpty()) { |
|||
String[] words = content.split("[\\s\\p{Punct}]+"); |
|||
for (String word : words) { |
|||
word = word.trim(); |
|||
if (word.length() >= 2) { |
|||
wordFrequency.put(word, wordFrequency.getOrDefault(word, 0) + 1); |
|||
} |
|||
} |
|||
validContentCount++; |
|||
} |
|||
} |
|||
|
|||
view.displayMessage("\n===== 数据统计结果 ====="); |
|||
view.displayMessage("1. 总数据量:" + currentDanmakuList.size()); |
|||
|
|||
view.displayMessage("\n2. 数据类型分布:"); |
|||
int danmakuCount = 0; |
|||
int movieCount = 0; |
|||
int newsCount = 0; |
|||
|
|||
for (Danmaku d : currentDanmakuList) { |
|||
if (d.getTitle() != null && !d.getTitle().isEmpty()) { |
|||
if (d.getSource() != null && d.getSource().contains("新闻")) { |
|||
newsCount++; |
|||
} else { |
|||
movieCount++; |
|||
} |
|||
} else { |
|||
danmakuCount++; |
|||
} |
|||
} |
|||
|
|||
if (danmakuCount > 0) { |
|||
view.displayMessage(" 弹幕数据:" + danmakuCount + " 条"); |
|||
} |
|||
if (movieCount > 0) { |
|||
view.displayMessage(" 电影数据:" + movieCount + " 条"); |
|||
view.displayMessage(" (注:豆瓣Top250页面不提供评论内容,无法统计评论高频词)"); |
|||
} |
|||
if (newsCount > 0) { |
|||
view.displayMessage(" 新闻数据:" + newsCount + " 条"); |
|||
} |
|||
|
|||
if (validContentCount > 0 && !wordFrequency.isEmpty()) { |
|||
view.displayMessage("\n3. 高频词统计:"); |
|||
wordFrequency.entrySet().stream() |
|||
.sorted(Map.Entry.<String, Integer>comparingByValue().reversed()) |
|||
.limit(10) |
|||
.forEach(entry -> { |
|||
view.displayMessage(" " + entry.getKey() + ":" + entry.getValue() + "次"); |
|||
}); |
|||
} else { |
|||
view.displayMessage("\n3. 高频词统计:无可用文本数据"); |
|||
} |
|||
|
|||
view.displayMessage("\n4. 数据来源分布:"); |
|||
Map<String, Integer> sourceDistribution = new HashMap<>(); |
|||
for (Danmaku danmaku : currentDanmakuList) { |
|||
String source = danmaku.getSource() != null ? danmaku.getSource() : "未知"; |
|||
sourceDistribution.put(source, sourceDistribution.getOrDefault(source, 0) + 1); |
|||
} |
|||
sourceDistribution.forEach((source, count) -> { |
|||
view.displayMessage(" " + source + ":" + count + "条"); |
|||
}); |
|||
|
|||
if (movieCount > 0) { |
|||
view.displayMessage("\n5. 豆瓣电影Top250 评分统计:"); |
|||
double sum = 0; |
|||
int count = 0; |
|||
for (Danmaku d : currentDanmakuList) { |
|||
if (d.getRating() > 0) { |
|||
sum += d.getRating(); |
|||
count++; |
|||
} |
|||
} |
|||
if (count > 0) { |
|||
view.displayMessage(" 平均评分:" + String.format("%.2f", sum / count)); |
|||
view.displayMessage(" 最高评分:" + currentDanmakuList.stream() |
|||
.filter(d -> d.getRating() > 0) |
|||
.mapToDouble(Danmaku::getRating) |
|||
.max() |
|||
.orElse(0)); |
|||
view.displayMessage(" 最低评分:" + currentDanmakuList.stream() |
|||
.filter(d -> d.getRating() > 0) |
|||
.mapToDouble(Danmaku::getRating) |
|||
.min() |
|||
.orElse(0)); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public Map<String, DanmakuSource> getSources() { |
|||
return sources; |
|||
} |
|||
|
|||
public List<Danmaku> getCurrentDanmakuList() { |
|||
return currentDanmakuList; |
|||
} |
|||
|
|||
public void setCurrentDanmakuList(List<?> list) { |
|||
this.currentDanmakuList = new ArrayList<>((List<Danmaku>) list); |
|||
} |
|||
|
|||
private int comparingByValue() { |
|||
return 0; |
|||
} |
|||
} |
|||
@ -0,0 +1,27 @@ |
|||
package com.danmaku.exception; |
|||
|
|||
public class DanmakuException extends Exception { |
|||
private String source; |
|||
|
|||
public DanmakuException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public DanmakuException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public DanmakuException(String source, String message) { |
|||
super(message); |
|||
this.source = source; |
|||
} |
|||
|
|||
public DanmakuException(String source, String message, Throwable cause) { |
|||
super(message, cause); |
|||
this.source = source; |
|||
} |
|||
|
|||
public String getSource() { |
|||
return source; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.danmaku.exception; |
|||
|
|||
public class NetworkException extends DanmakuException { |
|||
private int statusCode; |
|||
|
|||
public NetworkException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public NetworkException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public NetworkException(String source, String message, Throwable cause) { |
|||
super(source, message, cause); |
|||
} |
|||
|
|||
public NetworkException(String source, String message, int statusCode) { |
|||
super(source, message); |
|||
this.statusCode = statusCode; |
|||
} |
|||
|
|||
public int getStatusCode() { |
|||
return statusCode; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.danmaku.exception; |
|||
|
|||
public class ParseException extends DanmakuException { |
|||
private String parseTarget; |
|||
|
|||
public ParseException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public ParseException(String source, String message, Throwable cause) { |
|||
super(source, message, cause); |
|||
} |
|||
|
|||
public ParseException(String source, String message, String parseTarget) { |
|||
super(source, message); |
|||
this.parseTarget = parseTarget; |
|||
} |
|||
|
|||
public String getParseTarget() { |
|||
return parseTarget; |
|||
} |
|||
} |
|||
@ -0,0 +1,122 @@ |
|||
package com.danmaku.model; |
|||
|
|||
/**
 * Mutable record shared by all data sources. Depending on which fields are populated it
 * represents one of three things:
 * <ul>
 *   <li>a news article: non-empty {@code title} and a {@code source} containing "新闻";</li>
 *   <li>a movie entry: non-empty {@code title} and any other source;</li>
 *   <li>a danmaku comment: no title.</li>
 * </ul>
 * The text renderings ({@link #toString()}, {@link #toJson()}, {@link #toCsv()}) pick their
 * layout from that classification.
 */
public class Danmaku {
    private String content;
    private double time;
    private int type;
    private int size;
    private int color;
    private long timestamp;
    private int pool;
    private String source;

    private String title;
    private double rating;
    private int rank;
    private long commentCount;

    private String publishTime;
    private String reporter;

    /** Creates an empty record; populate it through the setters. */
    public Danmaku() {}

    /**
     * Creates a danmaku-comment record.
     *
     * @param content   comment text
     * @param time      stored as-is in the {@code time} field
     * @param type      stored as-is in the {@code type} field
     * @param size      stored as-is in the {@code size} field
     * @param color     stored as-is; rendered as six hex digits by {@link #toString()}
     * @param timestamp stored as-is in the {@code timestamp} field
     * @param pool      stored as-is in the {@code pool} field
     */
    public Danmaku(String content, double time, int type, int size, int color, long timestamp, int pool) {
        this.content = content;
        this.time = time;
        this.type = type;
        this.size = size;
        this.color = color;
        this.timestamp = timestamp;
        this.pool = pool;
    }

    public String getContent() { return content; }
    public void setContent(String content) { this.content = content; }
    public double getTime() { return time; }
    public void setTime(double time) { this.time = time; }
    public int getType() { return type; }
    public void setType(int type) { this.type = type; }
    public int getSize() { return size; }
    public void setSize(int size) { this.size = size; }
    public int getColor() { return color; }
    public void setColor(int color) { this.color = color; }
    public long getTimestamp() { return timestamp; }
    public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
    public int getPool() { return pool; }
    public void setPool(int pool) { this.pool = pool; }
    public String getSource() { return source; }
    public void setSource(String source) { this.source = source; }

    public String getTitle() { return title; }
    public void setTitle(String title) { this.title = title; }
    public double getRating() { return rating; }
    public void setRating(double rating) { this.rating = rating; }
    public int getRank() { return rank; }
    public void setRank(int rank) { this.rank = rank; }
    public long getCommentCount() { return commentCount; }
    public void setCommentCount(long commentCount) { this.commentCount = commentCount; }

    public String getPublishTime() { return publishTime; }
    public void setPublishTime(String publishTime) { this.publishTime = publishTime; }
    public String getReporter() { return reporter; }
    public void setReporter(String reporter) { this.reporter = reporter; }

    /**
     * Returns a human-readable one-line summary whose layout depends on the record kind.
     */
    @Override
    public String toString() {
        if (hasTitle()) {
            if (isNews()) {
                StringBuilder sb = new StringBuilder();
                sb.append("【新闻").append(rank).append("】").append(title);
                if (publishTime != null) sb.append(" (").append(publishTime).append(")");
                if (reporter != null) sb.append(" - 记者: ").append(reporter);
                return sb.toString();
            }
            return String.format("[排名%d] %s - 评分: %.1f - 评论数: %d", rank, title, rating, commentCount);
        }
        return String.format("[%.2f] %s (type=%d, size=%d, color=#%06x)",
                time, content, type, size, color);
    }

    /**
     * Renders this record as a single JSON object.
     *
     * <p>All string values are JSON-escaped and {@code null} strings become {@code ""}. Numbers
     * are formatted with {@link java.util.Locale#ROOT} so the decimal separator is always a dot,
     * whatever the JVM default locale is.
     *
     * @return the JSON object text
     */
    public String toJson() {
        final java.util.Locale root = java.util.Locale.ROOT;
        if (hasTitle()) {
            if (isNews()) {
                return String.format(root,
                        "{\"rank\":%d,\"title\":\"%s\",\"publishTime\":\"%s\",\"reporter\":\"%s\",\"content\":\"%s\",\"source\":\"%s\"}",
                        rank,
                        escapeJson(title),
                        escapeJson(publishTime),
                        escapeJson(reporter),
                        escapeJson(content),
                        escapeJson(source));
            }
            return String.format(root,
                    "{\"rank\":%d,\"title\":\"%s\",\"rating\":%.1f,\"commentCount\":%d,\"source\":\"%s\"}",
                    rank, escapeJson(title), rating, commentCount, escapeJson(source));
        }
        return String.format(root,
                "{\"content\":\"%s\",\"time\":%.2f,\"type\":%d,\"size\":%d,\"color\":%d,\"timestamp\":%d,\"pool\":%d,\"source\":\"%s\"}",
                escapeJson(content), time, type, size, color, timestamp, pool, escapeJson(source));
    }

    /**
     * Renders this record as one CSV row matching the header for its record kind.
     *
     * <p>Every text field is wrapped in double quotes with embedded quotes doubled; {@code null}
     * strings become empty fields. Numbers are formatted with {@link java.util.Locale#ROOT} so a
     * comma-decimal default locale cannot shift the columns.
     *
     * @return the CSV row without a line terminator
     */
    public String toCsv() {
        final java.util.Locale root = java.util.Locale.ROOT;
        if (hasTitle()) {
            if (isNews()) {
                return String.format(root, "%d,\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"",
                        rank,
                        escapeCsv(title),
                        escapeCsv(publishTime),
                        escapeCsv(reporter),
                        escapeCsv(content),
                        escapeCsv(source));
            }
            return String.format(root, "%d,\"%s\",%.1f,%d,\"%s\"",
                    rank, escapeCsv(title), rating, commentCount, escapeCsv(source));
        }
        return String.format(root, "\"%s\",%.2f,%d,%d,%d,%d,%d,\"%s\"",
                escapeCsv(content), time, type, size, color, timestamp, pool, escapeCsv(source));
    }

    /**
     * @return the CSV header for news records
     */
    public static String csvHeader() {
        return "rank,title,publishTime,reporter,content,source";
    }

    /** True when this record carries a non-empty title (movie or news record). */
    private boolean hasTitle() {
        return title != null && !title.isEmpty();
    }

    /** True when the source name marks this record as a news article. */
    private boolean isNews() {
        return source != null && source.contains("新闻");
    }

    /** Escapes a value for use inside a JSON string literal; {@code null} becomes "". */
    private static String escapeJson(String value) {
        if (value == null) {
            return "";
        }
        StringBuilder sb = new StringBuilder(value.length() + 8);
        for (int i = 0; i < value.length(); i++) {
            char c = value.charAt(i);
            switch (c) {
                case '"':  sb.append("\\\""); break;
                case '\\': sb.append("\\\\"); break;
                case '\n': sb.append("\\n"); break;
                case '\r': sb.append("\\r"); break;
                case '\t': sb.append("\\t"); break;
                case '\b': sb.append("\\b"); break;
                case '\f': sb.append("\\f"); break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must be written as \\uXXXX in JSON.
                        sb.append(String.format(java.util.Locale.ROOT, "\\u%04x", (int) c));
                    } else {
                        sb.append(c);
                    }
            }
        }
        return sb.toString();
    }

    /** Doubles embedded quotes for a quoted CSV field; {@code null} becomes "". */
    private static String escapeCsv(String value) {
        return value == null ? "" : value.replace("\"", "\"\"");
    }
}
|||
@ -0,0 +1,284 @@ |
|||
package com.danmaku.strategy; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.exception.NetworkException; |
|||
import com.danmaku.exception.ParseException; |
|||
import com.danmaku.model.Danmaku; |
|||
import org.apache.hc.client5.http.classic.methods.HttpGet; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; |
|||
import org.apache.hc.client5.http.impl.classic.HttpClients; |
|||
import org.apache.hc.core5.http.io.entity.EntityUtils; |
|||
|
|||
import java.io.ByteArrayOutputStream; |
|||
import java.io.IOException; |
|||
import java.io.InputStream; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.zip.DataFormatException; |
|||
import java.util.zip.Inflater; |
|||
import java.util.zip.GZIPInputStream; |
|||
import java.io.ByteArrayInputStream; |
|||
|
|||
public class BilibiliSource implements DanmakuSource { |
|||
private static final String NAME = "Bilibili"; |
|||
private static final String BILI_API_URL = "https://api.bilibili.com/x/web-interface/view?bvid="; |
|||
private static final String COMMENT_URL = "https://comment.bilibili.com/"; |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return NAME; |
|||
} |
|||
|
|||
@Override |
|||
public String getVideoId(String videoUrl) { |
|||
if (videoUrl == null || videoUrl.isEmpty()) { |
|||
return null; |
|||
} |
|||
if (videoUrl.startsWith("BV")) { |
|||
return videoUrl; |
|||
} |
|||
if (videoUrl.contains("bilibili.com")) { |
|||
int idx = videoUrl.indexOf("BV"); |
|||
if (idx != -1) { |
|||
String sub = videoUrl.substring(idx); |
|||
for (int i = 0; i < sub.length(); i++) { |
|||
if (!Character.isLetterOrDigit(sub.charAt(i))) { |
|||
return sub.substring(0, i); |
|||
} |
|||
} |
|||
return sub; |
|||
} |
|||
} |
|||
return videoUrl; |
|||
} |
|||
|
|||
@Override |
|||
public List<Danmaku> fetchDanmaku(String bvid) throws DanmakuException { |
|||
List<Danmaku> danmakuList = new ArrayList<>(); |
|||
|
|||
try { |
|||
String cid = getCidByBvid(bvid); |
|||
if (cid == null) { |
|||
throw new ParseException("无法获取视频CID"); |
|||
} |
|||
|
|||
System.out.println("开始获取 " + NAME + " 弹幕,视频CID: " + cid + "..."); |
|||
danmakuList = getDanmakuList(cid); |
|||
System.out.println(NAME + " 弹幕获取完成,共 " + danmakuList.size() + " 条"); |
|||
|
|||
} catch (IOException e) { |
|||
throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析弹幕数据失败: " + e.getMessage(), e); |
|||
} |
|||
|
|||
return danmakuList; |
|||
} |
|||
|
|||
@Override |
|||
public boolean isValidVideoId(String videoId) { |
|||
return videoId != null && videoId.startsWith("BV") && videoId.length() == 12; |
|||
} |
|||
|
|||
@Override |
|||
public String getHomePage() { |
|||
return "https://www.bilibili.com"; |
|||
} |
|||
|
|||
private String getCidByBvid(String bvid) throws Exception { |
|||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) { |
|||
HttpGet httpGet = new HttpGet(BILI_API_URL + bvid); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
httpGet.setHeader("Referer", "https://www.bilibili.com/"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
int statusCode = response.getCode(); |
|||
if (statusCode != 200) { |
|||
throw new NetworkException(NAME, "获取视频信息失败,HTTP状态码: " + statusCode, statusCode); |
|||
} |
|||
|
|||
String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
|
|||
int cidStart = responseBody.indexOf("\"cid\":"); |
|||
if (cidStart == -1) { |
|||
return null; |
|||
} |
|||
cidStart += 6; |
|||
int cidEnd = responseBody.indexOf(",", cidStart); |
|||
if (cidEnd == -1) { |
|||
cidEnd = responseBody.indexOf("}", cidStart); |
|||
} |
|||
return responseBody.substring(cidStart, cidEnd).trim(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private List<Danmaku> getDanmakuList(String cid) throws Exception { |
|||
List<Danmaku> allDanmaku = new ArrayList<>(); |
|||
|
|||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) { |
|||
String urlStr = COMMENT_URL + cid + ".xml"; |
|||
HttpGet httpGet = new HttpGet(urlStr); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); |
|||
httpGet.setHeader("Referer", "https://www.bilibili.com/"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
int statusCode = response.getCode(); |
|||
if (statusCode != 200) { |
|||
throw new NetworkException(NAME, "获取弹幕失败,HTTP状态码: " + statusCode, statusCode); |
|||
} |
|||
|
|||
InputStream is = response.getEntity().getContent(); |
|||
ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
|||
byte[] buffer = new byte[4096]; |
|||
int len; |
|||
while ((len = is.read(buffer)) != -1) { |
|||
baos.write(buffer, 0, len); |
|||
} |
|||
is.close(); |
|||
byte[] data = baos.toByteArray(); |
|||
|
|||
String xml = tryDecompress(data); |
|||
if (xml != null && xml.contains("<d")) { |
|||
List<Danmaku> danmaku = parseXmlDanmaku(xml); |
|||
allDanmaku.addAll(danmaku); |
|||
} |
|||
} |
|||
} |
|||
|
|||
return allDanmaku; |
|||
} |
|||
|
|||
private String tryDecompress(byte[] data) { |
|||
if (data == null || data.length == 0) { |
|||
return null; |
|||
} |
|||
|
|||
try { |
|||
String xml = new String(data, "UTF-8"); |
|||
if (xml.contains("<d") && xml.contains("</d>")) { |
|||
return xml; |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
|
|||
try { |
|||
Inflater inflater = new Inflater(true); |
|||
inflater.setInput(data); |
|||
ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
|||
byte[] buffer = new byte[4096]; |
|||
int totalCount = 0; |
|||
while (!inflater.finished()) { |
|||
try { |
|||
int count = inflater.inflate(buffer); |
|||
if (count > 0) { |
|||
baos.write(buffer, 0, count); |
|||
totalCount += count; |
|||
} else if (totalCount > 0) { |
|||
break; |
|||
} |
|||
} catch (Exception e) { |
|||
break; |
|||
} |
|||
} |
|||
inflater.end(); |
|||
byte[] decompressed = baos.toByteArray(); |
|||
String result = new String(decompressed, "UTF-8"); |
|||
if (result.contains("<d")) { |
|||
return result; |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
|
|||
try { |
|||
GZIPInputStream gzis = new GZIPInputStream(new ByteArrayInputStream(data)); |
|||
ByteArrayOutputStream baos = new ByteArrayOutputStream(); |
|||
byte[] buffer = new byte[4096]; |
|||
int len; |
|||
while ((len = gzis.read(buffer)) != -1) { |
|||
baos.write(buffer, 0, len); |
|||
} |
|||
gzis.close(); |
|||
String result = baos.toString("UTF-8"); |
|||
if (result.contains("<d")) { |
|||
return result; |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
|
|||
try { |
|||
return new String(data, "UTF-8"); |
|||
} catch (Exception e) { |
|||
return null; |
|||
} |
|||
} |
|||
|
|||
private List<Danmaku> parseXmlDanmaku(String xml) { |
|||
List<Danmaku> danmakuList = new ArrayList<>(); |
|||
|
|||
int start = 0; |
|||
while (start < xml.length()) { |
|||
int dStart = xml.indexOf("<d", start); |
|||
if (dStart == -1) { |
|||
break; |
|||
} |
|||
|
|||
int pStart = -1; |
|||
for (int i = dStart + 2; i < xml.length() && i < dStart + 20; i++) { |
|||
if (xml.substring(i).startsWith("p=")) { |
|||
pStart = i; |
|||
break; |
|||
} |
|||
} |
|||
|
|||
if (pStart == -1) { |
|||
start = dStart + 2; |
|||
continue; |
|||
} |
|||
|
|||
int quoteStart = xml.indexOf('"', pStart + 2); |
|||
if (quoteStart == -1) { |
|||
start = pStart + 2; |
|||
continue; |
|||
} |
|||
|
|||
int quoteEnd = xml.indexOf('"', quoteStart + 1); |
|||
if (quoteEnd == -1) { |
|||
start = quoteStart + 1; |
|||
continue; |
|||
} |
|||
|
|||
int contentStart = xml.indexOf('>', quoteEnd); |
|||
int contentEnd = xml.indexOf("</d>", contentStart); |
|||
|
|||
if (contentStart != -1 && contentEnd != -1) { |
|||
String p = xml.substring(quoteStart + 1, quoteEnd); |
|||
String content = xml.substring(contentStart + 1, contentEnd); |
|||
|
|||
String[] attrs = p.split(",", 6); |
|||
if (attrs.length >= 5) { |
|||
try { |
|||
Danmaku danmaku = new Danmaku(); |
|||
danmaku.setContent(content); |
|||
danmaku.setTime(Double.parseDouble(attrs[0])); |
|||
danmaku.setType(Integer.parseInt(attrs[1])); |
|||
danmaku.setSize(Integer.parseInt(attrs[2])); |
|||
danmaku.setColor(Integer.parseInt(attrs[3])); |
|||
danmaku.setTimestamp(Long.parseLong(attrs[4])); |
|||
danmaku.setPool(0); |
|||
danmaku.setSource(NAME); |
|||
danmakuList.add(danmaku); |
|||
} catch (Exception e) { |
|||
} |
|||
} |
|||
|
|||
start = contentEnd + 4; |
|||
} else { |
|||
start = dStart + 2; |
|||
} |
|||
} |
|||
|
|||
return danmakuList; |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package com.danmaku.strategy; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.model.Danmaku; |
|||
import java.util.List; |
|||
|
|||
public interface DanmakuSource { |
|||
String getName(); |
|||
|
|||
String getVideoId(String videoUrl) throws DanmakuException; |
|||
|
|||
List<Danmaku> fetchDanmaku(String videoId) throws DanmakuException; |
|||
|
|||
boolean isValidVideoId(String videoId); |
|||
|
|||
default String getHomePage() { |
|||
return ""; |
|||
} |
|||
} |
|||
@ -0,0 +1,127 @@ |
|||
package com.danmaku.strategy; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.exception.NetworkException; |
|||
import com.danmaku.exception.ParseException; |
|||
import com.danmaku.model.Danmaku; |
|||
import org.apache.hc.client5.http.classic.methods.HttpGet; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; |
|||
import org.apache.hc.client5.http.impl.classic.HttpClients; |
|||
import org.apache.hc.core5.http.io.entity.EntityUtils; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class DoubanTop250Source implements DanmakuSource { |
|||
private static final String NAME = "豆瓣电影Top250"; |
|||
private static final Pattern MOVIE_PATTERN = Pattern.compile( |
|||
"<li>.*?<em>(\\d+)</em>.*?" + |
|||
"<span class=\"title\">([^<]+)</span>.*?" + |
|||
"<span class=\"rating_num\" property=\"v:average\">([\\d.]+)</span>.*?" + |
|||
"<span>([\\d]+)人评价</span>", |
|||
Pattern.DOTALL |
|||
); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return NAME; |
|||
} |
|||
|
|||
@Override |
|||
public String getVideoId(String url) { |
|||
if (url == null || url.isEmpty()) { |
|||
return "1"; |
|||
} |
|||
if (url.matches("\\d+")) { |
|||
return url; |
|||
} |
|||
return "1"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Danmaku> fetchDanmaku(String param) throws DanmakuException { |
|||
try { |
|||
return getTop250(); |
|||
} catch (IOException e) { |
|||
throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析数据失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public boolean isValidVideoId(String videoId) { |
|||
return true; |
|||
} |
|||
|
|||
@Override |
|||
public String getHomePage() { |
|||
return "https://movie.douban.com/top250"; |
|||
} |
|||
|
|||
private List<Danmaku> getTop250() throws IOException, Exception { |
|||
List<Danmaku> danmakuList = new ArrayList<>(); |
|||
|
|||
String url = "https://movie.douban.com/top250"; |
|||
|
|||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) { |
|||
HttpGet httpGet = new HttpGet(url); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
int statusCode = response.getCode(); |
|||
if (statusCode != 200) { |
|||
throw new NetworkException(NAME, "获取Top250失败,HTTP状态码: " + statusCode, statusCode); |
|||
} |
|||
|
|||
String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
danmakuList.addAll(parseTop250(responseBody)); |
|||
} |
|||
} |
|||
|
|||
if (danmakuList.isEmpty()) { |
|||
for (int i = 0; i < 25; i++) { |
|||
Danmaku d = new Danmaku(); |
|||
d.setRank(i + 1); |
|||
d.setTitle("示例电影" + (i + 1)); |
|||
d.setRating(8.0 + Math.random() * 1.5); |
|||
d.setCommentCount((long) (100000 + Math.random() * 2000000)); |
|||
d.setSource(NAME); |
|||
danmakuList.add(d); |
|||
} |
|||
} |
|||
|
|||
if (danmakuList.size() > 25) { |
|||
danmakuList = danmakuList.subList(0, 25); |
|||
} |
|||
|
|||
return danmakuList; |
|||
} |
|||
|
|||
private List<Danmaku> parseTop250(String html) { |
|||
List<Danmaku> danmakuList = new ArrayList<>(); |
|||
|
|||
Matcher matcher = MOVIE_PATTERN.matcher(html); |
|||
|
|||
while (matcher.find()) { |
|||
int rank = Integer.parseInt(matcher.group(1)); |
|||
String title = matcher.group(2).trim(); |
|||
double rating = Double.parseDouble(matcher.group(3)); |
|||
long commentCount = Long.parseLong(matcher.group(4)); |
|||
|
|||
Danmaku d = new Danmaku(); |
|||
d.setRank(rank); |
|||
d.setTitle(title); |
|||
d.setRating(rating); |
|||
d.setCommentCount(commentCount); |
|||
d.setSource(NAME); |
|||
danmakuList.add(d); |
|||
} |
|||
|
|||
return danmakuList; |
|||
} |
|||
} |
|||
@ -0,0 +1,208 @@ |
|||
package com.danmaku.strategy; |
|||
|
|||
import com.danmaku.exception.DanmakuException; |
|||
import com.danmaku.exception.NetworkException; |
|||
import com.danmaku.exception.ParseException; |
|||
import com.danmaku.model.Danmaku; |
|||
import org.apache.hc.client5.http.classic.methods.HttpGet; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; |
|||
import org.apache.hc.client5.http.impl.classic.HttpClients; |
|||
import org.apache.hc.core5.http.io.entity.EntityUtils; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class NewsSource implements DanmakuSource { |
|||
private static final String NAME = "新浪新闻"; |
|||
private static final Pattern NEWS_LINK_PATTERN = Pattern.compile( |
|||
"<a[^>]+href=\"(https?://news\\.sina\\.com\\.cn/[^\"]+)\"[^>]*>([^<]{10,})</a>", |
|||
Pattern.DOTALL |
|||
); |
|||
private static final Pattern TIME_PATTERN = Pattern.compile( |
|||
"(\\d{4})年(\\d{1,2})月(\\d{1,2})日\\s*(\\d{1,2}):(\\d{2})" |
|||
); |
|||
private static final Pattern REPORTER_PATTERN = Pattern.compile( |
|||
"(?:记者|编辑|撰文)[::]?\\s*([\\u4e00-\\u9fa5]{2,4})(?:\\s|$)" |
|||
); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return NAME; |
|||
} |
|||
|
|||
@Override |
|||
public String getVideoId(String url) { |
|||
if (url == null || url.isEmpty()) { |
|||
return "1"; |
|||
} |
|||
if (url.matches("\\d+")) { |
|||
return url; |
|||
} |
|||
return "1"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Danmaku> fetchDanmaku(String category) throws DanmakuException { |
|||
try { |
|||
return getNewsWithContent(); |
|||
} catch (IOException e) { |
|||
throw new NetworkException(NAME, "网络请求失败: " + e.getMessage(), e); |
|||
} catch (Exception e) { |
|||
throw new ParseException("解析数据失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public boolean isValidVideoId(String videoId) { |
|||
return videoId != null && !videoId.isEmpty(); |
|||
} |
|||
|
|||
@Override |
|||
public String getHomePage() { |
|||
return "https://news.sina.com.cn"; |
|||
} |
|||
|
|||
private List<Danmaku> getNewsWithContent() throws IOException, Exception { |
|||
List<Danmaku> danmakuList = new ArrayList<>(); |
|||
List<String[]> newsLinks = new ArrayList<>(); |
|||
|
|||
String url = "https://news.sina.com.cn/"; |
|||
|
|||
try (CloseableHttpClient httpClient = HttpClients.createDefault()) { |
|||
HttpGet httpGet = new HttpGet(url); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/148.0.0.0 Safari/537.36"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
int statusCode = response.getCode(); |
|||
if (statusCode != 200) { |
|||
throw new NetworkException(NAME, "获取新闻失败,HTTP状态码: " + statusCode, statusCode); |
|||
} |
|||
|
|||
String responseBody = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
Matcher matcher = NEWS_LINK_PATTERN.matcher(responseBody); |
|||
|
|||
while (matcher.find() && newsLinks.size() < 15) { |
|||
String link = matcher.group(1); |
|||
String title = matcher.group(2).trim(); |
|||
|
|||
if (!title.isEmpty() && title.length() > 8 && !title.contains("图片") && !title.contains("视频")) { |
|||
newsLinks.add(new String[]{link, title}); |
|||
} |
|||
} |
|||
} |
|||
|
|||
for (int i = 0; i < Math.min(newsLinks.size(), 10); i++) { |
|||
String[] news = newsLinks.get(i); |
|||
String link = news[0]; |
|||
String title = news[1]; |
|||
|
|||
String[] newsInfo = fetchNewsContent(httpClient, link); |
|||
|
|||
Danmaku d = new Danmaku(); |
|||
d.setRank(i + 1); |
|||
d.setTitle(title); |
|||
d.setPublishTime(newsInfo[0]); |
|||
d.setReporter(newsInfo[1]); |
|||
d.setContent(newsInfo[2]); |
|||
d.setSource(NAME); |
|||
danmakuList.add(d); |
|||
|
|||
Thread.sleep(300); |
|||
} |
|||
} |
|||
|
|||
if (danmakuList.isEmpty()) { |
|||
for (int i = 0; i < 10; i++) { |
|||
Danmaku d = new Danmaku(); |
|||
d.setRank(i + 1); |
|||
d.setTitle("示例新闻标题" + (i + 1)); |
|||
d.setPublishTime("2025年1月1日 12:00"); |
|||
d.setReporter("记者小明"); |
|||
d.setContent("这是示例新闻正文内容,用于演示功能。"); |
|||
d.setSource(NAME); |
|||
danmakuList.add(d); |
|||
} |
|||
} |
|||
|
|||
return danmakuList; |
|||
} |
|||
|
|||
private String[] fetchNewsContent(CloseableHttpClient httpClient, String url) { |
|||
String publishTime = "未知"; |
|||
String reporter = "未知"; |
|||
String content = ""; |
|||
|
|||
try { |
|||
HttpGet httpGet = new HttpGet(url); |
|||
httpGet.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(httpGet)) { |
|||
if (response.getCode() == 200) { |
|||
String html = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
|
|||
Matcher timeMatcher = TIME_PATTERN.matcher(html); |
|||
if (timeMatcher.find()) { |
|||
publishTime = timeMatcher.group(1) + "年" + timeMatcher.group(2) + "月" + |
|||
timeMatcher.group(3) + "日 " + timeMatcher.group(4) + ":" + timeMatcher.group(5); |
|||
} |
|||
|
|||
Matcher reporterMatcher = REPORTER_PATTERN.matcher(html); |
|||
if (reporterMatcher.find()) { |
|||
reporter = reporterMatcher.group(1); |
|||
} |
|||
|
|||
int articleStart = Math.max(html.indexOf("id=\"article\""), html.indexOf("class=\"article\"")); |
|||
if (articleStart == -1) { |
|||
articleStart = html.indexOf("id=\"cont_article\""); |
|||
} |
|||
if (articleStart == -1) { |
|||
articleStart = html.indexOf("class=\"content\""); |
|||
} |
|||
|
|||
if (articleStart != -1) { |
|||
int articleEnd = html.indexOf("</div>", articleStart + 500); |
|||
if (articleEnd == -1) { |
|||
articleEnd = Math.min(articleStart + 5000, html.length()); |
|||
} |
|||
String articleSection = html.substring(articleStart, Math.min(articleStart + 5000, html.length())); |
|||
|
|||
StringBuilder contentBuilder = new StringBuilder(); |
|||
Pattern pTagPattern = Pattern.compile("<p[^>]*>([^<]{20,})</p>"); |
|||
Matcher matcher = pTagPattern.matcher(articleSection); |
|||
|
|||
int count = 0; |
|||
while (matcher.find() && count < 3) { |
|||
String paragraph = matcher.group(1).trim(); |
|||
if (!paragraph.contains("编辑") && !paragraph.contains("Copyright") && |
|||
!paragraph.contains("举报") && !paragraph.contains("来源:")) { |
|||
if (contentBuilder.length() > 0) { |
|||
contentBuilder.append(" "); |
|||
} |
|||
contentBuilder.append(paragraph); |
|||
count++; |
|||
} |
|||
} |
|||
|
|||
if (contentBuilder.length() > 0) { |
|||
content = contentBuilder.toString(); |
|||
if (content.length() > 150) { |
|||
content = content.substring(0, 150) + "..."; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} catch (Exception e) { |
|||
} |
|||
|
|||
if (content.isEmpty()) { |
|||
content = "(无法获取正文内容)"; |
|||
} |
|||
|
|||
return new String[]{publishTime, reporter, content}; |
|||
} |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
package com.danmaku.view; |
|||
|
|||
import java.util.Scanner; |
|||
|
|||
/**
 * Minimal console front end: writes messages to standard output / standard error and reads
 * trimmed lines from standard input.
 */
public class View {

    /** Prefix placed in front of every error line. */
    private static final String ERROR_PREFIX = "[错误] ";

    /** Lines of the start-up banner, printed top to bottom. */
    private static final String[] BANNER = {
        "╔════════════════════════════════════════╗",
        "║ 弹幕爬虫系统 v2.0 ║",
        "║ 支持多平台弹幕爬取 ║",
        "╚════════════════════════════════════════╝",
    };

    /** Reader bound to {@code System.in} as it is at construction time. */
    private final Scanner scanner = new Scanner(System.in);

    /** Creates a view that reads from the current standard input. */
    public View() {
    }

    /** Prints {@code message} followed by a line break on standard output. */
    public void displayMessage(String message) {
        System.out.println(message);
    }

    /** Prints {@code error}, prefixed with the error tag, on standard error. */
    public void displayError(String error) {
        System.err.println(ERROR_PREFIX + error);
    }

    /** Prints the boxed banner followed by one empty line. */
    public void displayWelcome() {
        for (String bannerLine : BANNER) {
            System.out.println(bannerLine);
        }
        System.out.println();
    }

    /** Prints the input prompt on a fresh line without a trailing line break. */
    public void displayPrompt() {
        System.out.print("\n> ");
    }

    /** Reads the next line from standard input and returns it without surrounding whitespace. */
    public String getInput() {
        return readTrimmedLine();
    }

    /**
     * Prints {@code prompt} (no line break), then reads the next line from standard input and
     * returns it without surrounding whitespace.
     */
    public String getInput(String prompt) {
        System.out.print(prompt);
        return readTrimmedLine();
    }

    /** Closes the underlying scanner. */
    public void close() {
        scanner.close();
    }

    /** Shared read path for both {@code getInput} overloads. */
    private String readTrimmedLine() {
        String rawLine = scanner.nextLine();
        return rawLine.trim();
    }
}
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue