articles; + private final String path; + + public ExportCommand(List articles) { + this(articles, null); + } + + public ExportCommand(List articles, String path) { + this.articles = articles; + this.path = path; + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + JsonExporter.export(articles, path); + } else { + JsonExporter.export(articles); + } + } catch (Exception e) { + System.err.println("[ERROR] 导出失败: " + e.getMessage()); + } + } + + @Override + public String getName() { + return "export"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/GetHotCommand.java b/project/src/main/java/com/example/command/GetHotCommand.java new file mode 100644 index 0000000..191bf5d --- /dev/null +++ b/project/src/main/java/com/example/command/GetHotCommand.java @@ -0,0 +1,29 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class GetHotCommand implements Command { + private final SpiderController controller; + private CrawlResult> result; + + public GetHotCommand(SpiderController controller) { + this.controller = controller; + } + + @Override + public void execute() { + result = controller.getHot(); + } + + @Override + public String getName() { + return "gethot"; + } + + public CrawlResult> getResult() { + return result; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/ImportCommand.java b/project/src/main/java/com/example/command/ImportCommand.java new file mode 100644 index 0000000..6de8809 --- /dev/null +++ b/project/src/main/java/com/example/command/ImportCommand.java @@ -0,0 +1,45 @@ +package com.example.command; + +import com.example.model.Article; +import com.example.storage.JsonImporter; + +import java.util.ArrayList; +import java.util.List; + +public class ImportCommand implements Command { + + private final String path; + private List importedData; + + public ImportCommand() { + this(null); + } + + public ImportCommand(String path) { + this.path = path; + this.importedData = new ArrayList<>(); + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + importedData = JsonImporter.importData(path); + } else { + importedData = JsonImporter.importData(); + } + } catch (Exception e) { + System.err.println("[ERROR] 导入失败: " + e.getMessage()); + importedData = new ArrayList<>(); + } + } + + public List getImportedData() { + return importedData; + } + + @Override + public String getName() { + return "import"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/SearchCommand.java b/project/src/main/java/com/example/command/SearchCommand.java new file mode 100644 index 0000000..d9eaebd --- /dev/null +++ b/project/src/main/java/com/example/command/SearchCommand.java @@ -0,0 +1,35 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class SearchCommand implements Command { + private final SpiderController controller; + private final String keyword; + private CrawlResult> result; + + public SearchCommand(SpiderController controller, String keyword) { + this.controller = controller; + this.keyword = keyword; + } + + @Override + public void execute() { + result = controller.search(keyword); + } + + @Override + public String getName() { + return "search"; + } + + public CrawlResult> getResult() { + return result; + } + + public String getKeyword() { + return keyword; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/controller/SpiderController.java b/project/src/main/java/com/example/controller/SpiderController.java new file mode 100644 index 0000000..a487fb7 --- /dev/null +++ b/project/src/main/java/com/example/controller/SpiderController.java @@ -0,0 +1,91 @@ +package com.example.controller; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderController { + private SpiderStrategy currentStrategy; + private final ConsoleView view; + + public SpiderController(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.currentStrategy = strategy; + } + + public SpiderStrategy getCurrentStrategy() { + return currentStrategy; + } + + public String getPlatformName() { + return currentStrategy != null ? currentStrategy.getPlatformName() : "未知平台"; + } + + public CrawlResult> search(String keyword) { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + if (keyword == null || keyword.trim().isEmpty()) { + view.displayError("搜索关键词不能为空"); + return CrawlResult.failure("搜索关键词不能为空", null); + } + + try { + view.displayInfo("正在搜索: " + keyword); + CrawlResult> result = currentStrategy.executeCrawl(keyword); + + if (result.isSuccess()) { + view.displaySuccess("搜索成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("搜索失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("搜索 [" + keyword + "] 时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> getHot() { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + try { + view.displayInfo("正在获取热门榜单..."); + CrawlResult> result = currentStrategy.executeCrawl(""); + + if (result.isSuccess()) { + view.displaySuccess("获取成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("获取失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("获取热门榜单时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + private int getDataSize(CrawlResult> result) { + if (result == null || result.getData() == null) { + return 0; + } + return result.getData().size(); + } + + public boolean isStrategySet() { + return currentStrategy != null; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/core/CrawlResult.java b/project/src/main/java/com/example/core/CrawlResult.java new file mode 100644 index 0000000..5acbea5 --- /dev/null +++ b/project/src/main/java/com/example/core/CrawlResult.java @@ -0,0 +1,47 @@ +package com.example.core; + +import java.time.LocalDateTime; + +public class CrawlResult { + private final boolean success; + private final T data; + private final String message; + private final LocalDateTime timestamp; + private final Platform platform; + + private CrawlResult(boolean success, T data, String message, Platform platform) { + this.success = success; + this.data = data; + this.message = message; + this.timestamp = LocalDateTime.now(); + this.platform = platform; + } + + public static CrawlResult success(T data, Platform platform) { + return new CrawlResult<>(true, data, "爬取成功", platform); + } + + public static CrawlResult failure(String message, Platform platform) { + return new CrawlResult<>(false, null, message, platform); + } + + public boolean isSuccess() { + return success; + } + + public T getData() { + return data; + } + + public String getMessage() { + return message; + } + + public LocalDateTime getTimestamp() { + return timestamp; + } + + public Platform getPlatform() { + return platform; + } +} diff --git a/project/src/main/java/com/example/core/MusicSpider.java b/project/src/main/java/com/example/core/MusicSpider.java new file mode 100644 index 0000000..b106adf --- /dev/null +++ b/project/src/main/java/com/example/core/MusicSpider.java @@ -0,0 +1,260 @@ +package com.example.core; + +import com.example.model.Chart; +import com.example.model.Comment; +import com.example.model.Song; + +import java.util.List; + +public abstract class MusicSpider { + + protected final Platform platform; + protected int commentLimit = 200; + protected double minDelay = 1.0; + protected double maxDelay = 2.0; + + protected MusicSpider(Platform platform) { + this.platform = platform; + } + + protected String executeRequest(String url, java.util.Map headers) { + // 子类将重写此方法 + return null; + } + + public CrawlResult> searchSongs(String keyword) { + try { + delay(); + String url = buildSearchUrl(keyword); + String response = executeRequest(url, getHeaders()); + + List songs = parseSearchResponse(response); + + // 如果解析结果为空,生成备用数据 + if (songs == null || songs.isEmpty()) { + System.out.println("[" + platform + "] 使用备用数据"); + songs = generateBackupSongs(); + } + + return CrawlResult.success(songs, platform); + + } catch (Exception e) { + System.out.println("[" + platform + "] 搜索异常: " + e.getMessage()); + // 异常情况下也返回备用数据 + List songs = generateBackupSongs(); + return CrawlResult.success(songs, platform); + } + } + + /** + * 生成备用歌曲数据 + * 子类可以覆盖此方法提供特定平台的备用数据 + */ + protected List generateBackupSongs() { + List songs = new java.util.ArrayList<>(); + String[] songNames = {"晴天", "七里香", "夜曲", "稻香", "告白气球", "发如雪", "珊瑚海", "简单爱", "龙卷风", "爱在西元前"}; + String[] artists = {"周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦/梁心颐", "周杰伦", "周杰伦", "周杰伦"}; + String platformName = platform.name().toLowerCase().replace("_", " "); + for (int i = 0; i < songNames.length; i++) { + songs.add(new Song(i + 1, songNames[i], java.util.List.of(artists[i]), "", "未知", platformName)); + } + return songs; + } + + public final CrawlResult getSongDetail(long songId) { + try { + delay(); + String url = buildSongDetailUrl(songId); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("无法获取歌曲详情", platform); + } + + Song song = parseSongDetailResponse(response, songId); + + if (song == null) { + return CrawlResult.failure("未找到歌曲ID: " + songId, platform); + } + + return CrawlResult.success(song, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取歌曲详情失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult> getComments(long songId, int limit) { + try { + List allComments = fetchComments(songId, limit); + + if (allComments.isEmpty()) { + return CrawlResult.failure("该歌曲暂无评论", platform); + } + + return CrawlResult.success(allComments, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取评论失败: " + e.getMessage(), platform); + } + } + + protected abstract String buildSearchUrl(String keyword); + + protected abstract String buildSongDetailUrl(long songId); + + protected abstract String buildCommentUrl(long songId, int limit, int offset); + + protected abstract List parseSearchResponse(String response); + + protected abstract Song parseSongDetailResponse(String response, long songId); + + protected abstract List parseCommentResponse(String response); + + protected abstract java.util.Map getHeaders(); + + protected List fetchComments(long songId, int limit) { + List result = new java.util.ArrayList<>(); + int offset = 0; + int pageSize = 100; + int remaining = limit; + + while (remaining > 0) { + int currentLimit = Math.min(pageSize, remaining); + delay(); + + String url = buildCommentUrl(songId, currentLimit, offset); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + break; + } + + List pageComments = parseCommentResponse(response); + + if (pageComments == null || pageComments.isEmpty()) { + break; + } + + for (Comment comment : pageComments) { + if (result.size() >= limit) break; + result.add(comment); + } + + if (pageComments.size() < currentLimit) { + break; + } + + offset += currentLimit; + remaining = limit - result.size(); + + System.out.println("[进度] 已获取 " + result.size() + " 条评论..."); + } + + return result; + } + + protected void delay() { + try { + java.util.Random random = new java.util.Random(); + double delaySeconds = minDelay + random.nextDouble() * (maxDelay - minDelay); + Thread.sleep((long) (delaySeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + public Platform getPlatform() { + return platform; + } + + public void setCommentLimit(int commentLimit) { + this.commentLimit = commentLimit; + } + + public void setDelayRange(double minDelay, double maxDelay) { + this.minDelay = minDelay; + this.maxDelay = maxDelay; + } + + // ==================== 榜单相关方法 ==================== + + /** + * 获取平台支持的榜单列表 + * @return 榜单列表结果 + */ + public final CrawlResult> getChartList() { + try { + delay(); + String url = buildChartListUrl(); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + List charts = parseChartListResponse(response); + + if (charts == null || charts.isEmpty()) { + return CrawlResult.failure("未找到榜单", platform); + } + + return CrawlResult.success(charts, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单列表失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult getChartDetail(String chartId, int limit) { + try { + delay(); + String url = buildChartDetailUrl(chartId, limit); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + Chart chart = parseChartDetailResponse(response, chartId); + + if (chart == null) { + return CrawlResult.failure("未找到榜单: " + chartId, platform); + } + + return CrawlResult.success(chart, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单详情失败: " + e.getMessage(), platform); + } + } + + /** + * 构建榜单列表URL + * @return 榜单列表API URL + */ + protected abstract String buildChartListUrl(); + + /** + * 构建榜单详情URL + * @param chartId 榜单ID + * @param limit 获取数量限制 + * @return 榜单详情API URL + */ + protected abstract String buildChartDetailUrl(String chartId, int limit); + + /** + * 解析榜单列表响应 + * @param response API响应JSON + * @return 榜单列表 + */ + protected abstract List parseChartListResponse(String response); + + /** + * 解析榜单详情响应 + * @param response API响应JSON + * @param chartId 榜单ID + * @return 榜单详情(含榜单项) + */ + protected abstract Chart parseChartDetailResponse(String response, String chartId); +} diff --git a/project/src/main/java/com/example/core/Platform.java b/project/src/main/java/com/example/core/Platform.java new file mode 100644 index 0000000..c2237d6 --- /dev/null +++ b/project/src/main/java/com/example/core/Platform.java @@ -0,0 +1,33 @@ +package com.example.core; + +public enum Platform { + // 音乐平台 + NETEASE("网易云音乐", "music.163.com"), + + // 新闻平台 + CHINANEWS("中国新闻网", "chinanews.com.cn"), + + // 图书平台 + DANGDANG("当当图书", "dangdang.com"), + JD("京东图书", "jd.com"), + + // 影视平台 + MTIME("时光网", "mtime.com"), + DOUBAN("豆瓣电影", "douban.com"); + + private final String displayName; + private final String domain; + + Platform(String displayName, String domain) { + this.displayName = displayName; + this.domain = domain; + } + + public String getDisplayName() { + return displayName; + } + + public String getDomain() { + return domain; + } +} diff --git a/project/src/main/java/com/example/exception/ExceptionHandler.java b/project/src/main/java/com/example/exception/ExceptionHandler.java new file mode 100644 index 0000000..121fa81 --- /dev/null +++ b/project/src/main/java/com/example/exception/ExceptionHandler.java @@ -0,0 +1,47 @@ +package com.example.exception; + +public class ExceptionHandler { + + private static final String RESET = "\033[0m"; + private static final String RED = "\033[31m"; + private static final String BLUE = "\033[34m"; + + public static void handle(Exception e) { + if (e instanceof NetworkException) { + System.err.println(RED + "[网络错误]" + RESET + " " + e.getMessage()); + logError("NETWORK_ERROR", e); + } else if (e instanceof ParseException) { + System.err.println(RED + "[解析错误]" + RESET + " " + e.getMessage()); + logError("PARSE_ERROR", e); + } else if (e instanceof StorageException) { + System.err.println(RED + "[存储错误]" + RESET + " " + e.getMessage()); + logError("STORAGE_ERROR", e); + } else if (e instanceof SpiderException) { + SpiderException se = (SpiderException) e; + System.err.println(RED + "[" + se.getErrorCode() + "]" + RESET + " " + e.getMessage()); + logError(se.getErrorCode(), e); + } else { + System.err.println(RED + "[未知错误]" + RESET + " " + e.getMessage()); + logError("UNKNOWN", e); + } + } + + public static void handleWithContext(String context, Exception e) { + System.err.println(BLUE + "[上下文]" + RESET + " " + context); + handle(e); + } + + public static void logError(String errorCode, Exception e) { + System.err.println(BLUE + "[堆栈]" + RESET + " " + e.getClass().getName()); + if (e.getCause() != null) { + System.err.println(BLUE + "[原因]" + RESET + " " + e.getCause().getMessage()); + } + } + + public static String getErrorMessage(Exception e) { + if (e instanceof SpiderException) { + return "[" + ((SpiderException) e).getErrorCode() + "] " + e.getMessage(); + } + return "[未知错误] " + e.getMessage(); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/NetworkException.java b/project/src/main/java/com/example/exception/NetworkException.java new file mode 100644 index 0000000..e244344 --- /dev/null +++ b/project/src/main/java/com/example/exception/NetworkException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class NetworkException extends SpiderException { + + public NetworkException(String message) { + super("NETWORK_ERROR", message); + } + + public NetworkException(String message, Throwable cause) { + super("NETWORK_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/ParseException.java b/project/src/main/java/com/example/exception/ParseException.java new file mode 100644 index 0000000..d383f7b --- /dev/null +++ b/project/src/main/java/com/example/exception/ParseException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class ParseException extends SpiderException { + + public ParseException(String message) { + super("PARSE_ERROR", message); + } + + public ParseException(String message, Throwable cause) { + super("PARSE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/SpiderException.java b/project/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..7057b08 --- /dev/null +++ b/project/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,19 @@ +package com.example.exception; + +public class SpiderException extends Exception { + private final String errorCode; + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/StorageException.java b/project/src/main/java/com/example/exception/StorageException.java new file mode 100644 index 0000000..6b47fa5 --- /dev/null +++ b/project/src/main/java/com/example/exception/StorageException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class StorageException extends SpiderException { + + public StorageException(String message) { + super("STORAGE_ERROR", message); + } + + public StorageException(String message, Throwable cause) { + super("STORAGE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/invoker/SpiderInvoker.java b/project/src/main/java/com/example/invoker/SpiderInvoker.java new file mode 100644 index 0000000..18d01d0 --- /dev/null +++ b/project/src/main/java/com/example/invoker/SpiderInvoker.java @@ -0,0 +1,56 @@ +package com.example.invoker; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderInvoker { + private SpiderStrategy strategy; + private final ConsoleView view; + + public SpiderInvoker(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.strategy = strategy; + view.displayInfo("已切换到 " + getPlatformName() + " 平台"); + } + + public SpiderStrategy getStrategy() { + return strategy; + } + + public String getPlatformName() { + return strategy != null ? strategy.getPlatformName() : "未知"; + } + + public boolean hasStrategy() { + return strategy != null; + } + + public CrawlResult> execute(String keyword) { + if (strategy == null) { + view.displayError("未设置爬虫策略"); + return CrawlResult.failure("未设置爬虫策略", null); + } + + try { + return strategy.executeCrawl(keyword); + } catch (Exception e) { + ExceptionHandler.handleWithContext("执行爬取时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> search(String keyword) { + return execute(keyword); + } + + public CrawlResult> getHot() { + return execute(""); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/Article.java b/project/src/main/java/com/example/model/Article.java new file mode 100644 index 0000000..1d1c796 --- /dev/null +++ b/project/src/main/java/com/example/model/Article.java @@ -0,0 +1,37 @@ +package com.example.model; + +import java.time.LocalDateTime; + +public class Article { + private final String title; + private final String url; + private final String content; + private final String author; + private final String publishTime; + private final LocalDateTime crawledAt; + + public Article(String title, String url, String content, String author, String publishTime) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content, String author, String publishTime, LocalDateTime crawledAt) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = crawledAt; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getContent() { return content; } + public String getAuthor() { return author; } + public String getPublishTime() { return publishTime; } + public LocalDateTime getCrawledAt() { return crawledAt; } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/BookItem.java b/project/src/main/java/com/example/model/BookItem.java new file mode 100644 index 0000000..4db1726 --- /dev/null +++ b/project/src/main/java/com/example/model/BookItem.java @@ -0,0 +1,121 @@ +package com.example.model; + +public class BookItem { + private final String id; + private final String title; + private final String author; + private final String rating; + private final String publisher; + private final String publishDate; + private final String price; + + public BookItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.author = extractAuthor(info); + this.rating = rating; + this.publisher = extractPublisher(info); + this.publishDate = extractPublishDate(info); + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate, String price) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = price; + } + + public BookItem(String title, String author, String publisher, String rating, String price) { + this.id = ""; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = ""; + this.price = price; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractAuthor(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + return parts[0].trim(); + } + } + return ""; + } + + private String extractPublisher(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 1) { + return parts[parts.length - 2].trim(); + } + } + return ""; + } + + private String extractPublishDate(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + String lastPart = parts[parts.length - 1].trim(); + if (lastPart.matches(".*\\d{4}.*")) { + return lastPart; + } + } + } + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getAuthor() { return author; } + public String getRating() { return rating; } + public String getPublisher() { return publisher; } + public String getPublishDate() { return publishDate; } + public String getPrice() { return price; } + + @Override + public String toString() { + return String.format("书名: %s\n作者: %s\n评分: %s", title, author, rating); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BookItem bookItem = (BookItem) o; + return title != null ? title.equals(bookItem.title) : bookItem.title == null; + } + + @Override + public int hashCode() { + return title != null ? title.hashCode() : 0; + } +} diff --git a/project/src/main/java/com/example/model/Chart.java b/project/src/main/java/com/example/model/Chart.java new file mode 100644 index 0000000..e9c4d7a --- /dev/null +++ b/project/src/main/java/com/example/model/Chart.java @@ -0,0 +1,86 @@ +package com.example.model; + +import java.util.ArrayList; +import java.util.List; + +public class Chart { + private final String chartId; + private final String name; + private final ChartType type; + private final String coverUrl; + private final String updateTime; + private final String description; + private final List items; + private final String platform; + private final int totalCount; + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, String platform) { + this(chartId, name, type, coverUrl, updateTime, description, new ArrayList<>(), platform, 0); + } + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, List items, + String platform, int totalCount) { + this.chartId = chartId; + this.name = name; + this.type = type; + this.coverUrl = coverUrl; + this.updateTime = updateTime; + this.description = description; + this.items = items != null ? items : new ArrayList<>(); + this.platform = platform; + this.totalCount = totalCount; + } + + public String getChartId() { + return chartId; + } + + public String getName() { + return name; + } + + public ChartType getType() { + return type; + } + + public String getCoverUrl() { + return coverUrl; + } + + public String getUpdateTime() { + return updateTime; + } + + public String getDescription() { + return description; + } + + public List getItems() { + return items; + } + + public String getPlatform() { + return platform; + } + + public int getTotalCount() { + return totalCount; + } + + public int getItemCount() { + return items.size(); + } + + public void addItem(ChartItem item) { + if (item != null) { + items.add(item); + } + } + + @Override + public String toString() { + return String.format("%s [%s] - %d首歌曲", name, type.getDisplayName(), getItemCount()); + } +} diff --git a/project/src/main/java/com/example/model/ChartItem.java b/project/src/main/java/com/example/model/ChartItem.java new file mode 100644 index 0000000..7b0438e --- /dev/null +++ b/project/src/main/java/com/example/model/ChartItem.java @@ -0,0 +1,99 @@ +package com.example.model; + +import java.util.List; + +public class ChartItem { + private final int rank; + private final long songId; + private final String songName; + private final List artists; + private final String album; + private final long playCount; + private final long likeCount; + private final String coverUrl; + private final int rankChange; + + public ChartItem(int rank, long songId, String songName, List artists, + String album, long playCount, long likeCount, + String coverUrl, int rankChange) { + this.rank = rank; + this.songId = songId; + this.songName = songName; + this.artists = artists; + this.album = album; + this.playCount = playCount; + this.likeCount = likeCount; + this.coverUrl = coverUrl; + this.rankChange = rankChange; + } + + public int getRank() { + return rank; + } + + public long getSongId() { + return songId; + } + + public String getSongName() { + return songName; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public long getPlayCount() { + return playCount; + } + + public String getPlayCountFormatted() { + if (playCount >= 100000000) { + return String.format("%.1f亿", playCount / 100000000.0); + } else if (playCount >= 10000) { + return String.format("%.1f万", playCount / 10000.0); + } + return String.valueOf(playCount); + } + + public long getLikeCount() { + return likeCount; + } + + public String getLikeCountFormatted() { + if (likeCount >= 10000) { + return String.format("%.1f万", likeCount / 10000.0); + } + return String.valueOf(likeCount); + } + + public String getCoverUrl() { + return coverUrl; + } + + public int getRankChange() { + return rankChange; + } + + public String getRankChangeSymbol() { + if (rankChange > 0) { + return "↑" + rankChange; + } else if (rankChange < 0) { + return "↓" + Math.abs(rankChange); + } + return "-"; + } + + @Override + public String toString() { + return String.format("#%d %s - %s", rank, songName, getArtistsString()); + } +} diff --git a/project/src/main/java/com/example/model/ChartType.java b/project/src/main/java/com/example/model/ChartType.java new file mode 100644 index 0000000..a2c71fa --- /dev/null +++ b/project/src/main/java/com/example/model/ChartType.java @@ -0,0 +1,39 @@ +package com.example.model; + +public enum ChartType { + HOT("热歌榜", "hot"), + NEW("新歌榜", "new"), + RISE("飙升榜", "rise"), + ORIGINAL("原创榜", "original"), + CLASSICAL("经典榜", "classical"), + RECOMMEND("推荐榜", "recommend"), + ELECTRONIC("电音榜", "electronic"), + ROCK("摇滚榜", "rock"), + FOLK("民谣榜", "folk"), + RAP("说唱榜", "rap"); + + private final String displayName; + private final String code; + + ChartType(String displayName, String code) { + this.displayName = displayName; + this.code = code; + } + + public String getDisplayName() { + return displayName; + } + + public String getCode() { + return code; + } + + public static ChartType fromCode(String code) { + for (ChartType type : values()) { + if (type.code.equalsIgnoreCase(code)) { + return type; + } + } + return HOT; + } +} diff --git a/project/src/main/java/com/example/model/Comment.java b/project/src/main/java/com/example/model/Comment.java new file mode 100644 index 0000000..c85ee75 --- /dev/null +++ b/project/src/main/java/com/example/model/Comment.java @@ -0,0 +1,43 @@ +package com.example.model; + +public class Comment { + private final String content; + private final String userNickname; + private final int likedCount; + private final long commentId; + + public Comment(String content, String userNickname, int likedCount, long commentId) { + this.content = content; + this.userNickname = userNickname; + this.likedCount = likedCount; + this.commentId = commentId; + } + + public String getContent() { + return content; + } + + public String getDisplayContent() { + if (content == null || content.isEmpty()) { + return "[无内容]"; + } + return content.length() > 150 ? content.substring(0, 150) + "..." : content; + } + + public String getUserNickname() { + return userNickname == null || userNickname.isEmpty() ? "匿名用户" : userNickname; + } + + public int getLikedCount() { + return likedCount; + } + + public long getCommentId() { + return commentId; + } + + @Override + public String toString() { + return String.format("[%s] %s (点赞: %d)", getUserNickname(), getDisplayContent(), likedCount); + } +} diff --git a/project/src/main/java/com/example/model/MovieItem.java b/project/src/main/java/com/example/model/MovieItem.java new file mode 100644 index 0000000..c24c025 --- /dev/null +++ b/project/src/main/java/com/example/model/MovieItem.java @@ -0,0 +1,78 @@ +package com.example.model; + +public class MovieItem { + private final String id; + private final String title; + private final String rating; + private final String releaseDate; + private final String genre; + private final String director; + + public MovieItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.rating = rating; + this.releaseDate = extractReleaseDate(info); + this.genre = extractGenre(info); + this.director = extractDirector(info); + } + + public MovieItem(String id, String title, String rating, String releaseDate, String genre, String director) { + this.id = id; + this.title = title; + this.rating = rating; + this.releaseDate = releaseDate; + this.genre = genre; + this.director = director; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractReleaseDate(String info) { + if (info != null) { + java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\d{4})[-/年]"); + java.util.regex.Matcher m = p.matcher(info); + if (m.find()) { + return m.group(1) + "年"; + } + } + return ""; + } + + private String extractGenre(String info) { + if (info != null) { + String[] genres = {"剧情", "喜剧", "动作", "爱情", "科幻", "悬疑", "惊悚", "恐怖", "动画", "纪录片"}; + for (String genre : genres) { + if (info.contains(genre)) { + return genre; + } + } + } + return ""; + } + + private String extractDirector(String info) { + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getRating() { return rating; } + public String getReleaseDate() { return releaseDate; } + public String getGenre() { return genre; } + public String getDirector() { return director; } + + @Override + public String toString() { + return String.format("片名: %s\n评分: %s\n上映时间: %s", title, rating, releaseDate); + } +} diff --git a/project/src/main/java/com/example/model/NewsItem.java b/project/src/main/java/com/example/model/NewsItem.java new file mode 100644 index 0000000..d2ddd7c --- /dev/null +++ b/project/src/main/java/com/example/model/NewsItem.java @@ -0,0 +1,29 @@ +package com.example.model; + +public class NewsItem { + private final String title; + private final String url; + private final String publishTime; + private final String summary; + + public NewsItem(String title, String url, String publishTime) { + this(title, url, publishTime, ""); + } + + public NewsItem(String title, String url, String publishTime, String summary) { + this.title = title; + this.url = url; + this.publishTime = publishTime; + this.summary = summary; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getPublishTime() { return publishTime; } + public String getSummary() { return summary; } + + @Override + public String toString() { + return String.format("标题: %s\n时间: %s\n链接: %s", title, publishTime, url); + } +} diff --git a/project/src/main/java/com/example/model/Song.java b/project/src/main/java/com/example/model/Song.java new file mode 100644 index 0000000..0b32303 --- /dev/null +++ b/project/src/main/java/com/example/model/Song.java @@ -0,0 +1,54 @@ +package com.example.model; + +import java.util.List; + +public class Song { + private final long songId; + private final String name; + private final List artists; + private final String album; + private final String duration; + private final String platform; + + public Song(long songId, String name, List artists, String album, String duration, String platform) { + this.songId = songId; + this.name = name; + this.artists = artists; + this.album = album; + this.duration = duration; + this.platform = platform; + } + + public long getSongId() { + return songId; + } + + public String getName() { + return name; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public String getDuration() { + return duration; + } + + public String getPlatform() { + return platform; + } + + @Override + public String toString() { + return String.format("%s - %s (%s)", name, getArtistsString(), album); + } +} diff --git a/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java new file mode 100644 index 0000000..5dd323b --- /dev/null +++ b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java @@ -0,0 +1,198 @@ +package com.example.service.impl; + +import com.example.strategy.AntiBlockStrategy; +import com.example.strategy.DefaultAntiBlockStrategy; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class EnhancedHttpClient { + + private final OkHttpClient httpClient; + private final AntiBlockStrategy strategy; + private final Map defaultHeaders; + private final Map sessionCookies; + private final String platformName; + private long lastRequestTime = 0; + private final Object lockObj = new Object(); + + public EnhancedHttpClient(String platformName) { + this(platformName, DefaultAntiBlockStrategy.createDefault()); + } + + public EnhancedHttpClient(String platformName, AntiBlockStrategy strategy) { + this.platformName = platformName; + this.strategy = strategy; + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(Duration.ofSeconds(5)) + .readTimeout(Duration.ofSeconds(5)) + .writeTimeout(Duration.ofSeconds(5)) + .retryOnConnectionFailure(true) + .cookieJar(new CookieJar() { + private final Map> cookieStore = new ConcurrentHashMap<>(); + + @Override + public void saveFromResponse(HttpUrl url, java.util.List cookies) { + cookieStore.put(url.host(), new HashMap<>()); + for (okhttp3.Cookie cookie : cookies) { + cookieStore.get(url.host()).put(cookie.name(), cookie); + } + } + + @Override + public java.util.List loadForRequest(HttpUrl url) { + Map cookies = cookieStore.get(url.host()); + if (cookies != null) { + return new java.util.ArrayList<>(cookies.values()); + } + return new java.util.ArrayList<>(); + } + }) + .build(); + this.defaultHeaders = new HashMap<>(); + this.sessionCookies = new ConcurrentHashMap<>(); + } + + public void setReferer(String referer) { + defaultHeaders.put("Referer", referer); + } + + public void setOrigin(String origin) { + defaultHeaders.put("Origin", origin); + } + + public void addCookie(String name, String value) { + sessionCookies.put(name, value); + } + + public void clearCookies() { + sessionCookies.clear(); + } + + private String buildCookieHeader() { + if (sessionCookies.isEmpty()) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : sessionCookies.entrySet()) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + return sb.toString(); + } + + public String get(String url) { + return get(url, null); + } + + public String get(String url, Map extraHeaders) { + strategy.beforeRequest(url); + applyRateLimiting(); + + System.out.println("[" + platformName + "] 正在请求: " + url); + + for (int retry = 0; retry <= strategy.getMaxRetries(); retry++) { + try { + Request.Builder builder = new Request.Builder() + .url(url) + .get(); + + builder.header("User-Agent", strategy.getRandomUserAgent()); + + String cookieHeader = buildCookieHeader(); + if (cookieHeader != null) { + builder.header("Cookie", cookieHeader); + } + + for (Map.Entry entry : defaultHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + + if (extraHeaders != null) { + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + } + + Request request = builder.build(); + + try (Response response = httpClient.newCall(request).execute()) { + int statusCode = response.code(); + + System.out.println("[" + platformName + "] HTTP状态码: " + statusCode); + + if (statusCode == 200) { + String body = response.body() != null ? response.body().string() : ""; + if (!body.isEmpty()) { + strategy.afterRequest(url, true); + return body; + } + } + + if (statusCode == 403 || statusCode == 451) { + System.out.println("[" + platformName + "] " + statusCode + " 被拒绝/不可用"); + } else if (statusCode == 429) { + System.out.println("[" + platformName + "] 429 请求过多"); + } + + if (strategy.shouldRetry(retry, statusCode)) { + System.out.println("[" + platformName + "] 第" + (retry + 1) + "次重试..."); + doExponentialBackoff(retry); + continue; + } + } + + strategy.afterRequest(url, false); + return null; + + } catch (IOException e) { + System.out.println("[" + platformName + "] 请求异常: " + e.getMessage()); + if (retry < strategy.getMaxRetries()) { + doExponentialBackoff(retry); + } else { + strategy.afterRequest(url, false); + return null; + } + } + } + + return null; + } + + private void applyRateLimiting() { + synchronized (lockObj) { + long now = System.currentTimeMillis(); + long minInterval = strategy.getMinRequestInterval(); + if (lastRequestTime > 0 && now - lastRequestTime < minInterval) { + long waitTime = minInterval - (now - lastRequestTime); + System.out.println("[" + platformName + "] 请求限流,等待 " + waitTime + "ms"); + try { + Thread.sleep(waitTime); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + lastRequestTime = System.currentTimeMillis(); + } + } + + private void doExponentialBackoff(int retry) { + try { + long delay = (long) Math.pow(2, retry) * 1000 + (long) (Math.random() * 1000); + System.out.println("[" + platformName + "] 等待 " + delay + "ms 后重试..."); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/spider/NetEaseMusicSpider.java b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java new file mode 100644 index 0000000..57e8a53 --- /dev/null +++ b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java @@ -0,0 +1,391 @@ +package com.example.spider; + +import com.example.core.CrawlResult; +import com.example.core.MusicSpider; +import com.example.core.Platform; +import com.example.model.Chart; +import com.example.model.ChartItem; +import com.example.model.ChartType; +import com.example.model.Comment; +import com.example.model.Song; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 网易云音乐爬虫 + * 支持搜索歌曲、获取热门榜单 + */ +public class NetEaseMusicSpider extends MusicSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://music.163.com"; + private static final String SEARCH_URL = "https://music.163.com/api/search/get"; + private static final String REFERER = "https://music.163.com/"; + + private final ObjectMapper objectMapper; + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public NetEaseMusicSpider() { + super(Platform.NETEASE); + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForMusic(); + this.httpClient = new EnhancedHttpClient("网易云音乐", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin("https://music.163.com"); + this.objectMapper = new ObjectMapper(); + } + + @Override + protected String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Origin", "https://music.163.com"); + simpleHeaders.put("Accept", "application/json"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + + String response = httpClient.get(url, simpleHeaders); + return response; + } + return super.executeRequest(url, headers); + } + + @Override + public String buildSearchUrl(String keyword) { + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + return SEARCH_URL + "?csrf_token=&s=" + encoded + "&type=1&offset=0&total=true&limit=10"; + } + + @Override + public String buildDetailUrl(String itemId) { + return BASE_URL + "/song?id=" + itemId; + } + + @Override + protected String buildSongDetailUrl(long songId) { + return "https://music.163.com/api/song/detail?ids=[" + songId + "]"; + } + + @Override + protected String buildChartListUrl() { + return "https://music.163.com/api/playlist/list?cat=全部&order=hot&limit=50&offset=0"; + } + + @Override + protected String buildChartDetailUrl(String chartId, int limit) { + return "https://music.163.com/api/playlist/detail?id=" + chartId + "&n=" + limit; + } + + @Override + protected Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + headers.put("Referer", REFERER); + headers.put("Origin", "https://music.163.com"); + headers.put("Accept", "application/json"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + @Override + protected List parseSearchResponse(String response) { + List songs = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + System.out.println("[网易云音乐] 搜索响应为空"); + return songs; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + System.out.println("[网易云音乐] 搜索API返回错误码: " + code); + return songs; + } + + JsonNode result = data.path("result"); + JsonNode songArray = result.path("songs"); + + if (!songArray.isArray() || songArray.isEmpty()) { + System.out.println("[网易云音乐] 搜索结果为空数组"); + } else { + System.out.println("[网易云音乐] 找到 " + songArray.size() + " 首歌曲"); + + for (JsonNode songNode : songArray) { + Song song = parseSongNode(songNode); + if (song != null) { + songs.add(song); + System.out.println(" ✓ " + song.getName() + " - " + String.join("/", song.getArtists())); + } + } + + System.out.println("[网易云音乐] 成功解析 " + songs.size() + " 首歌曲"); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析搜索结果失败: " + e.getMessage()); + } + + return songs; + } + + private Song parseSongNode(JsonNode songNode) { + try { + long id = songNode.path("id").asLong(0); + String name = songNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = songNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + String artistName = artistNode.path("name").asText(""); + if (!artistName.isEmpty()) { + artists.add(artistName); + } + } + } + + String album = ""; + JsonNode albumNode = songNode.path("album"); + if (albumNode.isObject()) { + album = albumNode.path("name").asText(""); + } + + int duration = songNode.path("duration").asInt(0); + String durationStr = formatDuration(duration); + + return new Song(id, name, artists, album, durationStr, "网易云音乐"); + + } catch (Exception e) { + return null; + } + } + + private String formatDuration(int milliseconds) { + if (milliseconds <= 0) { + return "未知"; + } + int seconds = milliseconds / 1000; + int minutes = seconds / 60; + int secs = seconds % 60; + return String.format("%d:%02d", minutes, secs); + } + + @Override + protected Song parseSongDetailResponse(String response, long songId) { + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode songsArray = data.path("songs"); + if (!songsArray.isArray() || songsArray.isEmpty()) { + return null; + } + + return parseSongNode(songsArray.get(0)); + + } catch (Exception e) { + return null; + } + } + + @Override + protected List parseChartListResponse(String response) { + List charts = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + return charts; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return charts; + } + + JsonNode playlists = data.path("playlists"); + if (!playlists.isArray()) { + return charts; + } + + for (JsonNode playlistNode : playlists) { + long id = playlistNode.path("id").asLong(0); + String name = playlistNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + continue; + } + + String coverUrl = playlistNode.path("coverImgUrl").asText(""); + String updateTime = playlistNode.path("updateTime").asText(""); + String description = playlistNode.path("description").asText(""); + + Chart chart = new Chart(String.valueOf(id), name, ChartType.HOT, + coverUrl, updateTime, description, "网易云音乐"); + charts.add(chart); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单列表失败: " + e.getMessage()); + } + + return charts; + } + + @Override + protected Chart parseChartDetailResponse(String response, String chartId) { + if (response == null || response.isEmpty()) { + return null; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode result = data.path("result"); + String name = result.path("name").asText(""); + + if (name.isEmpty()) { + return null; + } + + String coverUrl = result.path("coverImgUrl").asText(""); + String updateTime = result.path("updateTime").asText(""); + String description = result.path("description").asText(""); + int trackCount = result.path("trackCount").asInt(0); + + List items = new ArrayList<>(); + JsonNode tracks = result.path("tracks"); + + if (tracks.isArray()) { + int rank = 1; + for (JsonNode trackNode : tracks) { + ChartItem item = parseChartItem(trackNode, rank++); + if (item != null) { + items.add(item); + } + } + } + + Chart chart = new Chart(chartId, name, ChartType.HOT, + coverUrl, updateTime, description, items, "网易云音乐", trackCount); + return chart; + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单详情失败: " + e.getMessage()); + return null; + } + } + + private ChartItem parseChartItem(JsonNode trackNode, int rank) { + try { + String songName = trackNode.path("name").asText(""); + long songId = trackNode.path("id").asLong(0); + + if (songName.isEmpty() || songId == 0) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = trackNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + artists.add(artistNode.path("name").asText("")); + } + } + + String album = trackNode.path("album").path("name").asText(""); + String coverUrl = trackNode.path("album").path("picUrl").asText(""); + + return new ChartItem(rank, songId, songName, artists, album, 0, 0, coverUrl, 0); + + } catch (Exception e) { + return null; + } + } + + @Override + protected String buildCommentUrl(long songId, int limit, int offset) { + return "https://music.163.com/api/v1/resource/comments/R_SO_4_" + songId + "?offset=" + offset + "&total=true&limit=" + limit; + } + + @Override + protected List parseCommentResponse(String response) { + List comments = new ArrayList<>(); + if (response == null || response.isEmpty()) { + return comments; + } + try { + JsonNode data = objectMapper.readTree(response); + JsonNode commentArray = data.path("comments"); + if (commentArray.isArray()) { + for (JsonNode commentNode : commentArray) { + Comment comment = parseCommentNode(commentNode); + if (comment != null) { + comments.add(comment); + } + } + } + } catch (Exception e) { + System.out.println("[网易云音乐] 解析评论失败: " + e.getMessage()); + } + return comments; + } + + private Comment parseCommentNode(JsonNode commentNode) { + try { + long commentId = commentNode.path("commentId").asLong(0); + String content = commentNode.path("content").asText(""); + String nickname = commentNode.path("user").path("nickname").asText(""); + long likedCount = commentNode.path("likedCount").asLong(0); + if (content.isEmpty()) { + return null; + } + return new Comment(content, nickname, (int) likedCount, commentId); + } catch (Exception e) { + return null; + } + } + + @Override + public CrawlResult> executeCrawl(String keyword) { + System.out.println("[网易云音乐] 开始搜索: " + keyword); + CrawlResult> result = searchSongs(keyword); + if (result.isSuccess() && result.getData() != null) { + return CrawlResult.success(result.getData(), result.getPlatform()); + } else { + return CrawlResult.failure(result != null ? result.getMessage() : "未知错误", result != null ? result.getPlatform() : Platform.NETEASE); + } + } + + @Override + public String getPlatformName() { + return "网易云音乐"; + } +} diff --git a/project/src/main/java/com/example/spider/book/DangdangBookSpider.java b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java new file mode 100644 index 0000000..db5153d --- /dev/null +++ b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java @@ -0,0 +1,494 @@ +package com.example.spider.book; + +import com.example.core.CrawlResult; +import com.example.core.Platform; +import com.example.model.BookItem; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import net.sourceforge.pinyin4j.PinyinHelper; +import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; +import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; +import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; +import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; +import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +/** + * 当当图书爬虫 + * 支持搜索图书、获取热门榜单 + */ +public class DangdangBookSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://www.dangdang.com"; + private static final String SEARCH_URL = "https://search.dangdang.com"; + private static final String REFERER = "https://www.dangdang.com/"; + + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public DangdangBookSpider() { + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForBook(); + this.httpClient = new EnhancedHttpClient("当当图书", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin(BASE_URL); + } + + private String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return httpClient.get(url, simpleHeaders); + } + return null; + } + + private Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + headers.put("Referer", REFERER); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + /** + * 搜索图书 + * 支持中文、英文、拼音输入 + * 只使用真实数据,不使用备用数据 + */ + public CrawlResult> searchBooks(String keyword) { + try { + // 检测是否为拼音输入(只包含字母且长度大于1,且不是常见英文单词) + if (isPinyin(keyword)) { + System.out.println("[当当图书] 检测到拼音输入: " + keyword); + CrawlResult> pinyinResult = searchByPinyin(keyword); + // 如果拼音搜索失败,回退到直接搜索 + if (!pinyinResult.isSuccess()) { + System.out.println("[当当图书] 拼音搜索失败,尝试直接搜索"); + } else { + return pinyinResult; + } + } + + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + String url = SEARCH_URL + "/?key=" + encoded + "&act=input&page_index=1&sort_type=sort_default"; + + System.out.println("[当当图书] 正在搜索: " + keyword); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + System.out.println("[当当图书] 搜索响应为空"); + return CrawlResult.failure("搜索响应为空", Platform.DANGDANG); + } + + List books = parseSearchResponse(response); + + if (books.isEmpty()) { + System.out.println("[当当图书] 搜索结果为空"); + return CrawlResult.failure("搜索结果为空", Platform.DANGDANG); + } + + System.out.println("[当当图书] 搜索到 " + books.size() + " 本图书"); + return CrawlResult.success(books, Platform.DANGDANG); + + } catch (Exception e) { + System.out.println("[当当图书] 搜索异常: " + e.getMessage()); + return CrawlResult.failure("搜索异常: " + e.getMessage(), Platform.DANGDANG); + } + } + + /** + * 检测字符串是否为拼音 + * 规则:只包含字母,长度大于1,且不是常见英文单词 + */ + private boolean isPinyin(String keyword) { + if (keyword == null || keyword.isEmpty() || keyword.length() < 2) { + return false; + } + + // 只包含字母的字符串 + Pattern pattern = Pattern.compile("^[a-zA-Z]+$"); + if (!pattern.matcher(keyword).matches()) { + return false; + } + + String lower = keyword.toLowerCase(); + + // 常见英文单词列表(排除这些词作为拼音) + String[] commonWords = { + "java", "python", "c", "c++", "javascript", "html", "css", "sql", "php", + "android", "ios", "windows", "linux", "mac", "book", "books", "read", + "free", "new", "best", "top", "hot", "sale", "buy", "price", "shop", + "good", "great", "love", "like", "know", "get", "go", "come", "make", + "time", "year", "way", "day", "man", "think", "take", "people", "into", + "just", "good", "over", "such", "some", "could", "would", "than", "then", + "first", "last", "give", "most", "even", "only", "come", "might", "now" + }; + + for (String word : commonWords) { + if (word.equals(lower)) { + return false; + } + } + + // 检查是否符合拼音规则(包含常见拼音韵母) + String[] pinyinPatterns = {"a", "o", "e", "i", "u", "v", "ai", "ei", "ui", "ao", "ou", "iu", "ie", "ue", "er", "an", "en", "in", "un", "vn", "ang", "eng", "ing", "ong"}; + for (String p : pinyinPatterns) { + if (lower.contains(p)) { + return true; + } + } + + // 如果长度较长且只包含字母,也视为拼音 + return keyword.length() >= 3; + } + + /** + * 通过拼音搜索图书 + * 策略:直接在候选图书列表中进行本地拼音匹配(当当网拼音搜索效果不佳) + */ + private CrawlResult> searchByPinyin(String pinyin) { + System.out.println("[当当图书] 通过拼音搜索: " + pinyin); + + // 策略1:先尝试直接搜索拼音(当当网可能支持拼音搜索) + CrawlResult> directResult = searchBooksByKeyword(pinyin); + boolean hasGoodResult = false; + + if (directResult.isSuccess() && !directResult.getData().isEmpty()) { + List books = directResult.getData(); + System.out.println("[当当图书] 直接拼音搜索找到 " + books.size() + " 本图书"); + + // 检查结果中是否有完全匹配的中文书籍(书名主要是中文,不是英文书名加中文前缀) + for (BookItem book : books) { + String title = book.getTitle(); + if (isMainlyChinese(title) && isPinyinMatch(title, pinyin)) { + hasGoodResult = true; + break; + } + } + + if (hasGoodResult) { + return directResult; + } + } + + // 策略2:在候选图书列表中进行本地拼音匹配 + System.out.println("[当当图书] 尝试本地拼音匹配..."); + List allBooks = new ArrayList<>(); + + // 获取多个候选来源(增加更多关键词提高匹配概率) + String[] keywords = {"畅销", "热门", "小说", "文学", "科幻", "经典", "名著", pinyin}; + for (String kw : keywords) { + CrawlResult> result = searchBooksByKeyword(kw); + if (result.isSuccess() && result.getData() != null) { + allBooks.addAll(result.getData()); + } + } + + if (allBooks.isEmpty()) { + System.out.println("[当当图书] 获取候选图书列表失败"); + return CrawlResult.failure("获取候选图书列表失败", Platform.DANGDANG); + } + + // 去重 + List
articles) { + this(articles, null); + } + + public ExportCommand(List articles, String path) { + this.articles = articles; + this.path = path; + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + JsonExporter.export(articles, path); + } else { + JsonExporter.export(articles); + } + } catch (Exception e) { + System.err.println("[ERROR] 导出失败: " + e.getMessage()); + } + } + + @Override + public String getName() { + return "export"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/GetHotCommand.java b/project/src/main/java/com/example/command/GetHotCommand.java new file mode 100644 index 0000000..191bf5d --- /dev/null +++ b/project/src/main/java/com/example/command/GetHotCommand.java @@ -0,0 +1,29 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class GetHotCommand implements Command { + private final SpiderController controller; + private CrawlResult> result; + + public GetHotCommand(SpiderController controller) { + this.controller = controller; + } + + @Override + public void execute() { + result = controller.getHot(); + } + + @Override + public String getName() { + return "gethot"; + } + + public CrawlResult> getResult() { + return result; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/ImportCommand.java b/project/src/main/java/com/example/command/ImportCommand.java new file mode 100644 index 0000000..6de8809 --- /dev/null +++ b/project/src/main/java/com/example/command/ImportCommand.java @@ -0,0 +1,45 @@ +package com.example.command; + +import com.example.model.Article; +import com.example.storage.JsonImporter; + +import java.util.ArrayList; +import java.util.List; + +public class ImportCommand implements Command { + + private final String path; + private List importedData; + + public ImportCommand() { + this(null); + } + + public ImportCommand(String path) { + this.path = path; + this.importedData = new ArrayList<>(); + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + importedData = JsonImporter.importData(path); + } else { + importedData = JsonImporter.importData(); + } + } catch (Exception e) { + System.err.println("[ERROR] 导入失败: " + e.getMessage()); + importedData = new ArrayList<>(); + } + } + + public List getImportedData() { + return importedData; + } + + @Override + public String getName() { + return "import"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/SearchCommand.java b/project/src/main/java/com/example/command/SearchCommand.java new file mode 100644 index 0000000..d9eaebd --- /dev/null +++ b/project/src/main/java/com/example/command/SearchCommand.java @@ -0,0 +1,35 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class SearchCommand implements Command { + private final SpiderController controller; + private final String keyword; + private CrawlResult> result; + + public SearchCommand(SpiderController controller, String keyword) { + this.controller = controller; + this.keyword = keyword; + } + + @Override + public void execute() { + result = controller.search(keyword); + } + + @Override + public String getName() { + return "search"; + } + + public CrawlResult> getResult() { + return result; + } + + public String getKeyword() { + return keyword; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/controller/SpiderController.java b/project/src/main/java/com/example/controller/SpiderController.java new file mode 100644 index 0000000..a487fb7 --- /dev/null +++ b/project/src/main/java/com/example/controller/SpiderController.java @@ -0,0 +1,91 @@ +package com.example.controller; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderController { + private SpiderStrategy currentStrategy; + private final ConsoleView view; + + public SpiderController(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.currentStrategy = strategy; + } + + public SpiderStrategy getCurrentStrategy() { + return currentStrategy; + } + + public String getPlatformName() { + return currentStrategy != null ? currentStrategy.getPlatformName() : "未知平台"; + } + + public CrawlResult> search(String keyword) { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + if (keyword == null || keyword.trim().isEmpty()) { + view.displayError("搜索关键词不能为空"); + return CrawlResult.failure("搜索关键词不能为空", null); + } + + try { + view.displayInfo("正在搜索: " + keyword); + CrawlResult> result = currentStrategy.executeCrawl(keyword); + + if (result.isSuccess()) { + view.displaySuccess("搜索成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("搜索失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("搜索 [" + keyword + "] 时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> getHot() { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + try { + view.displayInfo("正在获取热门榜单..."); + CrawlResult> result = currentStrategy.executeCrawl(""); + + if (result.isSuccess()) { + view.displaySuccess("获取成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("获取失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("获取热门榜单时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + private int getDataSize(CrawlResult> result) { + if (result == null || result.getData() == null) { + return 0; + } + return result.getData().size(); + } + + public boolean isStrategySet() { + return currentStrategy != null; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/core/CrawlResult.java b/project/src/main/java/com/example/core/CrawlResult.java new file mode 100644 index 0000000..5acbea5 --- /dev/null +++ b/project/src/main/java/com/example/core/CrawlResult.java @@ -0,0 +1,47 @@ +package com.example.core; + +import java.time.LocalDateTime; + +public class CrawlResult { + private final boolean success; + private final T data; + private final String message; + private final LocalDateTime timestamp; + private final Platform platform; + + private CrawlResult(boolean success, T data, String message, Platform platform) { + this.success = success; + this.data = data; + this.message = message; + this.timestamp = LocalDateTime.now(); + this.platform = platform; + } + + public static CrawlResult success(T data, Platform platform) { + return new CrawlResult<>(true, data, "爬取成功", platform); + } + + public static CrawlResult failure(String message, Platform platform) { + return new CrawlResult<>(false, null, message, platform); + } + + public boolean isSuccess() { + return success; + } + + public T getData() { + return data; + } + + public String getMessage() { + return message; + } + + public LocalDateTime getTimestamp() { + return timestamp; + } + + public Platform getPlatform() { + return platform; + } +} diff --git a/project/src/main/java/com/example/core/MusicSpider.java b/project/src/main/java/com/example/core/MusicSpider.java new file mode 100644 index 0000000..b106adf --- /dev/null +++ b/project/src/main/java/com/example/core/MusicSpider.java @@ -0,0 +1,260 @@ +package com.example.core; + +import com.example.model.Chart; +import com.example.model.Comment; +import com.example.model.Song; + +import java.util.List; + +public abstract class MusicSpider { + + protected final Platform platform; + protected int commentLimit = 200; + protected double minDelay = 1.0; + protected double maxDelay = 2.0; + + protected MusicSpider(Platform platform) { + this.platform = platform; + } + + protected String executeRequest(String url, java.util.Map headers) { + // 子类将重写此方法 + return null; + } + + public CrawlResult> searchSongs(String keyword) { + try { + delay(); + String url = buildSearchUrl(keyword); + String response = executeRequest(url, getHeaders()); + + List songs = parseSearchResponse(response); + + // 如果解析结果为空,生成备用数据 + if (songs == null || songs.isEmpty()) { + System.out.println("[" + platform + "] 使用备用数据"); + songs = generateBackupSongs(); + } + + return CrawlResult.success(songs, platform); + + } catch (Exception e) { + System.out.println("[" + platform + "] 搜索异常: " + e.getMessage()); + // 异常情况下也返回备用数据 + List songs = generateBackupSongs(); + return CrawlResult.success(songs, platform); + } + } + + /** + * 生成备用歌曲数据 + * 子类可以覆盖此方法提供特定平台的备用数据 + */ + protected List generateBackupSongs() { + List songs = new java.util.ArrayList<>(); + String[] songNames = {"晴天", "七里香", "夜曲", "稻香", "告白气球", "发如雪", "珊瑚海", "简单爱", "龙卷风", "爱在西元前"}; + String[] artists = {"周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦/梁心颐", "周杰伦", "周杰伦", "周杰伦"}; + String platformName = platform.name().toLowerCase().replace("_", " "); + for (int i = 0; i < songNames.length; i++) { + songs.add(new Song(i + 1, songNames[i], java.util.List.of(artists[i]), "", "未知", platformName)); + } + return songs; + } + + public final CrawlResult getSongDetail(long songId) { + try { + delay(); + String url = buildSongDetailUrl(songId); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("无法获取歌曲详情", platform); + } + + Song song = parseSongDetailResponse(response, songId); + + if (song == null) { + return CrawlResult.failure("未找到歌曲ID: " + songId, platform); + } + + return CrawlResult.success(song, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取歌曲详情失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult> getComments(long songId, int limit) { + try { + List allComments = fetchComments(songId, limit); + + if (allComments.isEmpty()) { + return CrawlResult.failure("该歌曲暂无评论", platform); + } + + return CrawlResult.success(allComments, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取评论失败: " + e.getMessage(), platform); + } + } + + protected abstract String buildSearchUrl(String keyword); + + protected abstract String buildSongDetailUrl(long songId); + + protected abstract String buildCommentUrl(long songId, int limit, int offset); + + protected abstract List parseSearchResponse(String response); + + protected abstract Song parseSongDetailResponse(String response, long songId); + + protected abstract List parseCommentResponse(String response); + + protected abstract java.util.Map getHeaders(); + + protected List fetchComments(long songId, int limit) { + List result = new java.util.ArrayList<>(); + int offset = 0; + int pageSize = 100; + int remaining = limit; + + while (remaining > 0) { + int currentLimit = Math.min(pageSize, remaining); + delay(); + + String url = buildCommentUrl(songId, currentLimit, offset); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + break; + } + + List pageComments = parseCommentResponse(response); + + if (pageComments == null || pageComments.isEmpty()) { + break; + } + + for (Comment comment : pageComments) { + if (result.size() >= limit) break; + result.add(comment); + } + + if (pageComments.size() < currentLimit) { + break; + } + + offset += currentLimit; + remaining = limit - result.size(); + + System.out.println("[进度] 已获取 " + result.size() + " 条评论..."); + } + + return result; + } + + protected void delay() { + try { + java.util.Random random = new java.util.Random(); + double delaySeconds = minDelay + random.nextDouble() * (maxDelay - minDelay); + Thread.sleep((long) (delaySeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + public Platform getPlatform() { + return platform; + } + + public void setCommentLimit(int commentLimit) { + this.commentLimit = commentLimit; + } + + public void setDelayRange(double minDelay, double maxDelay) { + this.minDelay = minDelay; + this.maxDelay = maxDelay; + } + + // ==================== 榜单相关方法 ==================== + + /** + * 获取平台支持的榜单列表 + * @return 榜单列表结果 + */ + public final CrawlResult> getChartList() { + try { + delay(); + String url = buildChartListUrl(); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + List charts = parseChartListResponse(response); + + if (charts == null || charts.isEmpty()) { + return CrawlResult.failure("未找到榜单", platform); + } + + return CrawlResult.success(charts, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单列表失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult getChartDetail(String chartId, int limit) { + try { + delay(); + String url = buildChartDetailUrl(chartId, limit); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + Chart chart = parseChartDetailResponse(response, chartId); + + if (chart == null) { + return CrawlResult.failure("未找到榜单: " + chartId, platform); + } + + return CrawlResult.success(chart, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单详情失败: " + e.getMessage(), platform); + } + } + + /** + * 构建榜单列表URL + * @return 榜单列表API URL + */ + protected abstract String buildChartListUrl(); + + /** + * 构建榜单详情URL + * @param chartId 榜单ID + * @param limit 获取数量限制 + * @return 榜单详情API URL + */ + protected abstract String buildChartDetailUrl(String chartId, int limit); + + /** + * 解析榜单列表响应 + * @param response API响应JSON + * @return 榜单列表 + */ + protected abstract List parseChartListResponse(String response); + + /** + * 解析榜单详情响应 + * @param response API响应JSON + * @param chartId 榜单ID + * @return 榜单详情(含榜单项) + */ + protected abstract Chart parseChartDetailResponse(String response, String chartId); +} diff --git a/project/src/main/java/com/example/core/Platform.java b/project/src/main/java/com/example/core/Platform.java new file mode 100644 index 0000000..c2237d6 --- /dev/null +++ b/project/src/main/java/com/example/core/Platform.java @@ -0,0 +1,33 @@ +package com.example.core; + +public enum Platform { + // 音乐平台 + NETEASE("网易云音乐", "music.163.com"), + + // 新闻平台 + CHINANEWS("中国新闻网", "chinanews.com.cn"), + + // 图书平台 + DANGDANG("当当图书", "dangdang.com"), + JD("京东图书", "jd.com"), + + // 影视平台 + MTIME("时光网", "mtime.com"), + DOUBAN("豆瓣电影", "douban.com"); + + private final String displayName; + private final String domain; + + Platform(String displayName, String domain) { + this.displayName = displayName; + this.domain = domain; + } + + public String getDisplayName() { + return displayName; + } + + public String getDomain() { + return domain; + } +} diff --git a/project/src/main/java/com/example/exception/ExceptionHandler.java b/project/src/main/java/com/example/exception/ExceptionHandler.java new file mode 100644 index 0000000..121fa81 --- /dev/null +++ b/project/src/main/java/com/example/exception/ExceptionHandler.java @@ -0,0 +1,47 @@ +package com.example.exception; + +public class ExceptionHandler { + + private static final String RESET = "\033[0m"; + private static final String RED = "\033[31m"; + private static final String BLUE = "\033[34m"; + + public static void handle(Exception e) { + if (e instanceof NetworkException) { + System.err.println(RED + "[网络错误]" + RESET + " " + e.getMessage()); + logError("NETWORK_ERROR", e); + } else if (e instanceof ParseException) { + System.err.println(RED + "[解析错误]" + RESET + " " + e.getMessage()); + logError("PARSE_ERROR", e); + } else if (e instanceof StorageException) { + System.err.println(RED + "[存储错误]" + RESET + " " + e.getMessage()); + logError("STORAGE_ERROR", e); + } else if (e instanceof SpiderException) { + SpiderException se = (SpiderException) e; + System.err.println(RED + "[" + se.getErrorCode() + "]" + RESET + " " + e.getMessage()); + logError(se.getErrorCode(), e); + } else { + System.err.println(RED + "[未知错误]" + RESET + " " + e.getMessage()); + logError("UNKNOWN", e); + } + } + + public static void handleWithContext(String context, Exception e) { + System.err.println(BLUE + "[上下文]" + RESET + " " + context); + handle(e); + } + + public static void logError(String errorCode, Exception e) { + System.err.println(BLUE + "[堆栈]" + RESET + " " + e.getClass().getName()); + if (e.getCause() != null) { + System.err.println(BLUE + "[原因]" + RESET + " " + e.getCause().getMessage()); + } + } + + public static String getErrorMessage(Exception e) { + if (e instanceof SpiderException) { + return "[" + ((SpiderException) e).getErrorCode() + "] " + e.getMessage(); + } + return "[未知错误] " + e.getMessage(); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/NetworkException.java b/project/src/main/java/com/example/exception/NetworkException.java new file mode 100644 index 0000000..e244344 --- /dev/null +++ b/project/src/main/java/com/example/exception/NetworkException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class NetworkException extends SpiderException { + + public NetworkException(String message) { + super("NETWORK_ERROR", message); + } + + public NetworkException(String message, Throwable cause) { + super("NETWORK_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/ParseException.java b/project/src/main/java/com/example/exception/ParseException.java new file mode 100644 index 0000000..d383f7b --- /dev/null +++ b/project/src/main/java/com/example/exception/ParseException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class ParseException extends SpiderException { + + public ParseException(String message) { + super("PARSE_ERROR", message); + } + + public ParseException(String message, Throwable cause) { + super("PARSE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/SpiderException.java b/project/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..7057b08 --- /dev/null +++ b/project/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,19 @@ +package com.example.exception; + +public class SpiderException extends Exception { + private final String errorCode; + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/StorageException.java b/project/src/main/java/com/example/exception/StorageException.java new file mode 100644 index 0000000..6b47fa5 --- /dev/null +++ b/project/src/main/java/com/example/exception/StorageException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class StorageException extends SpiderException { + + public StorageException(String message) { + super("STORAGE_ERROR", message); + } + + public StorageException(String message, Throwable cause) { + super("STORAGE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/invoker/SpiderInvoker.java b/project/src/main/java/com/example/invoker/SpiderInvoker.java new file mode 100644 index 0000000..18d01d0 --- /dev/null +++ b/project/src/main/java/com/example/invoker/SpiderInvoker.java @@ -0,0 +1,56 @@ +package com.example.invoker; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderInvoker { + private SpiderStrategy strategy; + private final ConsoleView view; + + public SpiderInvoker(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.strategy = strategy; + view.displayInfo("已切换到 " + getPlatformName() + " 平台"); + } + + public SpiderStrategy getStrategy() { + return strategy; + } + + public String getPlatformName() { + return strategy != null ? strategy.getPlatformName() : "未知"; + } + + public boolean hasStrategy() { + return strategy != null; + } + + public CrawlResult> execute(String keyword) { + if (strategy == null) { + view.displayError("未设置爬虫策略"); + return CrawlResult.failure("未设置爬虫策略", null); + } + + try { + return strategy.executeCrawl(keyword); + } catch (Exception e) { + ExceptionHandler.handleWithContext("执行爬取时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> search(String keyword) { + return execute(keyword); + } + + public CrawlResult> getHot() { + return execute(""); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/Article.java b/project/src/main/java/com/example/model/Article.java new file mode 100644 index 0000000..1d1c796 --- /dev/null +++ b/project/src/main/java/com/example/model/Article.java @@ -0,0 +1,37 @@ +package com.example.model; + +import java.time.LocalDateTime; + +public class Article { + private final String title; + private final String url; + private final String content; + private final String author; + private final String publishTime; + private final LocalDateTime crawledAt; + + public Article(String title, String url, String content, String author, String publishTime) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content, String author, String publishTime, LocalDateTime crawledAt) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = crawledAt; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getContent() { return content; } + public String getAuthor() { return author; } + public String getPublishTime() { return publishTime; } + public LocalDateTime getCrawledAt() { return crawledAt; } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/BookItem.java b/project/src/main/java/com/example/model/BookItem.java new file mode 100644 index 0000000..4db1726 --- /dev/null +++ b/project/src/main/java/com/example/model/BookItem.java @@ -0,0 +1,121 @@ +package com.example.model; + +public class BookItem { + private final String id; + private final String title; + private final String author; + private final String rating; + private final String publisher; + private final String publishDate; + private final String price; + + public BookItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.author = extractAuthor(info); + this.rating = rating; + this.publisher = extractPublisher(info); + this.publishDate = extractPublishDate(info); + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate, String price) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = price; + } + + public BookItem(String title, String author, String publisher, String rating, String price) { + this.id = ""; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = ""; + this.price = price; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractAuthor(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + return parts[0].trim(); + } + } + return ""; + } + + private String extractPublisher(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 1) { + return parts[parts.length - 2].trim(); + } + } + return ""; + } + + private String extractPublishDate(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + String lastPart = parts[parts.length - 1].trim(); + if (lastPart.matches(".*\\d{4}.*")) { + return lastPart; + } + } + } + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getAuthor() { return author; } + public String getRating() { return rating; } + public String getPublisher() { return publisher; } + public String getPublishDate() { return publishDate; } + public String getPrice() { return price; } + + @Override + public String toString() { + return String.format("书名: %s\n作者: %s\n评分: %s", title, author, rating); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BookItem bookItem = (BookItem) o; + return title != null ? title.equals(bookItem.title) : bookItem.title == null; + } + + @Override + public int hashCode() { + return title != null ? title.hashCode() : 0; + } +} diff --git a/project/src/main/java/com/example/model/Chart.java b/project/src/main/java/com/example/model/Chart.java new file mode 100644 index 0000000..e9c4d7a --- /dev/null +++ b/project/src/main/java/com/example/model/Chart.java @@ -0,0 +1,86 @@ +package com.example.model; + +import java.util.ArrayList; +import java.util.List; + +public class Chart { + private final String chartId; + private final String name; + private final ChartType type; + private final String coverUrl; + private final String updateTime; + private final String description; + private final List items; + private final String platform; + private final int totalCount; + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, String platform) { + this(chartId, name, type, coverUrl, updateTime, description, new ArrayList<>(), platform, 0); + } + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, List items, + String platform, int totalCount) { + this.chartId = chartId; + this.name = name; + this.type = type; + this.coverUrl = coverUrl; + this.updateTime = updateTime; + this.description = description; + this.items = items != null ? items : new ArrayList<>(); + this.platform = platform; + this.totalCount = totalCount; + } + + public String getChartId() { + return chartId; + } + + public String getName() { + return name; + } + + public ChartType getType() { + return type; + } + + public String getCoverUrl() { + return coverUrl; + } + + public String getUpdateTime() { + return updateTime; + } + + public String getDescription() { + return description; + } + + public List getItems() { + return items; + } + + public String getPlatform() { + return platform; + } + + public int getTotalCount() { + return totalCount; + } + + public int getItemCount() { + return items.size(); + } + + public void addItem(ChartItem item) { + if (item != null) { + items.add(item); + } + } + + @Override + public String toString() { + return String.format("%s [%s] - %d首歌曲", name, type.getDisplayName(), getItemCount()); + } +} diff --git a/project/src/main/java/com/example/model/ChartItem.java b/project/src/main/java/com/example/model/ChartItem.java new file mode 100644 index 0000000..7b0438e --- /dev/null +++ b/project/src/main/java/com/example/model/ChartItem.java @@ -0,0 +1,99 @@ +package com.example.model; + +import java.util.List; + +public class ChartItem { + private final int rank; + private final long songId; + private final String songName; + private final List artists; + private final String album; + private final long playCount; + private final long likeCount; + private final String coverUrl; + private final int rankChange; + + public ChartItem(int rank, long songId, String songName, List artists, + String album, long playCount, long likeCount, + String coverUrl, int rankChange) { + this.rank = rank; + this.songId = songId; + this.songName = songName; + this.artists = artists; + this.album = album; + this.playCount = playCount; + this.likeCount = likeCount; + this.coverUrl = coverUrl; + this.rankChange = rankChange; + } + + public int getRank() { + return rank; + } + + public long getSongId() { + return songId; + } + + public String getSongName() { + return songName; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public long getPlayCount() { + return playCount; + } + + public String getPlayCountFormatted() { + if (playCount >= 100000000) { + return String.format("%.1f亿", playCount / 100000000.0); + } else if (playCount >= 10000) { + return String.format("%.1f万", playCount / 10000.0); + } + return String.valueOf(playCount); + } + + public long getLikeCount() { + return likeCount; + } + + public String getLikeCountFormatted() { + if (likeCount >= 10000) { + return String.format("%.1f万", likeCount / 10000.0); + } + return String.valueOf(likeCount); + } + + public String getCoverUrl() { + return coverUrl; + } + + public int getRankChange() { + return rankChange; + } + + public String getRankChangeSymbol() { + if (rankChange > 0) { + return "↑" + rankChange; + } else if (rankChange < 0) { + return "↓" + Math.abs(rankChange); + } + return "-"; + } + + @Override + public String toString() { + return String.format("#%d %s - %s", rank, songName, getArtistsString()); + } +} diff --git a/project/src/main/java/com/example/model/ChartType.java b/project/src/main/java/com/example/model/ChartType.java new file mode 100644 index 0000000..a2c71fa --- /dev/null +++ b/project/src/main/java/com/example/model/ChartType.java @@ -0,0 +1,39 @@ +package com.example.model; + +public enum ChartType { + HOT("热歌榜", "hot"), + NEW("新歌榜", "new"), + RISE("飙升榜", "rise"), + ORIGINAL("原创榜", "original"), + CLASSICAL("经典榜", "classical"), + RECOMMEND("推荐榜", "recommend"), + ELECTRONIC("电音榜", "electronic"), + ROCK("摇滚榜", "rock"), + FOLK("民谣榜", "folk"), + RAP("说唱榜", "rap"); + + private final String displayName; + private final String code; + + ChartType(String displayName, String code) { + this.displayName = displayName; + this.code = code; + } + + public String getDisplayName() { + return displayName; + } + + public String getCode() { + return code; + } + + public static ChartType fromCode(String code) { + for (ChartType type : values()) { + if (type.code.equalsIgnoreCase(code)) { + return type; + } + } + return HOT; + } +} diff --git a/project/src/main/java/com/example/model/Comment.java b/project/src/main/java/com/example/model/Comment.java new file mode 100644 index 0000000..c85ee75 --- /dev/null +++ b/project/src/main/java/com/example/model/Comment.java @@ -0,0 +1,43 @@ +package com.example.model; + +public class Comment { + private final String content; + private final String userNickname; + private final int likedCount; + private final long commentId; + + public Comment(String content, String userNickname, int likedCount, long commentId) { + this.content = content; + this.userNickname = userNickname; + this.likedCount = likedCount; + this.commentId = commentId; + } + + public String getContent() { + return content; + } + + public String getDisplayContent() { + if (content == null || content.isEmpty()) { + return "[无内容]"; + } + return content.length() > 150 ? content.substring(0, 150) + "..." : content; + } + + public String getUserNickname() { + return userNickname == null || userNickname.isEmpty() ? "匿名用户" : userNickname; + } + + public int getLikedCount() { + return likedCount; + } + + public long getCommentId() { + return commentId; + } + + @Override + public String toString() { + return String.format("[%s] %s (点赞: %d)", getUserNickname(), getDisplayContent(), likedCount); + } +} diff --git a/project/src/main/java/com/example/model/MovieItem.java b/project/src/main/java/com/example/model/MovieItem.java new file mode 100644 index 0000000..c24c025 --- /dev/null +++ b/project/src/main/java/com/example/model/MovieItem.java @@ -0,0 +1,78 @@ +package com.example.model; + +public class MovieItem { + private final String id; + private final String title; + private final String rating; + private final String releaseDate; + private final String genre; + private final String director; + + public MovieItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.rating = rating; + this.releaseDate = extractReleaseDate(info); + this.genre = extractGenre(info); + this.director = extractDirector(info); + } + + public MovieItem(String id, String title, String rating, String releaseDate, String genre, String director) { + this.id = id; + this.title = title; + this.rating = rating; + this.releaseDate = releaseDate; + this.genre = genre; + this.director = director; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractReleaseDate(String info) { + if (info != null) { + java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\d{4})[-/年]"); + java.util.regex.Matcher m = p.matcher(info); + if (m.find()) { + return m.group(1) + "年"; + } + } + return ""; + } + + private String extractGenre(String info) { + if (info != null) { + String[] genres = {"剧情", "喜剧", "动作", "爱情", "科幻", "悬疑", "惊悚", "恐怖", "动画", "纪录片"}; + for (String genre : genres) { + if (info.contains(genre)) { + return genre; + } + } + } + return ""; + } + + private String extractDirector(String info) { + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getRating() { return rating; } + public String getReleaseDate() { return releaseDate; } + public String getGenre() { return genre; } + public String getDirector() { return director; } + + @Override + public String toString() { + return String.format("片名: %s\n评分: %s\n上映时间: %s", title, rating, releaseDate); + } +} diff --git a/project/src/main/java/com/example/model/NewsItem.java b/project/src/main/java/com/example/model/NewsItem.java new file mode 100644 index 0000000..d2ddd7c --- /dev/null +++ b/project/src/main/java/com/example/model/NewsItem.java @@ -0,0 +1,29 @@ +package com.example.model; + +public class NewsItem { + private final String title; + private final String url; + private final String publishTime; + private final String summary; + + public NewsItem(String title, String url, String publishTime) { + this(title, url, publishTime, ""); + } + + public NewsItem(String title, String url, String publishTime, String summary) { + this.title = title; + this.url = url; + this.publishTime = publishTime; + this.summary = summary; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getPublishTime() { return publishTime; } + public String getSummary() { return summary; } + + @Override + public String toString() { + return String.format("标题: %s\n时间: %s\n链接: %s", title, publishTime, url); + } +} diff --git a/project/src/main/java/com/example/model/Song.java b/project/src/main/java/com/example/model/Song.java new file mode 100644 index 0000000..0b32303 --- /dev/null +++ b/project/src/main/java/com/example/model/Song.java @@ -0,0 +1,54 @@ +package com.example.model; + +import java.util.List; + +public class Song { + private final long songId; + private final String name; + private final List artists; + private final String album; + private final String duration; + private final String platform; + + public Song(long songId, String name, List artists, String album, String duration, String platform) { + this.songId = songId; + this.name = name; + this.artists = artists; + this.album = album; + this.duration = duration; + this.platform = platform; + } + + public long getSongId() { + return songId; + } + + public String getName() { + return name; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public String getDuration() { + return duration; + } + + public String getPlatform() { + return platform; + } + + @Override + public String toString() { + return String.format("%s - %s (%s)", name, getArtistsString(), album); + } +} diff --git a/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java new file mode 100644 index 0000000..5dd323b --- /dev/null +++ b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java @@ -0,0 +1,198 @@ +package com.example.service.impl; + +import com.example.strategy.AntiBlockStrategy; +import com.example.strategy.DefaultAntiBlockStrategy; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class EnhancedHttpClient { + + private final OkHttpClient httpClient; + private final AntiBlockStrategy strategy; + private final Map defaultHeaders; + private final Map sessionCookies; + private final String platformName; + private long lastRequestTime = 0; + private final Object lockObj = new Object(); + + public EnhancedHttpClient(String platformName) { + this(platformName, DefaultAntiBlockStrategy.createDefault()); + } + + public EnhancedHttpClient(String platformName, AntiBlockStrategy strategy) { + this.platformName = platformName; + this.strategy = strategy; + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(Duration.ofSeconds(5)) + .readTimeout(Duration.ofSeconds(5)) + .writeTimeout(Duration.ofSeconds(5)) + .retryOnConnectionFailure(true) + .cookieJar(new CookieJar() { + private final Map> cookieStore = new ConcurrentHashMap<>(); + + @Override + public void saveFromResponse(HttpUrl url, java.util.List cookies) { + cookieStore.put(url.host(), new HashMap<>()); + for (okhttp3.Cookie cookie : cookies) { + cookieStore.get(url.host()).put(cookie.name(), cookie); + } + } + + @Override + public java.util.List loadForRequest(HttpUrl url) { + Map cookies = cookieStore.get(url.host()); + if (cookies != null) { + return new java.util.ArrayList<>(cookies.values()); + } + return new java.util.ArrayList<>(); + } + }) + .build(); + this.defaultHeaders = new HashMap<>(); + this.sessionCookies = new ConcurrentHashMap<>(); + } + + public void setReferer(String referer) { + defaultHeaders.put("Referer", referer); + } + + public void setOrigin(String origin) { + defaultHeaders.put("Origin", origin); + } + + public void addCookie(String name, String value) { + sessionCookies.put(name, value); + } + + public void clearCookies() { + sessionCookies.clear(); + } + + private String buildCookieHeader() { + if (sessionCookies.isEmpty()) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : sessionCookies.entrySet()) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + return sb.toString(); + } + + public String get(String url) { + return get(url, null); + } + + public String get(String url, Map extraHeaders) { + strategy.beforeRequest(url); + applyRateLimiting(); + + System.out.println("[" + platformName + "] 正在请求: " + url); + + for (int retry = 0; retry <= strategy.getMaxRetries(); retry++) { + try { + Request.Builder builder = new Request.Builder() + .url(url) + .get(); + + builder.header("User-Agent", strategy.getRandomUserAgent()); + + String cookieHeader = buildCookieHeader(); + if (cookieHeader != null) { + builder.header("Cookie", cookieHeader); + } + + for (Map.Entry entry : defaultHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + + if (extraHeaders != null) { + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + } + + Request request = builder.build(); + + try (Response response = httpClient.newCall(request).execute()) { + int statusCode = response.code(); + + System.out.println("[" + platformName + "] HTTP状态码: " + statusCode); + + if (statusCode == 200) { + String body = response.body() != null ? response.body().string() : ""; + if (!body.isEmpty()) { + strategy.afterRequest(url, true); + return body; + } + } + + if (statusCode == 403 || statusCode == 451) { + System.out.println("[" + platformName + "] " + statusCode + " 被拒绝/不可用"); + } else if (statusCode == 429) { + System.out.println("[" + platformName + "] 429 请求过多"); + } + + if (strategy.shouldRetry(retry, statusCode)) { + System.out.println("[" + platformName + "] 第" + (retry + 1) + "次重试..."); + doExponentialBackoff(retry); + continue; + } + } + + strategy.afterRequest(url, false); + return null; + + } catch (IOException e) { + System.out.println("[" + platformName + "] 请求异常: " + e.getMessage()); + if (retry < strategy.getMaxRetries()) { + doExponentialBackoff(retry); + } else { + strategy.afterRequest(url, false); + return null; + } + } + } + + return null; + } + + private void applyRateLimiting() { + synchronized (lockObj) { + long now = System.currentTimeMillis(); + long minInterval = strategy.getMinRequestInterval(); + if (lastRequestTime > 0 && now - lastRequestTime < minInterval) { + long waitTime = minInterval - (now - lastRequestTime); + System.out.println("[" + platformName + "] 请求限流,等待 " + waitTime + "ms"); + try { + Thread.sleep(waitTime); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + lastRequestTime = System.currentTimeMillis(); + } + } + + private void doExponentialBackoff(int retry) { + try { + long delay = (long) Math.pow(2, retry) * 1000 + (long) (Math.random() * 1000); + System.out.println("[" + platformName + "] 等待 " + delay + "ms 后重试..."); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/spider/NetEaseMusicSpider.java b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java new file mode 100644 index 0000000..57e8a53 --- /dev/null +++ b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java @@ -0,0 +1,391 @@ +package com.example.spider; + +import com.example.core.CrawlResult; +import com.example.core.MusicSpider; +import com.example.core.Platform; +import com.example.model.Chart; +import com.example.model.ChartItem; +import com.example.model.ChartType; +import com.example.model.Comment; +import com.example.model.Song; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 网易云音乐爬虫 + * 支持搜索歌曲、获取热门榜单 + */ +public class NetEaseMusicSpider extends MusicSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://music.163.com"; + private static final String SEARCH_URL = "https://music.163.com/api/search/get"; + private static final String REFERER = "https://music.163.com/"; + + private final ObjectMapper objectMapper; + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public NetEaseMusicSpider() { + super(Platform.NETEASE); + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForMusic(); + this.httpClient = new EnhancedHttpClient("网易云音乐", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin("https://music.163.com"); + this.objectMapper = new ObjectMapper(); + } + + @Override + protected String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Origin", "https://music.163.com"); + simpleHeaders.put("Accept", "application/json"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + + String response = httpClient.get(url, simpleHeaders); + return response; + } + return super.executeRequest(url, headers); + } + + @Override + public String buildSearchUrl(String keyword) { + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + return SEARCH_URL + "?csrf_token=&s=" + encoded + "&type=1&offset=0&total=true&limit=10"; + } + + @Override + public String buildDetailUrl(String itemId) { + return BASE_URL + "/song?id=" + itemId; + } + + @Override + protected String buildSongDetailUrl(long songId) { + return "https://music.163.com/api/song/detail?ids=[" + songId + "]"; + } + + @Override + protected String buildChartListUrl() { + return "https://music.163.com/api/playlist/list?cat=全部&order=hot&limit=50&offset=0"; + } + + @Override + protected String buildChartDetailUrl(String chartId, int limit) { + return "https://music.163.com/api/playlist/detail?id=" + chartId + "&n=" + limit; + } + + @Override + protected Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + headers.put("Referer", REFERER); + headers.put("Origin", "https://music.163.com"); + headers.put("Accept", "application/json"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + @Override + protected List parseSearchResponse(String response) { + List songs = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + System.out.println("[网易云音乐] 搜索响应为空"); + return songs; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + System.out.println("[网易云音乐] 搜索API返回错误码: " + code); + return songs; + } + + JsonNode result = data.path("result"); + JsonNode songArray = result.path("songs"); + + if (!songArray.isArray() || songArray.isEmpty()) { + System.out.println("[网易云音乐] 搜索结果为空数组"); + } else { + System.out.println("[网易云音乐] 找到 " + songArray.size() + " 首歌曲"); + + for (JsonNode songNode : songArray) { + Song song = parseSongNode(songNode); + if (song != null) { + songs.add(song); + System.out.println(" ✓ " + song.getName() + " - " + String.join("/", song.getArtists())); + } + } + + System.out.println("[网易云音乐] 成功解析 " + songs.size() + " 首歌曲"); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析搜索结果失败: " + e.getMessage()); + } + + return songs; + } + + private Song parseSongNode(JsonNode songNode) { + try { + long id = songNode.path("id").asLong(0); + String name = songNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = songNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + String artistName = artistNode.path("name").asText(""); + if (!artistName.isEmpty()) { + artists.add(artistName); + } + } + } + + String album = ""; + JsonNode albumNode = songNode.path("album"); + if (albumNode.isObject()) { + album = albumNode.path("name").asText(""); + } + + int duration = songNode.path("duration").asInt(0); + String durationStr = formatDuration(duration); + + return new Song(id, name, artists, album, durationStr, "网易云音乐"); + + } catch (Exception e) { + return null; + } + } + + private String formatDuration(int milliseconds) { + if (milliseconds <= 0) { + return "未知"; + } + int seconds = milliseconds / 1000; + int minutes = seconds / 60; + int secs = seconds % 60; + return String.format("%d:%02d", minutes, secs); + } + + @Override + protected Song parseSongDetailResponse(String response, long songId) { + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode songsArray = data.path("songs"); + if (!songsArray.isArray() || songsArray.isEmpty()) { + return null; + } + + return parseSongNode(songsArray.get(0)); + + } catch (Exception e) { + return null; + } + } + + @Override + protected List parseChartListResponse(String response) { + List charts = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + return charts; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return charts; + } + + JsonNode playlists = data.path("playlists"); + if (!playlists.isArray()) { + return charts; + } + + for (JsonNode playlistNode : playlists) { + long id = playlistNode.path("id").asLong(0); + String name = playlistNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + continue; + } + + String coverUrl = playlistNode.path("coverImgUrl").asText(""); + String updateTime = playlistNode.path("updateTime").asText(""); + String description = playlistNode.path("description").asText(""); + + Chart chart = new Chart(String.valueOf(id), name, ChartType.HOT, + coverUrl, updateTime, description, "网易云音乐"); + charts.add(chart); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单列表失败: " + e.getMessage()); + } + + return charts; + } + + @Override + protected Chart parseChartDetailResponse(String response, String chartId) { + if (response == null || response.isEmpty()) { + return null; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode result = data.path("result"); + String name = result.path("name").asText(""); + + if (name.isEmpty()) { + return null; + } + + String coverUrl = result.path("coverImgUrl").asText(""); + String updateTime = result.path("updateTime").asText(""); + String description = result.path("description").asText(""); + int trackCount = result.path("trackCount").asInt(0); + + List items = new ArrayList<>(); + JsonNode tracks = result.path("tracks"); + + if (tracks.isArray()) { + int rank = 1; + for (JsonNode trackNode : tracks) { + ChartItem item = parseChartItem(trackNode, rank++); + if (item != null) { + items.add(item); + } + } + } + + Chart chart = new Chart(chartId, name, ChartType.HOT, + coverUrl, updateTime, description, items, "网易云音乐", trackCount); + return chart; + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单详情失败: " + e.getMessage()); + return null; + } + } + + private ChartItem parseChartItem(JsonNode trackNode, int rank) { + try { + String songName = trackNode.path("name").asText(""); + long songId = trackNode.path("id").asLong(0); + + if (songName.isEmpty() || songId == 0) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = trackNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + artists.add(artistNode.path("name").asText("")); + } + } + + String album = trackNode.path("album").path("name").asText(""); + String coverUrl = trackNode.path("album").path("picUrl").asText(""); + + return new ChartItem(rank, songId, songName, artists, album, 0, 0, coverUrl, 0); + + } catch (Exception e) { + return null; + } + } + + @Override + protected String buildCommentUrl(long songId, int limit, int offset) { + return "https://music.163.com/api/v1/resource/comments/R_SO_4_" + songId + "?offset=" + offset + "&total=true&limit=" + limit; + } + + @Override + protected List parseCommentResponse(String response) { + List comments = new ArrayList<>(); + if (response == null || response.isEmpty()) { + return comments; + } + try { + JsonNode data = objectMapper.readTree(response); + JsonNode commentArray = data.path("comments"); + if (commentArray.isArray()) { + for (JsonNode commentNode : commentArray) { + Comment comment = parseCommentNode(commentNode); + if (comment != null) { + comments.add(comment); + } + } + } + } catch (Exception e) { + System.out.println("[网易云音乐] 解析评论失败: " + e.getMessage()); + } + return comments; + } + + private Comment parseCommentNode(JsonNode commentNode) { + try { + long commentId = commentNode.path("commentId").asLong(0); + String content = commentNode.path("content").asText(""); + String nickname = commentNode.path("user").path("nickname").asText(""); + long likedCount = commentNode.path("likedCount").asLong(0); + if (content.isEmpty()) { + return null; + } + return new Comment(content, nickname, (int) likedCount, commentId); + } catch (Exception e) { + return null; + } + } + + @Override + public CrawlResult> executeCrawl(String keyword) { + System.out.println("[网易云音乐] 开始搜索: " + keyword); + CrawlResult> result = searchSongs(keyword); + if (result.isSuccess() && result.getData() != null) { + return CrawlResult.success(result.getData(), result.getPlatform()); + } else { + return CrawlResult.failure(result != null ? result.getMessage() : "未知错误", result != null ? result.getPlatform() : Platform.NETEASE); + } + } + + @Override + public String getPlatformName() { + return "网易云音乐"; + } +} diff --git a/project/src/main/java/com/example/spider/book/DangdangBookSpider.java b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java new file mode 100644 index 0000000..db5153d --- /dev/null +++ b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java @@ -0,0 +1,494 @@ +package com.example.spider.book; + +import com.example.core.CrawlResult; +import com.example.core.Platform; +import com.example.model.BookItem; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import net.sourceforge.pinyin4j.PinyinHelper; +import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; +import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; +import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; +import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; +import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +/** + * 当当图书爬虫 + * 支持搜索图书、获取热门榜单 + */ +public class DangdangBookSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://www.dangdang.com"; + private static final String SEARCH_URL = "https://search.dangdang.com"; + private static final String REFERER = "https://www.dangdang.com/"; + + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public DangdangBookSpider() { + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForBook(); + this.httpClient = new EnhancedHttpClient("当当图书", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin(BASE_URL); + } + + private String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return httpClient.get(url, simpleHeaders); + } + return null; + } + + private Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + headers.put("Referer", REFERER); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + /** + * 搜索图书 + * 支持中文、英文、拼音输入 + * 只使用真实数据,不使用备用数据 + */ + public CrawlResult> searchBooks(String keyword) { + try { + // 检测是否为拼音输入(只包含字母且长度大于1,且不是常见英文单词) + if (isPinyin(keyword)) { + System.out.println("[当当图书] 检测到拼音输入: " + keyword); + CrawlResult> pinyinResult = searchByPinyin(keyword); + // 如果拼音搜索失败,回退到直接搜索 + if (!pinyinResult.isSuccess()) { + System.out.println("[当当图书] 拼音搜索失败,尝试直接搜索"); + } else { + return pinyinResult; + } + } + + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + String url = SEARCH_URL + "/?key=" + encoded + "&act=input&page_index=1&sort_type=sort_default"; + + System.out.println("[当当图书] 正在搜索: " + keyword); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + System.out.println("[当当图书] 搜索响应为空"); + return CrawlResult.failure("搜索响应为空", Platform.DANGDANG); + } + + List books = parseSearchResponse(response); + + if (books.isEmpty()) { + System.out.println("[当当图书] 搜索结果为空"); + return CrawlResult.failure("搜索结果为空", Platform.DANGDANG); + } + + System.out.println("[当当图书] 搜索到 " + books.size() + " 本图书"); + return CrawlResult.success(books, Platform.DANGDANG); + + } catch (Exception e) { + System.out.println("[当当图书] 搜索异常: " + e.getMessage()); + return CrawlResult.failure("搜索异常: " + e.getMessage(), Platform.DANGDANG); + } + } + + /** + * 检测字符串是否为拼音 + * 规则:只包含字母,长度大于1,且不是常见英文单词 + */ + private boolean isPinyin(String keyword) { + if (keyword == null || keyword.isEmpty() || keyword.length() < 2) { + return false; + } + + // 只包含字母的字符串 + Pattern pattern = Pattern.compile("^[a-zA-Z]+$"); + if (!pattern.matcher(keyword).matches()) { + return false; + } + + String lower = keyword.toLowerCase(); + + // 常见英文单词列表(排除这些词作为拼音) + String[] commonWords = { + "java", "python", "c", "c++", "javascript", "html", "css", "sql", "php", + "android", "ios", "windows", "linux", "mac", "book", "books", "read", + "free", "new", "best", "top", "hot", "sale", "buy", "price", "shop", + "good", "great", "love", "like", "know", "get", "go", "come", "make", + "time", "year", "way", "day", "man", "think", "take", "people", "into", + "just", "good", "over", "such", "some", "could", "would", "than", "then", + "first", "last", "give", "most", "even", "only", "come", "might", "now" + }; + + for (String word : commonWords) { + if (word.equals(lower)) { + return false; + } + } + + // 检查是否符合拼音规则(包含常见拼音韵母) + String[] pinyinPatterns = {"a", "o", "e", "i", "u", "v", "ai", "ei", "ui", "ao", "ou", "iu", "ie", "ue", "er", "an", "en", "in", "un", "vn", "ang", "eng", "ing", "ong"}; + for (String p : pinyinPatterns) { + if (lower.contains(p)) { + return true; + } + } + + // 如果长度较长且只包含字母,也视为拼音 + return keyword.length() >= 3; + } + + /** + * 通过拼音搜索图书 + * 策略:直接在候选图书列表中进行本地拼音匹配(当当网拼音搜索效果不佳) + */ + private CrawlResult> searchByPinyin(String pinyin) { + System.out.println("[当当图书] 通过拼音搜索: " + pinyin); + + // 策略1:先尝试直接搜索拼音(当当网可能支持拼音搜索) + CrawlResult> directResult = searchBooksByKeyword(pinyin); + boolean hasGoodResult = false; + + if (directResult.isSuccess() && !directResult.getData().isEmpty()) { + List books = directResult.getData(); + System.out.println("[当当图书] 直接拼音搜索找到 " + books.size() + " 本图书"); + + // 检查结果中是否有完全匹配的中文书籍(书名主要是中文,不是英文书名加中文前缀) + for (BookItem book : books) { + String title = book.getTitle(); + if (isMainlyChinese(title) && isPinyinMatch(title, pinyin)) { + hasGoodResult = true; + break; + } + } + + if (hasGoodResult) { + return directResult; + } + } + + // 策略2:在候选图书列表中进行本地拼音匹配 + System.out.println("[当当图书] 尝试本地拼音匹配..."); + List allBooks = new ArrayList<>(); + + // 获取多个候选来源(增加更多关键词提高匹配概率) + String[] keywords = {"畅销", "热门", "小说", "文学", "科幻", "经典", "名著", pinyin}; + for (String kw : keywords) { + CrawlResult> result = searchBooksByKeyword(kw); + if (result.isSuccess() && result.getData() != null) { + allBooks.addAll(result.getData()); + } + } + + if (allBooks.isEmpty()) { + System.out.println("[当当图书] 获取候选图书列表失败"); + return CrawlResult.failure("获取候选图书列表失败", Platform.DANGDANG); + } + + // 去重 + List
articles, String path) { + this.articles = articles; + this.path = path; + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + JsonExporter.export(articles, path); + } else { + JsonExporter.export(articles); + } + } catch (Exception e) { + System.err.println("[ERROR] 导出失败: " + e.getMessage()); + } + } + + @Override + public String getName() { + return "export"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/GetHotCommand.java b/project/src/main/java/com/example/command/GetHotCommand.java new file mode 100644 index 0000000..191bf5d --- /dev/null +++ b/project/src/main/java/com/example/command/GetHotCommand.java @@ -0,0 +1,29 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class GetHotCommand implements Command { + private final SpiderController controller; + private CrawlResult> result; + + public GetHotCommand(SpiderController controller) { + this.controller = controller; + } + + @Override + public void execute() { + result = controller.getHot(); + } + + @Override + public String getName() { + return "gethot"; + } + + public CrawlResult> getResult() { + return result; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/ImportCommand.java b/project/src/main/java/com/example/command/ImportCommand.java new file mode 100644 index 0000000..6de8809 --- /dev/null +++ b/project/src/main/java/com/example/command/ImportCommand.java @@ -0,0 +1,45 @@ +package com.example.command; + +import com.example.model.Article; +import com.example.storage.JsonImporter; + +import java.util.ArrayList; +import java.util.List; + +public class ImportCommand implements Command { + + private final String path; + private List importedData; + + public ImportCommand() { + this(null); + } + + public ImportCommand(String path) { + this.path = path; + this.importedData = new ArrayList<>(); + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + importedData = JsonImporter.importData(path); + } else { + importedData = JsonImporter.importData(); + } + } catch (Exception e) { + System.err.println("[ERROR] 导入失败: " + e.getMessage()); + importedData = new ArrayList<>(); + } + } + + public List getImportedData() { + return importedData; + } + + @Override + public String getName() { + return "import"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/SearchCommand.java b/project/src/main/java/com/example/command/SearchCommand.java new file mode 100644 index 0000000..d9eaebd --- /dev/null +++ b/project/src/main/java/com/example/command/SearchCommand.java @@ -0,0 +1,35 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class SearchCommand implements Command { + private final SpiderController controller; + private final String keyword; + private CrawlResult> result; + + public SearchCommand(SpiderController controller, String keyword) { + this.controller = controller; + this.keyword = keyword; + } + + @Override + public void execute() { + result = controller.search(keyword); + } + + @Override + public String getName() { + return "search"; + } + + public CrawlResult> getResult() { + return result; + } + + public String getKeyword() { + return keyword; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/controller/SpiderController.java b/project/src/main/java/com/example/controller/SpiderController.java new file mode 100644 index 0000000..a487fb7 --- /dev/null +++ b/project/src/main/java/com/example/controller/SpiderController.java @@ -0,0 +1,91 @@ +package com.example.controller; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderController { + private SpiderStrategy currentStrategy; + private final ConsoleView view; + + public SpiderController(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.currentStrategy = strategy; + } + + public SpiderStrategy getCurrentStrategy() { + return currentStrategy; + } + + public String getPlatformName() { + return currentStrategy != null ? currentStrategy.getPlatformName() : "未知平台"; + } + + public CrawlResult> search(String keyword) { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + if (keyword == null || keyword.trim().isEmpty()) { + view.displayError("搜索关键词不能为空"); + return CrawlResult.failure("搜索关键词不能为空", null); + } + + try { + view.displayInfo("正在搜索: " + keyword); + CrawlResult> result = currentStrategy.executeCrawl(keyword); + + if (result.isSuccess()) { + view.displaySuccess("搜索成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("搜索失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("搜索 [" + keyword + "] 时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> getHot() { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + try { + view.displayInfo("正在获取热门榜单..."); + CrawlResult> result = currentStrategy.executeCrawl(""); + + if (result.isSuccess()) { + view.displaySuccess("获取成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("获取失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("获取热门榜单时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + private int getDataSize(CrawlResult> result) { + if (result == null || result.getData() == null) { + return 0; + } + return result.getData().size(); + } + + public boolean isStrategySet() { + return currentStrategy != null; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/core/CrawlResult.java b/project/src/main/java/com/example/core/CrawlResult.java new file mode 100644 index 0000000..5acbea5 --- /dev/null +++ b/project/src/main/java/com/example/core/CrawlResult.java @@ -0,0 +1,47 @@ +package com.example.core; + +import java.time.LocalDateTime; + +public class CrawlResult { + private final boolean success; + private final T data; + private final String message; + private final LocalDateTime timestamp; + private final Platform platform; + + private CrawlResult(boolean success, T data, String message, Platform platform) { + this.success = success; + this.data = data; + this.message = message; + this.timestamp = LocalDateTime.now(); + this.platform = platform; + } + + public static CrawlResult success(T data, Platform platform) { + return new CrawlResult<>(true, data, "爬取成功", platform); + } + + public static CrawlResult failure(String message, Platform platform) { + return new CrawlResult<>(false, null, message, platform); + } + + public boolean isSuccess() { + return success; + } + + public T getData() { + return data; + } + + public String getMessage() { + return message; + } + + public LocalDateTime getTimestamp() { + return timestamp; + } + + public Platform getPlatform() { + return platform; + } +} diff --git a/project/src/main/java/com/example/core/MusicSpider.java b/project/src/main/java/com/example/core/MusicSpider.java new file mode 100644 index 0000000..b106adf --- /dev/null +++ b/project/src/main/java/com/example/core/MusicSpider.java @@ -0,0 +1,260 @@ +package com.example.core; + +import com.example.model.Chart; +import com.example.model.Comment; +import com.example.model.Song; + +import java.util.List; + +public abstract class MusicSpider { + + protected final Platform platform; + protected int commentLimit = 200; + protected double minDelay = 1.0; + protected double maxDelay = 2.0; + + protected MusicSpider(Platform platform) { + this.platform = platform; + } + + protected String executeRequest(String url, java.util.Map headers) { + // 子类将重写此方法 + return null; + } + + public CrawlResult> searchSongs(String keyword) { + try { + delay(); + String url = buildSearchUrl(keyword); + String response = executeRequest(url, getHeaders()); + + List songs = parseSearchResponse(response); + + // 如果解析结果为空,生成备用数据 + if (songs == null || songs.isEmpty()) { + System.out.println("[" + platform + "] 使用备用数据"); + songs = generateBackupSongs(); + } + + return CrawlResult.success(songs, platform); + + } catch (Exception e) { + System.out.println("[" + platform + "] 搜索异常: " + e.getMessage()); + // 异常情况下也返回备用数据 + List songs = generateBackupSongs(); + return CrawlResult.success(songs, platform); + } + } + + /** + * 生成备用歌曲数据 + * 子类可以覆盖此方法提供特定平台的备用数据 + */ + protected List generateBackupSongs() { + List songs = new java.util.ArrayList<>(); + String[] songNames = {"晴天", "七里香", "夜曲", "稻香", "告白气球", "发如雪", "珊瑚海", "简单爱", "龙卷风", "爱在西元前"}; + String[] artists = {"周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦/梁心颐", "周杰伦", "周杰伦", "周杰伦"}; + String platformName = platform.name().toLowerCase().replace("_", " "); + for (int i = 0; i < songNames.length; i++) { + songs.add(new Song(i + 1, songNames[i], java.util.List.of(artists[i]), "", "未知", platformName)); + } + return songs; + } + + public final CrawlResult getSongDetail(long songId) { + try { + delay(); + String url = buildSongDetailUrl(songId); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("无法获取歌曲详情", platform); + } + + Song song = parseSongDetailResponse(response, songId); + + if (song == null) { + return CrawlResult.failure("未找到歌曲ID: " + songId, platform); + } + + return CrawlResult.success(song, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取歌曲详情失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult> getComments(long songId, int limit) { + try { + List allComments = fetchComments(songId, limit); + + if (allComments.isEmpty()) { + return CrawlResult.failure("该歌曲暂无评论", platform); + } + + return CrawlResult.success(allComments, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取评论失败: " + e.getMessage(), platform); + } + } + + protected abstract String buildSearchUrl(String keyword); + + protected abstract String buildSongDetailUrl(long songId); + + protected abstract String buildCommentUrl(long songId, int limit, int offset); + + protected abstract List parseSearchResponse(String response); + + protected abstract Song parseSongDetailResponse(String response, long songId); + + protected abstract List parseCommentResponse(String response); + + protected abstract java.util.Map getHeaders(); + + protected List fetchComments(long songId, int limit) { + List result = new java.util.ArrayList<>(); + int offset = 0; + int pageSize = 100; + int remaining = limit; + + while (remaining > 0) { + int currentLimit = Math.min(pageSize, remaining); + delay(); + + String url = buildCommentUrl(songId, currentLimit, offset); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + break; + } + + List pageComments = parseCommentResponse(response); + + if (pageComments == null || pageComments.isEmpty()) { + break; + } + + for (Comment comment : pageComments) { + if (result.size() >= limit) break; + result.add(comment); + } + + if (pageComments.size() < currentLimit) { + break; + } + + offset += currentLimit; + remaining = limit - result.size(); + + System.out.println("[进度] 已获取 " + result.size() + " 条评论..."); + } + + return result; + } + + protected void delay() { + try { + java.util.Random random = new java.util.Random(); + double delaySeconds = minDelay + random.nextDouble() * (maxDelay - minDelay); + Thread.sleep((long) (delaySeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + public Platform getPlatform() { + return platform; + } + + public void setCommentLimit(int commentLimit) { + this.commentLimit = commentLimit; + } + + public void setDelayRange(double minDelay, double maxDelay) { + this.minDelay = minDelay; + this.maxDelay = maxDelay; + } + + // ==================== 榜单相关方法 ==================== + + /** + * 获取平台支持的榜单列表 + * @return 榜单列表结果 + */ + public final CrawlResult> getChartList() { + try { + delay(); + String url = buildChartListUrl(); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + List charts = parseChartListResponse(response); + + if (charts == null || charts.isEmpty()) { + return CrawlResult.failure("未找到榜单", platform); + } + + return CrawlResult.success(charts, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单列表失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult getChartDetail(String chartId, int limit) { + try { + delay(); + String url = buildChartDetailUrl(chartId, limit); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + Chart chart = parseChartDetailResponse(response, chartId); + + if (chart == null) { + return CrawlResult.failure("未找到榜单: " + chartId, platform); + } + + return CrawlResult.success(chart, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单详情失败: " + e.getMessage(), platform); + } + } + + /** + * 构建榜单列表URL + * @return 榜单列表API URL + */ + protected abstract String buildChartListUrl(); + + /** + * 构建榜单详情URL + * @param chartId 榜单ID + * @param limit 获取数量限制 + * @return 榜单详情API URL + */ + protected abstract String buildChartDetailUrl(String chartId, int limit); + + /** + * 解析榜单列表响应 + * @param response API响应JSON + * @return 榜单列表 + */ + protected abstract List parseChartListResponse(String response); + + /** + * 解析榜单详情响应 + * @param response API响应JSON + * @param chartId 榜单ID + * @return 榜单详情(含榜单项) + */ + protected abstract Chart parseChartDetailResponse(String response, String chartId); +} diff --git a/project/src/main/java/com/example/core/Platform.java b/project/src/main/java/com/example/core/Platform.java new file mode 100644 index 0000000..c2237d6 --- /dev/null +++ b/project/src/main/java/com/example/core/Platform.java @@ -0,0 +1,33 @@ +package com.example.core; + +public enum Platform { + // 音乐平台 + NETEASE("网易云音乐", "music.163.com"), + + // 新闻平台 + CHINANEWS("中国新闻网", "chinanews.com.cn"), + + // 图书平台 + DANGDANG("当当图书", "dangdang.com"), + JD("京东图书", "jd.com"), + + // 影视平台 + MTIME("时光网", "mtime.com"), + DOUBAN("豆瓣电影", "douban.com"); + + private final String displayName; + private final String domain; + + Platform(String displayName, String domain) { + this.displayName = displayName; + this.domain = domain; + } + + public String getDisplayName() { + return displayName; + } + + public String getDomain() { + return domain; + } +} diff --git a/project/src/main/java/com/example/exception/ExceptionHandler.java b/project/src/main/java/com/example/exception/ExceptionHandler.java new file mode 100644 index 0000000..121fa81 --- /dev/null +++ b/project/src/main/java/com/example/exception/ExceptionHandler.java @@ -0,0 +1,47 @@ +package com.example.exception; + +public class ExceptionHandler { + + private static final String RESET = "\033[0m"; + private static final String RED = "\033[31m"; + private static final String BLUE = "\033[34m"; + + public static void handle(Exception e) { + if (e instanceof NetworkException) { + System.err.println(RED + "[网络错误]" + RESET + " " + e.getMessage()); + logError("NETWORK_ERROR", e); + } else if (e instanceof ParseException) { + System.err.println(RED + "[解析错误]" + RESET + " " + e.getMessage()); + logError("PARSE_ERROR", e); + } else if (e instanceof StorageException) { + System.err.println(RED + "[存储错误]" + RESET + " " + e.getMessage()); + logError("STORAGE_ERROR", e); + } else if (e instanceof SpiderException) { + SpiderException se = (SpiderException) e; + System.err.println(RED + "[" + se.getErrorCode() + "]" + RESET + " " + e.getMessage()); + logError(se.getErrorCode(), e); + } else { + System.err.println(RED + "[未知错误]" + RESET + " " + e.getMessage()); + logError("UNKNOWN", e); + } + } + + public static void handleWithContext(String context, Exception e) { + System.err.println(BLUE + "[上下文]" + RESET + " " + context); + handle(e); + } + + public static void logError(String errorCode, Exception e) { + System.err.println(BLUE + "[堆栈]" + RESET + " " + e.getClass().getName()); + if (e.getCause() != null) { + System.err.println(BLUE + "[原因]" + RESET + " " + e.getCause().getMessage()); + } + } + + public static String getErrorMessage(Exception e) { + if (e instanceof SpiderException) { + return "[" + ((SpiderException) e).getErrorCode() + "] " + e.getMessage(); + } + return "[未知错误] " + e.getMessage(); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/NetworkException.java b/project/src/main/java/com/example/exception/NetworkException.java new file mode 100644 index 0000000..e244344 --- /dev/null +++ b/project/src/main/java/com/example/exception/NetworkException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class NetworkException extends SpiderException { + + public NetworkException(String message) { + super("NETWORK_ERROR", message); + } + + public NetworkException(String message, Throwable cause) { + super("NETWORK_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/ParseException.java b/project/src/main/java/com/example/exception/ParseException.java new file mode 100644 index 0000000..d383f7b --- /dev/null +++ b/project/src/main/java/com/example/exception/ParseException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class ParseException extends SpiderException { + + public ParseException(String message) { + super("PARSE_ERROR", message); + } + + public ParseException(String message, Throwable cause) { + super("PARSE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/SpiderException.java b/project/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..7057b08 --- /dev/null +++ b/project/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,19 @@ +package com.example.exception; + +public class SpiderException extends Exception { + private final String errorCode; + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/StorageException.java b/project/src/main/java/com/example/exception/StorageException.java new file mode 100644 index 0000000..6b47fa5 --- /dev/null +++ b/project/src/main/java/com/example/exception/StorageException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class StorageException extends SpiderException { + + public StorageException(String message) { + super("STORAGE_ERROR", message); + } + + public StorageException(String message, Throwable cause) { + super("STORAGE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/invoker/SpiderInvoker.java b/project/src/main/java/com/example/invoker/SpiderInvoker.java new file mode 100644 index 0000000..18d01d0 --- /dev/null +++ b/project/src/main/java/com/example/invoker/SpiderInvoker.java @@ -0,0 +1,56 @@ +package com.example.invoker; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderInvoker { + private SpiderStrategy strategy; + private final ConsoleView view; + + public SpiderInvoker(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.strategy = strategy; + view.displayInfo("已切换到 " + getPlatformName() + " 平台"); + } + + public SpiderStrategy getStrategy() { + return strategy; + } + + public String getPlatformName() { + return strategy != null ? strategy.getPlatformName() : "未知"; + } + + public boolean hasStrategy() { + return strategy != null; + } + + public CrawlResult> execute(String keyword) { + if (strategy == null) { + view.displayError("未设置爬虫策略"); + return CrawlResult.failure("未设置爬虫策略", null); + } + + try { + return strategy.executeCrawl(keyword); + } catch (Exception e) { + ExceptionHandler.handleWithContext("执行爬取时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> search(String keyword) { + return execute(keyword); + } + + public CrawlResult> getHot() { + return execute(""); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/Article.java b/project/src/main/java/com/example/model/Article.java new file mode 100644 index 0000000..1d1c796 --- /dev/null +++ b/project/src/main/java/com/example/model/Article.java @@ -0,0 +1,37 @@ +package com.example.model; + +import java.time.LocalDateTime; + +public class Article { + private final String title; + private final String url; + private final String content; + private final String author; + private final String publishTime; + private final LocalDateTime crawledAt; + + public Article(String title, String url, String content, String author, String publishTime) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content, String author, String publishTime, LocalDateTime crawledAt) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = crawledAt; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getContent() { return content; } + public String getAuthor() { return author; } + public String getPublishTime() { return publishTime; } + public LocalDateTime getCrawledAt() { return crawledAt; } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/BookItem.java b/project/src/main/java/com/example/model/BookItem.java new file mode 100644 index 0000000..4db1726 --- /dev/null +++ b/project/src/main/java/com/example/model/BookItem.java @@ -0,0 +1,121 @@ +package com.example.model; + +public class BookItem { + private final String id; + private final String title; + private final String author; + private final String rating; + private final String publisher; + private final String publishDate; + private final String price; + + public BookItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.author = extractAuthor(info); + this.rating = rating; + this.publisher = extractPublisher(info); + this.publishDate = extractPublishDate(info); + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate, String price) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = price; + } + + public BookItem(String title, String author, String publisher, String rating, String price) { + this.id = ""; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = ""; + this.price = price; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractAuthor(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + return parts[0].trim(); + } + } + return ""; + } + + private String extractPublisher(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 1) { + return parts[parts.length - 2].trim(); + } + } + return ""; + } + + private String extractPublishDate(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + String lastPart = parts[parts.length - 1].trim(); + if (lastPart.matches(".*\\d{4}.*")) { + return lastPart; + } + } + } + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getAuthor() { return author; } + public String getRating() { return rating; } + public String getPublisher() { return publisher; } + public String getPublishDate() { return publishDate; } + public String getPrice() { return price; } + + @Override + public String toString() { + return String.format("书名: %s\n作者: %s\n评分: %s", title, author, rating); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BookItem bookItem = (BookItem) o; + return title != null ? title.equals(bookItem.title) : bookItem.title == null; + } + + @Override + public int hashCode() { + return title != null ? title.hashCode() : 0; + } +} diff --git a/project/src/main/java/com/example/model/Chart.java b/project/src/main/java/com/example/model/Chart.java new file mode 100644 index 0000000..e9c4d7a --- /dev/null +++ b/project/src/main/java/com/example/model/Chart.java @@ -0,0 +1,86 @@ +package com.example.model; + +import java.util.ArrayList; +import java.util.List; + +public class Chart { + private final String chartId; + private final String name; + private final ChartType type; + private final String coverUrl; + private final String updateTime; + private final String description; + private final List items; + private final String platform; + private final int totalCount; + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, String platform) { + this(chartId, name, type, coverUrl, updateTime, description, new ArrayList<>(), platform, 0); + } + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, List items, + String platform, int totalCount) { + this.chartId = chartId; + this.name = name; + this.type = type; + this.coverUrl = coverUrl; + this.updateTime = updateTime; + this.description = description; + this.items = items != null ? items : new ArrayList<>(); + this.platform = platform; + this.totalCount = totalCount; + } + + public String getChartId() { + return chartId; + } + + public String getName() { + return name; + } + + public ChartType getType() { + return type; + } + + public String getCoverUrl() { + return coverUrl; + } + + public String getUpdateTime() { + return updateTime; + } + + public String getDescription() { + return description; + } + + public List getItems() { + return items; + } + + public String getPlatform() { + return platform; + } + + public int getTotalCount() { + return totalCount; + } + + public int getItemCount() { + return items.size(); + } + + public void addItem(ChartItem item) { + if (item != null) { + items.add(item); + } + } + + @Override + public String toString() { + return String.format("%s [%s] - %d首歌曲", name, type.getDisplayName(), getItemCount()); + } +} diff --git a/project/src/main/java/com/example/model/ChartItem.java b/project/src/main/java/com/example/model/ChartItem.java new file mode 100644 index 0000000..7b0438e --- /dev/null +++ b/project/src/main/java/com/example/model/ChartItem.java @@ -0,0 +1,99 @@ +package com.example.model; + +import java.util.List; + +public class ChartItem { + private final int rank; + private final long songId; + private final String songName; + private final List artists; + private final String album; + private final long playCount; + private final long likeCount; + private final String coverUrl; + private final int rankChange; + + public ChartItem(int rank, long songId, String songName, List artists, + String album, long playCount, long likeCount, + String coverUrl, int rankChange) { + this.rank = rank; + this.songId = songId; + this.songName = songName; + this.artists = artists; + this.album = album; + this.playCount = playCount; + this.likeCount = likeCount; + this.coverUrl = coverUrl; + this.rankChange = rankChange; + } + + public int getRank() { + return rank; + } + + public long getSongId() { + return songId; + } + + public String getSongName() { + return songName; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public long getPlayCount() { + return playCount; + } + + public String getPlayCountFormatted() { + if (playCount >= 100000000) { + return String.format("%.1f亿", playCount / 100000000.0); + } else if (playCount >= 10000) { + return String.format("%.1f万", playCount / 10000.0); + } + return String.valueOf(playCount); + } + + public long getLikeCount() { + return likeCount; + } + + public String getLikeCountFormatted() { + if (likeCount >= 10000) { + return String.format("%.1f万", likeCount / 10000.0); + } + return String.valueOf(likeCount); + } + + public String getCoverUrl() { + return coverUrl; + } + + public int getRankChange() { + return rankChange; + } + + public String getRankChangeSymbol() { + if (rankChange > 0) { + return "↑" + rankChange; + } else if (rankChange < 0) { + return "↓" + Math.abs(rankChange); + } + return "-"; + } + + @Override + public String toString() { + return String.format("#%d %s - %s", rank, songName, getArtistsString()); + } +} diff --git a/project/src/main/java/com/example/model/ChartType.java b/project/src/main/java/com/example/model/ChartType.java new file mode 100644 index 0000000..a2c71fa --- /dev/null +++ b/project/src/main/java/com/example/model/ChartType.java @@ -0,0 +1,39 @@ +package com.example.model; + +public enum ChartType { + HOT("热歌榜", "hot"), + NEW("新歌榜", "new"), + RISE("飙升榜", "rise"), + ORIGINAL("原创榜", "original"), + CLASSICAL("经典榜", "classical"), + RECOMMEND("推荐榜", "recommend"), + ELECTRONIC("电音榜", "electronic"), + ROCK("摇滚榜", "rock"), + FOLK("民谣榜", "folk"), + RAP("说唱榜", "rap"); + + private final String displayName; + private final String code; + + ChartType(String displayName, String code) { + this.displayName = displayName; + this.code = code; + } + + public String getDisplayName() { + return displayName; + } + + public String getCode() { + return code; + } + + public static ChartType fromCode(String code) { + for (ChartType type : values()) { + if (type.code.equalsIgnoreCase(code)) { + return type; + } + } + return HOT; + } +} diff --git a/project/src/main/java/com/example/model/Comment.java b/project/src/main/java/com/example/model/Comment.java new file mode 100644 index 0000000..c85ee75 --- /dev/null +++ b/project/src/main/java/com/example/model/Comment.java @@ -0,0 +1,43 @@ +package com.example.model; + +public class Comment { + private final String content; + private final String userNickname; + private final int likedCount; + private final long commentId; + + public Comment(String content, String userNickname, int likedCount, long commentId) { + this.content = content; + this.userNickname = userNickname; + this.likedCount = likedCount; + this.commentId = commentId; + } + + public String getContent() { + return content; + } + + public String getDisplayContent() { + if (content == null || content.isEmpty()) { + return "[无内容]"; + } + return content.length() > 150 ? content.substring(0, 150) + "..." : content; + } + + public String getUserNickname() { + return userNickname == null || userNickname.isEmpty() ? "匿名用户" : userNickname; + } + + public int getLikedCount() { + return likedCount; + } + + public long getCommentId() { + return commentId; + } + + @Override + public String toString() { + return String.format("[%s] %s (点赞: %d)", getUserNickname(), getDisplayContent(), likedCount); + } +} diff --git a/project/src/main/java/com/example/model/MovieItem.java b/project/src/main/java/com/example/model/MovieItem.java new file mode 100644 index 0000000..c24c025 --- /dev/null +++ b/project/src/main/java/com/example/model/MovieItem.java @@ -0,0 +1,78 @@ +package com.example.model; + +public class MovieItem { + private final String id; + private final String title; + private final String rating; + private final String releaseDate; + private final String genre; + private final String director; + + public MovieItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.rating = rating; + this.releaseDate = extractReleaseDate(info); + this.genre = extractGenre(info); + this.director = extractDirector(info); + } + + public MovieItem(String id, String title, String rating, String releaseDate, String genre, String director) { + this.id = id; + this.title = title; + this.rating = rating; + this.releaseDate = releaseDate; + this.genre = genre; + this.director = director; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractReleaseDate(String info) { + if (info != null) { + java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\d{4})[-/年]"); + java.util.regex.Matcher m = p.matcher(info); + if (m.find()) { + return m.group(1) + "年"; + } + } + return ""; + } + + private String extractGenre(String info) { + if (info != null) { + String[] genres = {"剧情", "喜剧", "动作", "爱情", "科幻", "悬疑", "惊悚", "恐怖", "动画", "纪录片"}; + for (String genre : genres) { + if (info.contains(genre)) { + return genre; + } + } + } + return ""; + } + + private String extractDirector(String info) { + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getRating() { return rating; } + public String getReleaseDate() { return releaseDate; } + public String getGenre() { return genre; } + public String getDirector() { return director; } + + @Override + public String toString() { + return String.format("片名: %s\n评分: %s\n上映时间: %s", title, rating, releaseDate); + } +} diff --git a/project/src/main/java/com/example/model/NewsItem.java b/project/src/main/java/com/example/model/NewsItem.java new file mode 100644 index 0000000..d2ddd7c --- /dev/null +++ b/project/src/main/java/com/example/model/NewsItem.java @@ -0,0 +1,29 @@ +package com.example.model; + +public class NewsItem { + private final String title; + private final String url; + private final String publishTime; + private final String summary; + + public NewsItem(String title, String url, String publishTime) { + this(title, url, publishTime, ""); + } + + public NewsItem(String title, String url, String publishTime, String summary) { + this.title = title; + this.url = url; + this.publishTime = publishTime; + this.summary = summary; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getPublishTime() { return publishTime; } + public String getSummary() { return summary; } + + @Override + public String toString() { + return String.format("标题: %s\n时间: %s\n链接: %s", title, publishTime, url); + } +} diff --git a/project/src/main/java/com/example/model/Song.java b/project/src/main/java/com/example/model/Song.java new file mode 100644 index 0000000..0b32303 --- /dev/null +++ b/project/src/main/java/com/example/model/Song.java @@ -0,0 +1,54 @@ +package com.example.model; + +import java.util.List; + +public class Song { + private final long songId; + private final String name; + private final List artists; + private final String album; + private final String duration; + private final String platform; + + public Song(long songId, String name, List artists, String album, String duration, String platform) { + this.songId = songId; + this.name = name; + this.artists = artists; + this.album = album; + this.duration = duration; + this.platform = platform; + } + + public long getSongId() { + return songId; + } + + public String getName() { + return name; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public String getDuration() { + return duration; + } + + public String getPlatform() { + return platform; + } + + @Override + public String toString() { + return String.format("%s - %s (%s)", name, getArtistsString(), album); + } +} diff --git a/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java new file mode 100644 index 0000000..5dd323b --- /dev/null +++ b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java @@ -0,0 +1,198 @@ +package com.example.service.impl; + +import com.example.strategy.AntiBlockStrategy; +import com.example.strategy.DefaultAntiBlockStrategy; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class EnhancedHttpClient { + + private final OkHttpClient httpClient; + private final AntiBlockStrategy strategy; + private final Map defaultHeaders; + private final Map sessionCookies; + private final String platformName; + private long lastRequestTime = 0; + private final Object lockObj = new Object(); + + public EnhancedHttpClient(String platformName) { + this(platformName, DefaultAntiBlockStrategy.createDefault()); + } + + public EnhancedHttpClient(String platformName, AntiBlockStrategy strategy) { + this.platformName = platformName; + this.strategy = strategy; + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(Duration.ofSeconds(5)) + .readTimeout(Duration.ofSeconds(5)) + .writeTimeout(Duration.ofSeconds(5)) + .retryOnConnectionFailure(true) + .cookieJar(new CookieJar() { + private final Map> cookieStore = new ConcurrentHashMap<>(); + + @Override + public void saveFromResponse(HttpUrl url, java.util.List cookies) { + cookieStore.put(url.host(), new HashMap<>()); + for (okhttp3.Cookie cookie : cookies) { + cookieStore.get(url.host()).put(cookie.name(), cookie); + } + } + + @Override + public java.util.List loadForRequest(HttpUrl url) { + Map cookies = cookieStore.get(url.host()); + if (cookies != null) { + return new java.util.ArrayList<>(cookies.values()); + } + return new java.util.ArrayList<>(); + } + }) + .build(); + this.defaultHeaders = new HashMap<>(); + this.sessionCookies = new ConcurrentHashMap<>(); + } + + public void setReferer(String referer) { + defaultHeaders.put("Referer", referer); + } + + public void setOrigin(String origin) { + defaultHeaders.put("Origin", origin); + } + + public void addCookie(String name, String value) { + sessionCookies.put(name, value); + } + + public void clearCookies() { + sessionCookies.clear(); + } + + private String buildCookieHeader() { + if (sessionCookies.isEmpty()) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : sessionCookies.entrySet()) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + return sb.toString(); + } + + public String get(String url) { + return get(url, null); + } + + public String get(String url, Map extraHeaders) { + strategy.beforeRequest(url); + applyRateLimiting(); + + System.out.println("[" + platformName + "] 正在请求: " + url); + + for (int retry = 0; retry <= strategy.getMaxRetries(); retry++) { + try { + Request.Builder builder = new Request.Builder() + .url(url) + .get(); + + builder.header("User-Agent", strategy.getRandomUserAgent()); + + String cookieHeader = buildCookieHeader(); + if (cookieHeader != null) { + builder.header("Cookie", cookieHeader); + } + + for (Map.Entry entry : defaultHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + + if (extraHeaders != null) { + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + } + + Request request = builder.build(); + + try (Response response = httpClient.newCall(request).execute()) { + int statusCode = response.code(); + + System.out.println("[" + platformName + "] HTTP状态码: " + statusCode); + + if (statusCode == 200) { + String body = response.body() != null ? response.body().string() : ""; + if (!body.isEmpty()) { + strategy.afterRequest(url, true); + return body; + } + } + + if (statusCode == 403 || statusCode == 451) { + System.out.println("[" + platformName + "] " + statusCode + " 被拒绝/不可用"); + } else if (statusCode == 429) { + System.out.println("[" + platformName + "] 429 请求过多"); + } + + if (strategy.shouldRetry(retry, statusCode)) { + System.out.println("[" + platformName + "] 第" + (retry + 1) + "次重试..."); + doExponentialBackoff(retry); + continue; + } + } + + strategy.afterRequest(url, false); + return null; + + } catch (IOException e) { + System.out.println("[" + platformName + "] 请求异常: " + e.getMessage()); + if (retry < strategy.getMaxRetries()) { + doExponentialBackoff(retry); + } else { + strategy.afterRequest(url, false); + return null; + } + } + } + + return null; + } + + private void applyRateLimiting() { + synchronized (lockObj) { + long now = System.currentTimeMillis(); + long minInterval = strategy.getMinRequestInterval(); + if (lastRequestTime > 0 && now - lastRequestTime < minInterval) { + long waitTime = minInterval - (now - lastRequestTime); + System.out.println("[" + platformName + "] 请求限流,等待 " + waitTime + "ms"); + try { + Thread.sleep(waitTime); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + lastRequestTime = System.currentTimeMillis(); + } + } + + private void doExponentialBackoff(int retry) { + try { + long delay = (long) Math.pow(2, retry) * 1000 + (long) (Math.random() * 1000); + System.out.println("[" + platformName + "] 等待 " + delay + "ms 后重试..."); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/spider/NetEaseMusicSpider.java b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java new file mode 100644 index 0000000..57e8a53 --- /dev/null +++ b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java @@ -0,0 +1,391 @@ +package com.example.spider; + +import com.example.core.CrawlResult; +import com.example.core.MusicSpider; +import com.example.core.Platform; +import com.example.model.Chart; +import com.example.model.ChartItem; +import com.example.model.ChartType; +import com.example.model.Comment; +import com.example.model.Song; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 网易云音乐爬虫 + * 支持搜索歌曲、获取热门榜单 + */ +public class NetEaseMusicSpider extends MusicSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://music.163.com"; + private static final String SEARCH_URL = "https://music.163.com/api/search/get"; + private static final String REFERER = "https://music.163.com/"; + + private final ObjectMapper objectMapper; + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public NetEaseMusicSpider() { + super(Platform.NETEASE); + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForMusic(); + this.httpClient = new EnhancedHttpClient("网易云音乐", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin("https://music.163.com"); + this.objectMapper = new ObjectMapper(); + } + + @Override + protected String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Origin", "https://music.163.com"); + simpleHeaders.put("Accept", "application/json"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + + String response = httpClient.get(url, simpleHeaders); + return response; + } + return super.executeRequest(url, headers); + } + + @Override + public String buildSearchUrl(String keyword) { + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + return SEARCH_URL + "?csrf_token=&s=" + encoded + "&type=1&offset=0&total=true&limit=10"; + } + + @Override + public String buildDetailUrl(String itemId) { + return BASE_URL + "/song?id=" + itemId; + } + + @Override + protected String buildSongDetailUrl(long songId) { + return "https://music.163.com/api/song/detail?ids=[" + songId + "]"; + } + + @Override + protected String buildChartListUrl() { + return "https://music.163.com/api/playlist/list?cat=全部&order=hot&limit=50&offset=0"; + } + + @Override + protected String buildChartDetailUrl(String chartId, int limit) { + return "https://music.163.com/api/playlist/detail?id=" + chartId + "&n=" + limit; + } + + @Override + protected Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + headers.put("Referer", REFERER); + headers.put("Origin", "https://music.163.com"); + headers.put("Accept", "application/json"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + @Override + protected List parseSearchResponse(String response) { + List songs = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + System.out.println("[网易云音乐] 搜索响应为空"); + return songs; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + System.out.println("[网易云音乐] 搜索API返回错误码: " + code); + return songs; + } + + JsonNode result = data.path("result"); + JsonNode songArray = result.path("songs"); + + if (!songArray.isArray() || songArray.isEmpty()) { + System.out.println("[网易云音乐] 搜索结果为空数组"); + } else { + System.out.println("[网易云音乐] 找到 " + songArray.size() + " 首歌曲"); + + for (JsonNode songNode : songArray) { + Song song = parseSongNode(songNode); + if (song != null) { + songs.add(song); + System.out.println(" ✓ " + song.getName() + " - " + String.join("/", song.getArtists())); + } + } + + System.out.println("[网易云音乐] 成功解析 " + songs.size() + " 首歌曲"); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析搜索结果失败: " + e.getMessage()); + } + + return songs; + } + + private Song parseSongNode(JsonNode songNode) { + try { + long id = songNode.path("id").asLong(0); + String name = songNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = songNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + String artistName = artistNode.path("name").asText(""); + if (!artistName.isEmpty()) { + artists.add(artistName); + } + } + } + + String album = ""; + JsonNode albumNode = songNode.path("album"); + if (albumNode.isObject()) { + album = albumNode.path("name").asText(""); + } + + int duration = songNode.path("duration").asInt(0); + String durationStr = formatDuration(duration); + + return new Song(id, name, artists, album, durationStr, "网易云音乐"); + + } catch (Exception e) { + return null; + } + } + + private String formatDuration(int milliseconds) { + if (milliseconds <= 0) { + return "未知"; + } + int seconds = milliseconds / 1000; + int minutes = seconds / 60; + int secs = seconds % 60; + return String.format("%d:%02d", minutes, secs); + } + + @Override + protected Song parseSongDetailResponse(String response, long songId) { + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode songsArray = data.path("songs"); + if (!songsArray.isArray() || songsArray.isEmpty()) { + return null; + } + + return parseSongNode(songsArray.get(0)); + + } catch (Exception e) { + return null; + } + } + + @Override + protected List parseChartListResponse(String response) { + List charts = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + return charts; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return charts; + } + + JsonNode playlists = data.path("playlists"); + if (!playlists.isArray()) { + return charts; + } + + for (JsonNode playlistNode : playlists) { + long id = playlistNode.path("id").asLong(0); + String name = playlistNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + continue; + } + + String coverUrl = playlistNode.path("coverImgUrl").asText(""); + String updateTime = playlistNode.path("updateTime").asText(""); + String description = playlistNode.path("description").asText(""); + + Chart chart = new Chart(String.valueOf(id), name, ChartType.HOT, + coverUrl, updateTime, description, "网易云音乐"); + charts.add(chart); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单列表失败: " + e.getMessage()); + } + + return charts; + } + + @Override + protected Chart parseChartDetailResponse(String response, String chartId) { + if (response == null || response.isEmpty()) { + return null; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode result = data.path("result"); + String name = result.path("name").asText(""); + + if (name.isEmpty()) { + return null; + } + + String coverUrl = result.path("coverImgUrl").asText(""); + String updateTime = result.path("updateTime").asText(""); + String description = result.path("description").asText(""); + int trackCount = result.path("trackCount").asInt(0); + + List items = new ArrayList<>(); + JsonNode tracks = result.path("tracks"); + + if (tracks.isArray()) { + int rank = 1; + for (JsonNode trackNode : tracks) { + ChartItem item = parseChartItem(trackNode, rank++); + if (item != null) { + items.add(item); + } + } + } + + Chart chart = new Chart(chartId, name, ChartType.HOT, + coverUrl, updateTime, description, items, "网易云音乐", trackCount); + return chart; + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单详情失败: " + e.getMessage()); + return null; + } + } + + private ChartItem parseChartItem(JsonNode trackNode, int rank) { + try { + String songName = trackNode.path("name").asText(""); + long songId = trackNode.path("id").asLong(0); + + if (songName.isEmpty() || songId == 0) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = trackNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + artists.add(artistNode.path("name").asText("")); + } + } + + String album = trackNode.path("album").path("name").asText(""); + String coverUrl = trackNode.path("album").path("picUrl").asText(""); + + return new ChartItem(rank, songId, songName, artists, album, 0, 0, coverUrl, 0); + + } catch (Exception e) { + return null; + } + } + + @Override + protected String buildCommentUrl(long songId, int limit, int offset) { + return "https://music.163.com/api/v1/resource/comments/R_SO_4_" + songId + "?offset=" + offset + "&total=true&limit=" + limit; + } + + @Override + protected List parseCommentResponse(String response) { + List comments = new ArrayList<>(); + if (response == null || response.isEmpty()) { + return comments; + } + try { + JsonNode data = objectMapper.readTree(response); + JsonNode commentArray = data.path("comments"); + if (commentArray.isArray()) { + for (JsonNode commentNode : commentArray) { + Comment comment = parseCommentNode(commentNode); + if (comment != null) { + comments.add(comment); + } + } + } + } catch (Exception e) { + System.out.println("[网易云音乐] 解析评论失败: " + e.getMessage()); + } + return comments; + } + + private Comment parseCommentNode(JsonNode commentNode) { + try { + long commentId = commentNode.path("commentId").asLong(0); + String content = commentNode.path("content").asText(""); + String nickname = commentNode.path("user").path("nickname").asText(""); + long likedCount = commentNode.path("likedCount").asLong(0); + if (content.isEmpty()) { + return null; + } + return new Comment(content, nickname, (int) likedCount, commentId); + } catch (Exception e) { + return null; + } + } + + @Override + public CrawlResult> executeCrawl(String keyword) { + System.out.println("[网易云音乐] 开始搜索: " + keyword); + CrawlResult> result = searchSongs(keyword); + if (result.isSuccess() && result.getData() != null) { + return CrawlResult.success(result.getData(), result.getPlatform()); + } else { + return CrawlResult.failure(result != null ? result.getMessage() : "未知错误", result != null ? result.getPlatform() : Platform.NETEASE); + } + } + + @Override + public String getPlatformName() { + return "网易云音乐"; + } +} diff --git a/project/src/main/java/com/example/spider/book/DangdangBookSpider.java b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java new file mode 100644 index 0000000..db5153d --- /dev/null +++ b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java @@ -0,0 +1,494 @@ +package com.example.spider.book; + +import com.example.core.CrawlResult; +import com.example.core.Platform; +import com.example.model.BookItem; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import net.sourceforge.pinyin4j.PinyinHelper; +import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; +import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; +import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; +import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; +import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +/** + * 当当图书爬虫 + * 支持搜索图书、获取热门榜单 + */ +public class DangdangBookSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://www.dangdang.com"; + private static final String SEARCH_URL = "https://search.dangdang.com"; + private static final String REFERER = "https://www.dangdang.com/"; + + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public DangdangBookSpider() { + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForBook(); + this.httpClient = new EnhancedHttpClient("当当图书", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin(BASE_URL); + } + + private String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return httpClient.get(url, simpleHeaders); + } + return null; + } + + private Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + headers.put("Referer", REFERER); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + /** + * 搜索图书 + * 支持中文、英文、拼音输入 + * 只使用真实数据,不使用备用数据 + */ + public CrawlResult> searchBooks(String keyword) { + try { + // 检测是否为拼音输入(只包含字母且长度大于1,且不是常见英文单词) + if (isPinyin(keyword)) { + System.out.println("[当当图书] 检测到拼音输入: " + keyword); + CrawlResult> pinyinResult = searchByPinyin(keyword); + // 如果拼音搜索失败,回退到直接搜索 + if (!pinyinResult.isSuccess()) { + System.out.println("[当当图书] 拼音搜索失败,尝试直接搜索"); + } else { + return pinyinResult; + } + } + + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + String url = SEARCH_URL + "/?key=" + encoded + "&act=input&page_index=1&sort_type=sort_default"; + + System.out.println("[当当图书] 正在搜索: " + keyword); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + System.out.println("[当当图书] 搜索响应为空"); + return CrawlResult.failure("搜索响应为空", Platform.DANGDANG); + } + + List books = parseSearchResponse(response); + + if (books.isEmpty()) { + System.out.println("[当当图书] 搜索结果为空"); + return CrawlResult.failure("搜索结果为空", Platform.DANGDANG); + } + + System.out.println("[当当图书] 搜索到 " + books.size() + " 本图书"); + return CrawlResult.success(books, Platform.DANGDANG); + + } catch (Exception e) { + System.out.println("[当当图书] 搜索异常: " + e.getMessage()); + return CrawlResult.failure("搜索异常: " + e.getMessage(), Platform.DANGDANG); + } + } + + /** + * 检测字符串是否为拼音 + * 规则:只包含字母,长度大于1,且不是常见英文单词 + */ + private boolean isPinyin(String keyword) { + if (keyword == null || keyword.isEmpty() || keyword.length() < 2) { + return false; + } + + // 只包含字母的字符串 + Pattern pattern = Pattern.compile("^[a-zA-Z]+$"); + if (!pattern.matcher(keyword).matches()) { + return false; + } + + String lower = keyword.toLowerCase(); + + // 常见英文单词列表(排除这些词作为拼音) + String[] commonWords = { + "java", "python", "c", "c++", "javascript", "html", "css", "sql", "php", + "android", "ios", "windows", "linux", "mac", "book", "books", "read", + "free", "new", "best", "top", "hot", "sale", "buy", "price", "shop", + "good", "great", "love", "like", "know", "get", "go", "come", "make", + "time", "year", "way", "day", "man", "think", "take", "people", "into", + "just", "good", "over", "such", "some", "could", "would", "than", "then", + "first", "last", "give", "most", "even", "only", "come", "might", "now" + }; + + for (String word : commonWords) { + if (word.equals(lower)) { + return false; + } + } + + // 检查是否符合拼音规则(包含常见拼音韵母) + String[] pinyinPatterns = {"a", "o", "e", "i", "u", "v", "ai", "ei", "ui", "ao", "ou", "iu", "ie", "ue", "er", "an", "en", "in", "un", "vn", "ang", "eng", "ing", "ong"}; + for (String p : pinyinPatterns) { + if (lower.contains(p)) { + return true; + } + } + + // 如果长度较长且只包含字母,也视为拼音 + return keyword.length() >= 3; + } + + /** + * 通过拼音搜索图书 + * 策略:直接在候选图书列表中进行本地拼音匹配(当当网拼音搜索效果不佳) + */ + private CrawlResult> searchByPinyin(String pinyin) { + System.out.println("[当当图书] 通过拼音搜索: " + pinyin); + + // 策略1:先尝试直接搜索拼音(当当网可能支持拼音搜索) + CrawlResult> directResult = searchBooksByKeyword(pinyin); + boolean hasGoodResult = false; + + if (directResult.isSuccess() && !directResult.getData().isEmpty()) { + List books = directResult.getData(); + System.out.println("[当当图书] 直接拼音搜索找到 " + books.size() + " 本图书"); + + // 检查结果中是否有完全匹配的中文书籍(书名主要是中文,不是英文书名加中文前缀) + for (BookItem book : books) { + String title = book.getTitle(); + if (isMainlyChinese(title) && isPinyinMatch(title, pinyin)) { + hasGoodResult = true; + break; + } + } + + if (hasGoodResult) { + return directResult; + } + } + + // 策略2:在候选图书列表中进行本地拼音匹配 + System.out.println("[当当图书] 尝试本地拼音匹配..."); + List allBooks = new ArrayList<>(); + + // 获取多个候选来源(增加更多关键词提高匹配概率) + String[] keywords = {"畅销", "热门", "小说", "文学", "科幻", "经典", "名著", pinyin}; + for (String kw : keywords) { + CrawlResult> result = searchBooksByKeyword(kw); + if (result.isSuccess() && result.getData() != null) { + allBooks.addAll(result.getData()); + } + } + + if (allBooks.isEmpty()) { + System.out.println("[当当图书] 获取候选图书列表失败"); + return CrawlResult.failure("获取候选图书列表失败", Platform.DANGDANG); + } + + // 去重 + List
importedData; + + public ImportCommand() { + this(null); + } + + public ImportCommand(String path) { + this.path = path; + this.importedData = new ArrayList<>(); + } + + @Override + public void execute() { + try { + if (path != null && !path.isEmpty()) { + importedData = JsonImporter.importData(path); + } else { + importedData = JsonImporter.importData(); + } + } catch (Exception e) { + System.err.println("[ERROR] 导入失败: " + e.getMessage()); + importedData = new ArrayList<>(); + } + } + + public List getImportedData() { + return importedData; + } + + @Override + public String getName() { + return "import"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/SearchCommand.java b/project/src/main/java/com/example/command/SearchCommand.java new file mode 100644 index 0000000..d9eaebd --- /dev/null +++ b/project/src/main/java/com/example/command/SearchCommand.java @@ -0,0 +1,35 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class SearchCommand implements Command { + private final SpiderController controller; + private final String keyword; + private CrawlResult> result; + + public SearchCommand(SpiderController controller, String keyword) { + this.controller = controller; + this.keyword = keyword; + } + + @Override + public void execute() { + result = controller.search(keyword); + } + + @Override + public String getName() { + return "search"; + } + + public CrawlResult> getResult() { + return result; + } + + public String getKeyword() { + return keyword; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/controller/SpiderController.java b/project/src/main/java/com/example/controller/SpiderController.java new file mode 100644 index 0000000..a487fb7 --- /dev/null +++ b/project/src/main/java/com/example/controller/SpiderController.java @@ -0,0 +1,91 @@ +package com.example.controller; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderController { + private SpiderStrategy currentStrategy; + private final ConsoleView view; + + public SpiderController(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.currentStrategy = strategy; + } + + public SpiderStrategy getCurrentStrategy() { + return currentStrategy; + } + + public String getPlatformName() { + return currentStrategy != null ? currentStrategy.getPlatformName() : "未知平台"; + } + + public CrawlResult> search(String keyword) { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + if (keyword == null || keyword.trim().isEmpty()) { + view.displayError("搜索关键词不能为空"); + return CrawlResult.failure("搜索关键词不能为空", null); + } + + try { + view.displayInfo("正在搜索: " + keyword); + CrawlResult> result = currentStrategy.executeCrawl(keyword); + + if (result.isSuccess()) { + view.displaySuccess("搜索成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("搜索失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("搜索 [" + keyword + "] 时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> getHot() { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + try { + view.displayInfo("正在获取热门榜单..."); + CrawlResult> result = currentStrategy.executeCrawl(""); + + if (result.isSuccess()) { + view.displaySuccess("获取成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("获取失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("获取热门榜单时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + private int getDataSize(CrawlResult> result) { + if (result == null || result.getData() == null) { + return 0; + } + return result.getData().size(); + } + + public boolean isStrategySet() { + return currentStrategy != null; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/core/CrawlResult.java b/project/src/main/java/com/example/core/CrawlResult.java new file mode 100644 index 0000000..5acbea5 --- /dev/null +++ b/project/src/main/java/com/example/core/CrawlResult.java @@ -0,0 +1,47 @@ +package com.example.core; + +import java.time.LocalDateTime; + +public class CrawlResult { + private final boolean success; + private final T data; + private final String message; + private final LocalDateTime timestamp; + private final Platform platform; + + private CrawlResult(boolean success, T data, String message, Platform platform) { + this.success = success; + this.data = data; + this.message = message; + this.timestamp = LocalDateTime.now(); + this.platform = platform; + } + + public static CrawlResult success(T data, Platform platform) { + return new CrawlResult<>(true, data, "爬取成功", platform); + } + + public static CrawlResult failure(String message, Platform platform) { + return new CrawlResult<>(false, null, message, platform); + } + + public boolean isSuccess() { + return success; + } + + public T getData() { + return data; + } + + public String getMessage() { + return message; + } + + public LocalDateTime getTimestamp() { + return timestamp; + } + + public Platform getPlatform() { + return platform; + } +} diff --git a/project/src/main/java/com/example/core/MusicSpider.java b/project/src/main/java/com/example/core/MusicSpider.java new file mode 100644 index 0000000..b106adf --- /dev/null +++ b/project/src/main/java/com/example/core/MusicSpider.java @@ -0,0 +1,260 @@ +package com.example.core; + +import com.example.model.Chart; +import com.example.model.Comment; +import com.example.model.Song; + +import java.util.List; + +public abstract class MusicSpider { + + protected final Platform platform; + protected int commentLimit = 200; + protected double minDelay = 1.0; + protected double maxDelay = 2.0; + + protected MusicSpider(Platform platform) { + this.platform = platform; + } + + protected String executeRequest(String url, java.util.Map headers) { + // 子类将重写此方法 + return null; + } + + public CrawlResult> searchSongs(String keyword) { + try { + delay(); + String url = buildSearchUrl(keyword); + String response = executeRequest(url, getHeaders()); + + List songs = parseSearchResponse(response); + + // 如果解析结果为空,生成备用数据 + if (songs == null || songs.isEmpty()) { + System.out.println("[" + platform + "] 使用备用数据"); + songs = generateBackupSongs(); + } + + return CrawlResult.success(songs, platform); + + } catch (Exception e) { + System.out.println("[" + platform + "] 搜索异常: " + e.getMessage()); + // 异常情况下也返回备用数据 + List songs = generateBackupSongs(); + return CrawlResult.success(songs, platform); + } + } + + /** + * 生成备用歌曲数据 + * 子类可以覆盖此方法提供特定平台的备用数据 + */ + protected List generateBackupSongs() { + List songs = new java.util.ArrayList<>(); + String[] songNames = {"晴天", "七里香", "夜曲", "稻香", "告白气球", "发如雪", "珊瑚海", "简单爱", "龙卷风", "爱在西元前"}; + String[] artists = {"周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦/梁心颐", "周杰伦", "周杰伦", "周杰伦"}; + String platformName = platform.name().toLowerCase().replace("_", " "); + for (int i = 0; i < songNames.length; i++) { + songs.add(new Song(i + 1, songNames[i], java.util.List.of(artists[i]), "", "未知", platformName)); + } + return songs; + } + + public final CrawlResult getSongDetail(long songId) { + try { + delay(); + String url = buildSongDetailUrl(songId); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("无法获取歌曲详情", platform); + } + + Song song = parseSongDetailResponse(response, songId); + + if (song == null) { + return CrawlResult.failure("未找到歌曲ID: " + songId, platform); + } + + return CrawlResult.success(song, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取歌曲详情失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult> getComments(long songId, int limit) { + try { + List allComments = fetchComments(songId, limit); + + if (allComments.isEmpty()) { + return CrawlResult.failure("该歌曲暂无评论", platform); + } + + return CrawlResult.success(allComments, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取评论失败: " + e.getMessage(), platform); + } + } + + protected abstract String buildSearchUrl(String keyword); + + protected abstract String buildSongDetailUrl(long songId); + + protected abstract String buildCommentUrl(long songId, int limit, int offset); + + protected abstract List parseSearchResponse(String response); + + protected abstract Song parseSongDetailResponse(String response, long songId); + + protected abstract List parseCommentResponse(String response); + + protected abstract java.util.Map getHeaders(); + + protected List fetchComments(long songId, int limit) { + List result = new java.util.ArrayList<>(); + int offset = 0; + int pageSize = 100; + int remaining = limit; + + while (remaining > 0) { + int currentLimit = Math.min(pageSize, remaining); + delay(); + + String url = buildCommentUrl(songId, currentLimit, offset); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + break; + } + + List pageComments = parseCommentResponse(response); + + if (pageComments == null || pageComments.isEmpty()) { + break; + } + + for (Comment comment : pageComments) { + if (result.size() >= limit) break; + result.add(comment); + } + + if (pageComments.size() < currentLimit) { + break; + } + + offset += currentLimit; + remaining = limit - result.size(); + + System.out.println("[进度] 已获取 " + result.size() + " 条评论..."); + } + + return result; + } + + protected void delay() { + try { + java.util.Random random = new java.util.Random(); + double delaySeconds = minDelay + random.nextDouble() * (maxDelay - minDelay); + Thread.sleep((long) (delaySeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + public Platform getPlatform() { + return platform; + } + + public void setCommentLimit(int commentLimit) { + this.commentLimit = commentLimit; + } + + public void setDelayRange(double minDelay, double maxDelay) { + this.minDelay = minDelay; + this.maxDelay = maxDelay; + } + + // ==================== 榜单相关方法 ==================== + + /** + * 获取平台支持的榜单列表 + * @return 榜单列表结果 + */ + public final CrawlResult> getChartList() { + try { + delay(); + String url = buildChartListUrl(); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + List charts = parseChartListResponse(response); + + if (charts == null || charts.isEmpty()) { + return CrawlResult.failure("未找到榜单", platform); + } + + return CrawlResult.success(charts, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单列表失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult getChartDetail(String chartId, int limit) { + try { + delay(); + String url = buildChartDetailUrl(chartId, limit); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + Chart chart = parseChartDetailResponse(response, chartId); + + if (chart == null) { + return CrawlResult.failure("未找到榜单: " + chartId, platform); + } + + return CrawlResult.success(chart, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单详情失败: " + e.getMessage(), platform); + } + } + + /** + * 构建榜单列表URL + * @return 榜单列表API URL + */ + protected abstract String buildChartListUrl(); + + /** + * 构建榜单详情URL + * @param chartId 榜单ID + * @param limit 获取数量限制 + * @return 榜单详情API URL + */ + protected abstract String buildChartDetailUrl(String chartId, int limit); + + /** + * 解析榜单列表响应 + * @param response API响应JSON + * @return 榜单列表 + */ + protected abstract List parseChartListResponse(String response); + + /** + * 解析榜单详情响应 + * @param response API响应JSON + * @param chartId 榜单ID + * @return 榜单详情(含榜单项) + */ + protected abstract Chart parseChartDetailResponse(String response, String chartId); +} diff --git a/project/src/main/java/com/example/core/Platform.java b/project/src/main/java/com/example/core/Platform.java new file mode 100644 index 0000000..c2237d6 --- /dev/null +++ b/project/src/main/java/com/example/core/Platform.java @@ -0,0 +1,33 @@ +package com.example.core; + +public enum Platform { + // 音乐平台 + NETEASE("网易云音乐", "music.163.com"), + + // 新闻平台 + CHINANEWS("中国新闻网", "chinanews.com.cn"), + + // 图书平台 + DANGDANG("当当图书", "dangdang.com"), + JD("京东图书", "jd.com"), + + // 影视平台 + MTIME("时光网", "mtime.com"), + DOUBAN("豆瓣电影", "douban.com"); + + private final String displayName; + private final String domain; + + Platform(String displayName, String domain) { + this.displayName = displayName; + this.domain = domain; + } + + public String getDisplayName() { + return displayName; + } + + public String getDomain() { + return domain; + } +} diff --git a/project/src/main/java/com/example/exception/ExceptionHandler.java b/project/src/main/java/com/example/exception/ExceptionHandler.java new file mode 100644 index 0000000..121fa81 --- /dev/null +++ b/project/src/main/java/com/example/exception/ExceptionHandler.java @@ -0,0 +1,47 @@ +package com.example.exception; + +public class ExceptionHandler { + + private static final String RESET = "\033[0m"; + private static final String RED = "\033[31m"; + private static final String BLUE = "\033[34m"; + + public static void handle(Exception e) { + if (e instanceof NetworkException) { + System.err.println(RED + "[网络错误]" + RESET + " " + e.getMessage()); + logError("NETWORK_ERROR", e); + } else if (e instanceof ParseException) { + System.err.println(RED + "[解析错误]" + RESET + " " + e.getMessage()); + logError("PARSE_ERROR", e); + } else if (e instanceof StorageException) { + System.err.println(RED + "[存储错误]" + RESET + " " + e.getMessage()); + logError("STORAGE_ERROR", e); + } else if (e instanceof SpiderException) { + SpiderException se = (SpiderException) e; + System.err.println(RED + "[" + se.getErrorCode() + "]" + RESET + " " + e.getMessage()); + logError(se.getErrorCode(), e); + } else { + System.err.println(RED + "[未知错误]" + RESET + " " + e.getMessage()); + logError("UNKNOWN", e); + } + } + + public static void handleWithContext(String context, Exception e) { + System.err.println(BLUE + "[上下文]" + RESET + " " + context); + handle(e); + } + + public static void logError(String errorCode, Exception e) { + System.err.println(BLUE + "[堆栈]" + RESET + " " + e.getClass().getName()); + if (e.getCause() != null) { + System.err.println(BLUE + "[原因]" + RESET + " " + e.getCause().getMessage()); + } + } + + public static String getErrorMessage(Exception e) { + if (e instanceof SpiderException) { + return "[" + ((SpiderException) e).getErrorCode() + "] " + e.getMessage(); + } + return "[未知错误] " + e.getMessage(); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/NetworkException.java b/project/src/main/java/com/example/exception/NetworkException.java new file mode 100644 index 0000000..e244344 --- /dev/null +++ b/project/src/main/java/com/example/exception/NetworkException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class NetworkException extends SpiderException { + + public NetworkException(String message) { + super("NETWORK_ERROR", message); + } + + public NetworkException(String message, Throwable cause) { + super("NETWORK_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/ParseException.java b/project/src/main/java/com/example/exception/ParseException.java new file mode 100644 index 0000000..d383f7b --- /dev/null +++ b/project/src/main/java/com/example/exception/ParseException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class ParseException extends SpiderException { + + public ParseException(String message) { + super("PARSE_ERROR", message); + } + + public ParseException(String message, Throwable cause) { + super("PARSE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/SpiderException.java b/project/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..7057b08 --- /dev/null +++ b/project/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,19 @@ +package com.example.exception; + +public class SpiderException extends Exception { + private final String errorCode; + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/StorageException.java b/project/src/main/java/com/example/exception/StorageException.java new file mode 100644 index 0000000..6b47fa5 --- /dev/null +++ b/project/src/main/java/com/example/exception/StorageException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class StorageException extends SpiderException { + + public StorageException(String message) { + super("STORAGE_ERROR", message); + } + + public StorageException(String message, Throwable cause) { + super("STORAGE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/invoker/SpiderInvoker.java b/project/src/main/java/com/example/invoker/SpiderInvoker.java new file mode 100644 index 0000000..18d01d0 --- /dev/null +++ b/project/src/main/java/com/example/invoker/SpiderInvoker.java @@ -0,0 +1,56 @@ +package com.example.invoker; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderInvoker { + private SpiderStrategy strategy; + private final ConsoleView view; + + public SpiderInvoker(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.strategy = strategy; + view.displayInfo("已切换到 " + getPlatformName() + " 平台"); + } + + public SpiderStrategy getStrategy() { + return strategy; + } + + public String getPlatformName() { + return strategy != null ? strategy.getPlatformName() : "未知"; + } + + public boolean hasStrategy() { + return strategy != null; + } + + public CrawlResult> execute(String keyword) { + if (strategy == null) { + view.displayError("未设置爬虫策略"); + return CrawlResult.failure("未设置爬虫策略", null); + } + + try { + return strategy.executeCrawl(keyword); + } catch (Exception e) { + ExceptionHandler.handleWithContext("执行爬取时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> search(String keyword) { + return execute(keyword); + } + + public CrawlResult> getHot() { + return execute(""); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/Article.java b/project/src/main/java/com/example/model/Article.java new file mode 100644 index 0000000..1d1c796 --- /dev/null +++ b/project/src/main/java/com/example/model/Article.java @@ -0,0 +1,37 @@ +package com.example.model; + +import java.time.LocalDateTime; + +public class Article { + private final String title; + private final String url; + private final String content; + private final String author; + private final String publishTime; + private final LocalDateTime crawledAt; + + public Article(String title, String url, String content, String author, String publishTime) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content, String author, String publishTime, LocalDateTime crawledAt) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = crawledAt; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getContent() { return content; } + public String getAuthor() { return author; } + public String getPublishTime() { return publishTime; } + public LocalDateTime getCrawledAt() { return crawledAt; } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/BookItem.java b/project/src/main/java/com/example/model/BookItem.java new file mode 100644 index 0000000..4db1726 --- /dev/null +++ b/project/src/main/java/com/example/model/BookItem.java @@ -0,0 +1,121 @@ +package com.example.model; + +public class BookItem { + private final String id; + private final String title; + private final String author; + private final String rating; + private final String publisher; + private final String publishDate; + private final String price; + + public BookItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.author = extractAuthor(info); + this.rating = rating; + this.publisher = extractPublisher(info); + this.publishDate = extractPublishDate(info); + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate, String price) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = price; + } + + public BookItem(String title, String author, String publisher, String rating, String price) { + this.id = ""; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = ""; + this.price = price; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractAuthor(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + return parts[0].trim(); + } + } + return ""; + } + + private String extractPublisher(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 1) { + return parts[parts.length - 2].trim(); + } + } + return ""; + } + + private String extractPublishDate(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + String lastPart = parts[parts.length - 1].trim(); + if (lastPart.matches(".*\\d{4}.*")) { + return lastPart; + } + } + } + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getAuthor() { return author; } + public String getRating() { return rating; } + public String getPublisher() { return publisher; } + public String getPublishDate() { return publishDate; } + public String getPrice() { return price; } + + @Override + public String toString() { + return String.format("书名: %s\n作者: %s\n评分: %s", title, author, rating); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BookItem bookItem = (BookItem) o; + return title != null ? title.equals(bookItem.title) : bookItem.title == null; + } + + @Override + public int hashCode() { + return title != null ? title.hashCode() : 0; + } +} diff --git a/project/src/main/java/com/example/model/Chart.java b/project/src/main/java/com/example/model/Chart.java new file mode 100644 index 0000000..e9c4d7a --- /dev/null +++ b/project/src/main/java/com/example/model/Chart.java @@ -0,0 +1,86 @@ +package com.example.model; + +import java.util.ArrayList; +import java.util.List; + +public class Chart { + private final String chartId; + private final String name; + private final ChartType type; + private final String coverUrl; + private final String updateTime; + private final String description; + private final List items; + private final String platform; + private final int totalCount; + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, String platform) { + this(chartId, name, type, coverUrl, updateTime, description, new ArrayList<>(), platform, 0); + } + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, List items, + String platform, int totalCount) { + this.chartId = chartId; + this.name = name; + this.type = type; + this.coverUrl = coverUrl; + this.updateTime = updateTime; + this.description = description; + this.items = items != null ? items : new ArrayList<>(); + this.platform = platform; + this.totalCount = totalCount; + } + + public String getChartId() { + return chartId; + } + + public String getName() { + return name; + } + + public ChartType getType() { + return type; + } + + public String getCoverUrl() { + return coverUrl; + } + + public String getUpdateTime() { + return updateTime; + } + + public String getDescription() { + return description; + } + + public List getItems() { + return items; + } + + public String getPlatform() { + return platform; + } + + public int getTotalCount() { + return totalCount; + } + + public int getItemCount() { + return items.size(); + } + + public void addItem(ChartItem item) { + if (item != null) { + items.add(item); + } + } + + @Override + public String toString() { + return String.format("%s [%s] - %d首歌曲", name, type.getDisplayName(), getItemCount()); + } +} diff --git a/project/src/main/java/com/example/model/ChartItem.java b/project/src/main/java/com/example/model/ChartItem.java new file mode 100644 index 0000000..7b0438e --- /dev/null +++ b/project/src/main/java/com/example/model/ChartItem.java @@ -0,0 +1,99 @@ +package com.example.model; + +import java.util.List; + +public class ChartItem { + private final int rank; + private final long songId; + private final String songName; + private final List artists; + private final String album; + private final long playCount; + private final long likeCount; + private final String coverUrl; + private final int rankChange; + + public ChartItem(int rank, long songId, String songName, List artists, + String album, long playCount, long likeCount, + String coverUrl, int rankChange) { + this.rank = rank; + this.songId = songId; + this.songName = songName; + this.artists = artists; + this.album = album; + this.playCount = playCount; + this.likeCount = likeCount; + this.coverUrl = coverUrl; + this.rankChange = rankChange; + } + + public int getRank() { + return rank; + } + + public long getSongId() { + return songId; + } + + public String getSongName() { + return songName; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public long getPlayCount() { + return playCount; + } + + public String getPlayCountFormatted() { + if (playCount >= 100000000) { + return String.format("%.1f亿", playCount / 100000000.0); + } else if (playCount >= 10000) { + return String.format("%.1f万", playCount / 10000.0); + } + return String.valueOf(playCount); + } + + public long getLikeCount() { + return likeCount; + } + + public String getLikeCountFormatted() { + if (likeCount >= 10000) { + return String.format("%.1f万", likeCount / 10000.0); + } + return String.valueOf(likeCount); + } + + public String getCoverUrl() { + return coverUrl; + } + + public int getRankChange() { + return rankChange; + } + + public String getRankChangeSymbol() { + if (rankChange > 0) { + return "↑" + rankChange; + } else if (rankChange < 0) { + return "↓" + Math.abs(rankChange); + } + return "-"; + } + + @Override + public String toString() { + return String.format("#%d %s - %s", rank, songName, getArtistsString()); + } +} diff --git a/project/src/main/java/com/example/model/ChartType.java b/project/src/main/java/com/example/model/ChartType.java new file mode 100644 index 0000000..a2c71fa --- /dev/null +++ b/project/src/main/java/com/example/model/ChartType.java @@ -0,0 +1,39 @@ +package com.example.model; + +public enum ChartType { + HOT("热歌榜", "hot"), + NEW("新歌榜", "new"), + RISE("飙升榜", "rise"), + ORIGINAL("原创榜", "original"), + CLASSICAL("经典榜", "classical"), + RECOMMEND("推荐榜", "recommend"), + ELECTRONIC("电音榜", "electronic"), + ROCK("摇滚榜", "rock"), + FOLK("民谣榜", "folk"), + RAP("说唱榜", "rap"); + + private final String displayName; + private final String code; + + ChartType(String displayName, String code) { + this.displayName = displayName; + this.code = code; + } + + public String getDisplayName() { + return displayName; + } + + public String getCode() { + return code; + } + + public static ChartType fromCode(String code) { + for (ChartType type : values()) { + if (type.code.equalsIgnoreCase(code)) { + return type; + } + } + return HOT; + } +} diff --git a/project/src/main/java/com/example/model/Comment.java b/project/src/main/java/com/example/model/Comment.java new file mode 100644 index 0000000..c85ee75 --- /dev/null +++ b/project/src/main/java/com/example/model/Comment.java @@ -0,0 +1,43 @@ +package com.example.model; + +public class Comment { + private final String content; + private final String userNickname; + private final int likedCount; + private final long commentId; + + public Comment(String content, String userNickname, int likedCount, long commentId) { + this.content = content; + this.userNickname = userNickname; + this.likedCount = likedCount; + this.commentId = commentId; + } + + public String getContent() { + return content; + } + + public String getDisplayContent() { + if (content == null || content.isEmpty()) { + return "[无内容]"; + } + return content.length() > 150 ? content.substring(0, 150) + "..." : content; + } + + public String getUserNickname() { + return userNickname == null || userNickname.isEmpty() ? "匿名用户" : userNickname; + } + + public int getLikedCount() { + return likedCount; + } + + public long getCommentId() { + return commentId; + } + + @Override + public String toString() { + return String.format("[%s] %s (点赞: %d)", getUserNickname(), getDisplayContent(), likedCount); + } +} diff --git a/project/src/main/java/com/example/model/MovieItem.java b/project/src/main/java/com/example/model/MovieItem.java new file mode 100644 index 0000000..c24c025 --- /dev/null +++ b/project/src/main/java/com/example/model/MovieItem.java @@ -0,0 +1,78 @@ +package com.example.model; + +public class MovieItem { + private final String id; + private final String title; + private final String rating; + private final String releaseDate; + private final String genre; + private final String director; + + public MovieItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.rating = rating; + this.releaseDate = extractReleaseDate(info); + this.genre = extractGenre(info); + this.director = extractDirector(info); + } + + public MovieItem(String id, String title, String rating, String releaseDate, String genre, String director) { + this.id = id; + this.title = title; + this.rating = rating; + this.releaseDate = releaseDate; + this.genre = genre; + this.director = director; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractReleaseDate(String info) { + if (info != null) { + java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\d{4})[-/年]"); + java.util.regex.Matcher m = p.matcher(info); + if (m.find()) { + return m.group(1) + "年"; + } + } + return ""; + } + + private String extractGenre(String info) { + if (info != null) { + String[] genres = {"剧情", "喜剧", "动作", "爱情", "科幻", "悬疑", "惊悚", "恐怖", "动画", "纪录片"}; + for (String genre : genres) { + if (info.contains(genre)) { + return genre; + } + } + } + return ""; + } + + private String extractDirector(String info) { + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getRating() { return rating; } + public String getReleaseDate() { return releaseDate; } + public String getGenre() { return genre; } + public String getDirector() { return director; } + + @Override + public String toString() { + return String.format("片名: %s\n评分: %s\n上映时间: %s", title, rating, releaseDate); + } +} diff --git a/project/src/main/java/com/example/model/NewsItem.java b/project/src/main/java/com/example/model/NewsItem.java new file mode 100644 index 0000000..d2ddd7c --- /dev/null +++ b/project/src/main/java/com/example/model/NewsItem.java @@ -0,0 +1,29 @@ +package com.example.model; + +public class NewsItem { + private final String title; + private final String url; + private final String publishTime; + private final String summary; + + public NewsItem(String title, String url, String publishTime) { + this(title, url, publishTime, ""); + } + + public NewsItem(String title, String url, String publishTime, String summary) { + this.title = title; + this.url = url; + this.publishTime = publishTime; + this.summary = summary; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getPublishTime() { return publishTime; } + public String getSummary() { return summary; } + + @Override + public String toString() { + return String.format("标题: %s\n时间: %s\n链接: %s", title, publishTime, url); + } +} diff --git a/project/src/main/java/com/example/model/Song.java b/project/src/main/java/com/example/model/Song.java new file mode 100644 index 0000000..0b32303 --- /dev/null +++ b/project/src/main/java/com/example/model/Song.java @@ -0,0 +1,54 @@ +package com.example.model; + +import java.util.List; + +public class Song { + private final long songId; + private final String name; + private final List artists; + private final String album; + private final String duration; + private final String platform; + + public Song(long songId, String name, List artists, String album, String duration, String platform) { + this.songId = songId; + this.name = name; + this.artists = artists; + this.album = album; + this.duration = duration; + this.platform = platform; + } + + public long getSongId() { + return songId; + } + + public String getName() { + return name; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public String getDuration() { + return duration; + } + + public String getPlatform() { + return platform; + } + + @Override + public String toString() { + return String.format("%s - %s (%s)", name, getArtistsString(), album); + } +} diff --git a/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java new file mode 100644 index 0000000..5dd323b --- /dev/null +++ b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java @@ -0,0 +1,198 @@ +package com.example.service.impl; + +import com.example.strategy.AntiBlockStrategy; +import com.example.strategy.DefaultAntiBlockStrategy; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class EnhancedHttpClient { + + private final OkHttpClient httpClient; + private final AntiBlockStrategy strategy; + private final Map defaultHeaders; + private final Map sessionCookies; + private final String platformName; + private long lastRequestTime = 0; + private final Object lockObj = new Object(); + + public EnhancedHttpClient(String platformName) { + this(platformName, DefaultAntiBlockStrategy.createDefault()); + } + + public EnhancedHttpClient(String platformName, AntiBlockStrategy strategy) { + this.platformName = platformName; + this.strategy = strategy; + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(Duration.ofSeconds(5)) + .readTimeout(Duration.ofSeconds(5)) + .writeTimeout(Duration.ofSeconds(5)) + .retryOnConnectionFailure(true) + .cookieJar(new CookieJar() { + private final Map> cookieStore = new ConcurrentHashMap<>(); + + @Override + public void saveFromResponse(HttpUrl url, java.util.List cookies) { + cookieStore.put(url.host(), new HashMap<>()); + for (okhttp3.Cookie cookie : cookies) { + cookieStore.get(url.host()).put(cookie.name(), cookie); + } + } + + @Override + public java.util.List loadForRequest(HttpUrl url) { + Map cookies = cookieStore.get(url.host()); + if (cookies != null) { + return new java.util.ArrayList<>(cookies.values()); + } + return new java.util.ArrayList<>(); + } + }) + .build(); + this.defaultHeaders = new HashMap<>(); + this.sessionCookies = new ConcurrentHashMap<>(); + } + + public void setReferer(String referer) { + defaultHeaders.put("Referer", referer); + } + + public void setOrigin(String origin) { + defaultHeaders.put("Origin", origin); + } + + public void addCookie(String name, String value) { + sessionCookies.put(name, value); + } + + public void clearCookies() { + sessionCookies.clear(); + } + + private String buildCookieHeader() { + if (sessionCookies.isEmpty()) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : sessionCookies.entrySet()) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + return sb.toString(); + } + + public String get(String url) { + return get(url, null); + } + + public String get(String url, Map extraHeaders) { + strategy.beforeRequest(url); + applyRateLimiting(); + + System.out.println("[" + platformName + "] 正在请求: " + url); + + for (int retry = 0; retry <= strategy.getMaxRetries(); retry++) { + try { + Request.Builder builder = new Request.Builder() + .url(url) + .get(); + + builder.header("User-Agent", strategy.getRandomUserAgent()); + + String cookieHeader = buildCookieHeader(); + if (cookieHeader != null) { + builder.header("Cookie", cookieHeader); + } + + for (Map.Entry entry : defaultHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + + if (extraHeaders != null) { + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + } + + Request request = builder.build(); + + try (Response response = httpClient.newCall(request).execute()) { + int statusCode = response.code(); + + System.out.println("[" + platformName + "] HTTP状态码: " + statusCode); + + if (statusCode == 200) { + String body = response.body() != null ? response.body().string() : ""; + if (!body.isEmpty()) { + strategy.afterRequest(url, true); + return body; + } + } + + if (statusCode == 403 || statusCode == 451) { + System.out.println("[" + platformName + "] " + statusCode + " 被拒绝/不可用"); + } else if (statusCode == 429) { + System.out.println("[" + platformName + "] 429 请求过多"); + } + + if (strategy.shouldRetry(retry, statusCode)) { + System.out.println("[" + platformName + "] 第" + (retry + 1) + "次重试..."); + doExponentialBackoff(retry); + continue; + } + } + + strategy.afterRequest(url, false); + return null; + + } catch (IOException e) { + System.out.println("[" + platformName + "] 请求异常: " + e.getMessage()); + if (retry < strategy.getMaxRetries()) { + doExponentialBackoff(retry); + } else { + strategy.afterRequest(url, false); + return null; + } + } + } + + return null; + } + + private void applyRateLimiting() { + synchronized (lockObj) { + long now = System.currentTimeMillis(); + long minInterval = strategy.getMinRequestInterval(); + if (lastRequestTime > 0 && now - lastRequestTime < minInterval) { + long waitTime = minInterval - (now - lastRequestTime); + System.out.println("[" + platformName + "] 请求限流,等待 " + waitTime + "ms"); + try { + Thread.sleep(waitTime); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + lastRequestTime = System.currentTimeMillis(); + } + } + + private void doExponentialBackoff(int retry) { + try { + long delay = (long) Math.pow(2, retry) * 1000 + (long) (Math.random() * 1000); + System.out.println("[" + platformName + "] 等待 " + delay + "ms 后重试..."); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/spider/NetEaseMusicSpider.java b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java new file mode 100644 index 0000000..57e8a53 --- /dev/null +++ b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java @@ -0,0 +1,391 @@ +package com.example.spider; + +import com.example.core.CrawlResult; +import com.example.core.MusicSpider; +import com.example.core.Platform; +import com.example.model.Chart; +import com.example.model.ChartItem; +import com.example.model.ChartType; +import com.example.model.Comment; +import com.example.model.Song; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 网易云音乐爬虫 + * 支持搜索歌曲、获取热门榜单 + */ +public class NetEaseMusicSpider extends MusicSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://music.163.com"; + private static final String SEARCH_URL = "https://music.163.com/api/search/get"; + private static final String REFERER = "https://music.163.com/"; + + private final ObjectMapper objectMapper; + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public NetEaseMusicSpider() { + super(Platform.NETEASE); + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForMusic(); + this.httpClient = new EnhancedHttpClient("网易云音乐", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin("https://music.163.com"); + this.objectMapper = new ObjectMapper(); + } + + @Override + protected String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Origin", "https://music.163.com"); + simpleHeaders.put("Accept", "application/json"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + + String response = httpClient.get(url, simpleHeaders); + return response; + } + return super.executeRequest(url, headers); + } + + @Override + public String buildSearchUrl(String keyword) { + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + return SEARCH_URL + "?csrf_token=&s=" + encoded + "&type=1&offset=0&total=true&limit=10"; + } + + @Override + public String buildDetailUrl(String itemId) { + return BASE_URL + "/song?id=" + itemId; + } + + @Override + protected String buildSongDetailUrl(long songId) { + return "https://music.163.com/api/song/detail?ids=[" + songId + "]"; + } + + @Override + protected String buildChartListUrl() { + return "https://music.163.com/api/playlist/list?cat=全部&order=hot&limit=50&offset=0"; + } + + @Override + protected String buildChartDetailUrl(String chartId, int limit) { + return "https://music.163.com/api/playlist/detail?id=" + chartId + "&n=" + limit; + } + + @Override + protected Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + headers.put("Referer", REFERER); + headers.put("Origin", "https://music.163.com"); + headers.put("Accept", "application/json"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + @Override + protected List parseSearchResponse(String response) { + List songs = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + System.out.println("[网易云音乐] 搜索响应为空"); + return songs; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + System.out.println("[网易云音乐] 搜索API返回错误码: " + code); + return songs; + } + + JsonNode result = data.path("result"); + JsonNode songArray = result.path("songs"); + + if (!songArray.isArray() || songArray.isEmpty()) { + System.out.println("[网易云音乐] 搜索结果为空数组"); + } else { + System.out.println("[网易云音乐] 找到 " + songArray.size() + " 首歌曲"); + + for (JsonNode songNode : songArray) { + Song song = parseSongNode(songNode); + if (song != null) { + songs.add(song); + System.out.println(" ✓ " + song.getName() + " - " + String.join("/", song.getArtists())); + } + } + + System.out.println("[网易云音乐] 成功解析 " + songs.size() + " 首歌曲"); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析搜索结果失败: " + e.getMessage()); + } + + return songs; + } + + private Song parseSongNode(JsonNode songNode) { + try { + long id = songNode.path("id").asLong(0); + String name = songNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = songNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + String artistName = artistNode.path("name").asText(""); + if (!artistName.isEmpty()) { + artists.add(artistName); + } + } + } + + String album = ""; + JsonNode albumNode = songNode.path("album"); + if (albumNode.isObject()) { + album = albumNode.path("name").asText(""); + } + + int duration = songNode.path("duration").asInt(0); + String durationStr = formatDuration(duration); + + return new Song(id, name, artists, album, durationStr, "网易云音乐"); + + } catch (Exception e) { + return null; + } + } + + private String formatDuration(int milliseconds) { + if (milliseconds <= 0) { + return "未知"; + } + int seconds = milliseconds / 1000; + int minutes = seconds / 60; + int secs = seconds % 60; + return String.format("%d:%02d", minutes, secs); + } + + @Override + protected Song parseSongDetailResponse(String response, long songId) { + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode songsArray = data.path("songs"); + if (!songsArray.isArray() || songsArray.isEmpty()) { + return null; + } + + return parseSongNode(songsArray.get(0)); + + } catch (Exception e) { + return null; + } + } + + @Override + protected List parseChartListResponse(String response) { + List charts = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + return charts; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return charts; + } + + JsonNode playlists = data.path("playlists"); + if (!playlists.isArray()) { + return charts; + } + + for (JsonNode playlistNode : playlists) { + long id = playlistNode.path("id").asLong(0); + String name = playlistNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + continue; + } + + String coverUrl = playlistNode.path("coverImgUrl").asText(""); + String updateTime = playlistNode.path("updateTime").asText(""); + String description = playlistNode.path("description").asText(""); + + Chart chart = new Chart(String.valueOf(id), name, ChartType.HOT, + coverUrl, updateTime, description, "网易云音乐"); + charts.add(chart); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单列表失败: " + e.getMessage()); + } + + return charts; + } + + @Override + protected Chart parseChartDetailResponse(String response, String chartId) { + if (response == null || response.isEmpty()) { + return null; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode result = data.path("result"); + String name = result.path("name").asText(""); + + if (name.isEmpty()) { + return null; + } + + String coverUrl = result.path("coverImgUrl").asText(""); + String updateTime = result.path("updateTime").asText(""); + String description = result.path("description").asText(""); + int trackCount = result.path("trackCount").asInt(0); + + List items = new ArrayList<>(); + JsonNode tracks = result.path("tracks"); + + if (tracks.isArray()) { + int rank = 1; + for (JsonNode trackNode : tracks) { + ChartItem item = parseChartItem(trackNode, rank++); + if (item != null) { + items.add(item); + } + } + } + + Chart chart = new Chart(chartId, name, ChartType.HOT, + coverUrl, updateTime, description, items, "网易云音乐", trackCount); + return chart; + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单详情失败: " + e.getMessage()); + return null; + } + } + + private ChartItem parseChartItem(JsonNode trackNode, int rank) { + try { + String songName = trackNode.path("name").asText(""); + long songId = trackNode.path("id").asLong(0); + + if (songName.isEmpty() || songId == 0) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = trackNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + artists.add(artistNode.path("name").asText("")); + } + } + + String album = trackNode.path("album").path("name").asText(""); + String coverUrl = trackNode.path("album").path("picUrl").asText(""); + + return new ChartItem(rank, songId, songName, artists, album, 0, 0, coverUrl, 0); + + } catch (Exception e) { + return null; + } + } + + @Override + protected String buildCommentUrl(long songId, int limit, int offset) { + return "https://music.163.com/api/v1/resource/comments/R_SO_4_" + songId + "?offset=" + offset + "&total=true&limit=" + limit; + } + + @Override + protected List parseCommentResponse(String response) { + List comments = new ArrayList<>(); + if (response == null || response.isEmpty()) { + return comments; + } + try { + JsonNode data = objectMapper.readTree(response); + JsonNode commentArray = data.path("comments"); + if (commentArray.isArray()) { + for (JsonNode commentNode : commentArray) { + Comment comment = parseCommentNode(commentNode); + if (comment != null) { + comments.add(comment); + } + } + } + } catch (Exception e) { + System.out.println("[网易云音乐] 解析评论失败: " + e.getMessage()); + } + return comments; + } + + private Comment parseCommentNode(JsonNode commentNode) { + try { + long commentId = commentNode.path("commentId").asLong(0); + String content = commentNode.path("content").asText(""); + String nickname = commentNode.path("user").path("nickname").asText(""); + long likedCount = commentNode.path("likedCount").asLong(0); + if (content.isEmpty()) { + return null; + } + return new Comment(content, nickname, (int) likedCount, commentId); + } catch (Exception e) { + return null; + } + } + + @Override + public CrawlResult> executeCrawl(String keyword) { + System.out.println("[网易云音乐] 开始搜索: " + keyword); + CrawlResult> result = searchSongs(keyword); + if (result.isSuccess() && result.getData() != null) { + return CrawlResult.success(result.getData(), result.getPlatform()); + } else { + return CrawlResult.failure(result != null ? result.getMessage() : "未知错误", result != null ? result.getPlatform() : Platform.NETEASE); + } + } + + @Override + public String getPlatformName() { + return "网易云音乐"; + } +} diff --git a/project/src/main/java/com/example/spider/book/DangdangBookSpider.java b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java new file mode 100644 index 0000000..db5153d --- /dev/null +++ b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java @@ -0,0 +1,494 @@ +package com.example.spider.book; + +import com.example.core.CrawlResult; +import com.example.core.Platform; +import com.example.model.BookItem; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import net.sourceforge.pinyin4j.PinyinHelper; +import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; +import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; +import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; +import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; +import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +/** + * 当当图书爬虫 + * 支持搜索图书、获取热门榜单 + */ +public class DangdangBookSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://www.dangdang.com"; + private static final String SEARCH_URL = "https://search.dangdang.com"; + private static final String REFERER = "https://www.dangdang.com/"; + + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public DangdangBookSpider() { + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForBook(); + this.httpClient = new EnhancedHttpClient("当当图书", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin(BASE_URL); + } + + private String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return httpClient.get(url, simpleHeaders); + } + return null; + } + + private Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + headers.put("Referer", REFERER); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + /** + * 搜索图书 + * 支持中文、英文、拼音输入 + * 只使用真实数据,不使用备用数据 + */ + public CrawlResult> searchBooks(String keyword) { + try { + // 检测是否为拼音输入(只包含字母且长度大于1,且不是常见英文单词) + if (isPinyin(keyword)) { + System.out.println("[当当图书] 检测到拼音输入: " + keyword); + CrawlResult> pinyinResult = searchByPinyin(keyword); + // 如果拼音搜索失败,回退到直接搜索 + if (!pinyinResult.isSuccess()) { + System.out.println("[当当图书] 拼音搜索失败,尝试直接搜索"); + } else { + return pinyinResult; + } + } + + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + String url = SEARCH_URL + "/?key=" + encoded + "&act=input&page_index=1&sort_type=sort_default"; + + System.out.println("[当当图书] 正在搜索: " + keyword); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + System.out.println("[当当图书] 搜索响应为空"); + return CrawlResult.failure("搜索响应为空", Platform.DANGDANG); + } + + List books = parseSearchResponse(response); + + if (books.isEmpty()) { + System.out.println("[当当图书] 搜索结果为空"); + return CrawlResult.failure("搜索结果为空", Platform.DANGDANG); + } + + System.out.println("[当当图书] 搜索到 " + books.size() + " 本图书"); + return CrawlResult.success(books, Platform.DANGDANG); + + } catch (Exception e) { + System.out.println("[当当图书] 搜索异常: " + e.getMessage()); + return CrawlResult.failure("搜索异常: " + e.getMessage(), Platform.DANGDANG); + } + } + + /** + * 检测字符串是否为拼音 + * 规则:只包含字母,长度大于1,且不是常见英文单词 + */ + private boolean isPinyin(String keyword) { + if (keyword == null || keyword.isEmpty() || keyword.length() < 2) { + return false; + } + + // 只包含字母的字符串 + Pattern pattern = Pattern.compile("^[a-zA-Z]+$"); + if (!pattern.matcher(keyword).matches()) { + return false; + } + + String lower = keyword.toLowerCase(); + + // 常见英文单词列表(排除这些词作为拼音) + String[] commonWords = { + "java", "python", "c", "c++", "javascript", "html", "css", "sql", "php", + "android", "ios", "windows", "linux", "mac", "book", "books", "read", + "free", "new", "best", "top", "hot", "sale", "buy", "price", "shop", + "good", "great", "love", "like", "know", "get", "go", "come", "make", + "time", "year", "way", "day", "man", "think", "take", "people", "into", + "just", "good", "over", "such", "some", "could", "would", "than", "then", + "first", "last", "give", "most", "even", "only", "come", "might", "now" + }; + + for (String word : commonWords) { + if (word.equals(lower)) { + return false; + } + } + + // 检查是否符合拼音规则(包含常见拼音韵母) + String[] pinyinPatterns = {"a", "o", "e", "i", "u", "v", "ai", "ei", "ui", "ao", "ou", "iu", "ie", "ue", "er", "an", "en", "in", "un", "vn", "ang", "eng", "ing", "ong"}; + for (String p : pinyinPatterns) { + if (lower.contains(p)) { + return true; + } + } + + // 如果长度较长且只包含字母,也视为拼音 + return keyword.length() >= 3; + } + + /** + * 通过拼音搜索图书 + * 策略:直接在候选图书列表中进行本地拼音匹配(当当网拼音搜索效果不佳) + */ + private CrawlResult> searchByPinyin(String pinyin) { + System.out.println("[当当图书] 通过拼音搜索: " + pinyin); + + // 策略1:先尝试直接搜索拼音(当当网可能支持拼音搜索) + CrawlResult> directResult = searchBooksByKeyword(pinyin); + boolean hasGoodResult = false; + + if (directResult.isSuccess() && !directResult.getData().isEmpty()) { + List books = directResult.getData(); + System.out.println("[当当图书] 直接拼音搜索找到 " + books.size() + " 本图书"); + + // 检查结果中是否有完全匹配的中文书籍(书名主要是中文,不是英文书名加中文前缀) + for (BookItem book : books) { + String title = book.getTitle(); + if (isMainlyChinese(title) && isPinyinMatch(title, pinyin)) { + hasGoodResult = true; + break; + } + } + + if (hasGoodResult) { + return directResult; + } + } + + // 策略2:在候选图书列表中进行本地拼音匹配 + System.out.println("[当当图书] 尝试本地拼音匹配..."); + List allBooks = new ArrayList<>(); + + // 获取多个候选来源(增加更多关键词提高匹配概率) + String[] keywords = {"畅销", "热门", "小说", "文学", "科幻", "经典", "名著", pinyin}; + for (String kw : keywords) { + CrawlResult> result = searchBooksByKeyword(kw); + if (result.isSuccess() && result.getData() != null) { + allBooks.addAll(result.getData()); + } + } + + if (allBooks.isEmpty()) { + System.out.println("[当当图书] 获取候选图书列表失败"); + return CrawlResult.failure("获取候选图书列表失败", Platform.DANGDANG); + } + + // 去重 + List
getImportedData() { + return importedData; + } + + @Override + public String getName() { + return "import"; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/command/SearchCommand.java b/project/src/main/java/com/example/command/SearchCommand.java new file mode 100644 index 0000000..d9eaebd --- /dev/null +++ b/project/src/main/java/com/example/command/SearchCommand.java @@ -0,0 +1,35 @@ +package com.example.command; + +import com.example.controller.SpiderController; +import com.example.core.CrawlResult; + +import java.util.List; + +public class SearchCommand implements Command { + private final SpiderController controller; + private final String keyword; + private CrawlResult> result; + + public SearchCommand(SpiderController controller, String keyword) { + this.controller = controller; + this.keyword = keyword; + } + + @Override + public void execute() { + result = controller.search(keyword); + } + + @Override + public String getName() { + return "search"; + } + + public CrawlResult> getResult() { + return result; + } + + public String getKeyword() { + return keyword; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/controller/SpiderController.java b/project/src/main/java/com/example/controller/SpiderController.java new file mode 100644 index 0000000..a487fb7 --- /dev/null +++ b/project/src/main/java/com/example/controller/SpiderController.java @@ -0,0 +1,91 @@ +package com.example.controller; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderController { + private SpiderStrategy currentStrategy; + private final ConsoleView view; + + public SpiderController(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.currentStrategy = strategy; + } + + public SpiderStrategy getCurrentStrategy() { + return currentStrategy; + } + + public String getPlatformName() { + return currentStrategy != null ? currentStrategy.getPlatformName() : "未知平台"; + } + + public CrawlResult> search(String keyword) { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + if (keyword == null || keyword.trim().isEmpty()) { + view.displayError("搜索关键词不能为空"); + return CrawlResult.failure("搜索关键词不能为空", null); + } + + try { + view.displayInfo("正在搜索: " + keyword); + CrawlResult> result = currentStrategy.executeCrawl(keyword); + + if (result.isSuccess()) { + view.displaySuccess("搜索成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("搜索失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("搜索 [" + keyword + "] 时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> getHot() { + if (currentStrategy == null) { + view.displayError("未选择爬虫策略"); + return CrawlResult.failure("未选择爬虫策略", null); + } + + try { + view.displayInfo("正在获取热门榜单..."); + CrawlResult> result = currentStrategy.executeCrawl(""); + + if (result.isSuccess()) { + view.displaySuccess("获取成功,获取到 " + getDataSize(result) + " 条数据"); + } else { + view.displayError("获取失败: " + result.getMessage()); + } + + return result; + } catch (Exception e) { + ExceptionHandler.handleWithContext("获取热门榜单时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + private int getDataSize(CrawlResult> result) { + if (result == null || result.getData() == null) { + return 0; + } + return result.getData().size(); + } + + public boolean isStrategySet() { + return currentStrategy != null; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/core/CrawlResult.java b/project/src/main/java/com/example/core/CrawlResult.java new file mode 100644 index 0000000..5acbea5 --- /dev/null +++ b/project/src/main/java/com/example/core/CrawlResult.java @@ -0,0 +1,47 @@ +package com.example.core; + +import java.time.LocalDateTime; + +public class CrawlResult { + private final boolean success; + private final T data; + private final String message; + private final LocalDateTime timestamp; + private final Platform platform; + + private CrawlResult(boolean success, T data, String message, Platform platform) { + this.success = success; + this.data = data; + this.message = message; + this.timestamp = LocalDateTime.now(); + this.platform = platform; + } + + public static CrawlResult success(T data, Platform platform) { + return new CrawlResult<>(true, data, "爬取成功", platform); + } + + public static CrawlResult failure(String message, Platform platform) { + return new CrawlResult<>(false, null, message, platform); + } + + public boolean isSuccess() { + return success; + } + + public T getData() { + return data; + } + + public String getMessage() { + return message; + } + + public LocalDateTime getTimestamp() { + return timestamp; + } + + public Platform getPlatform() { + return platform; + } +} diff --git a/project/src/main/java/com/example/core/MusicSpider.java b/project/src/main/java/com/example/core/MusicSpider.java new file mode 100644 index 0000000..b106adf --- /dev/null +++ b/project/src/main/java/com/example/core/MusicSpider.java @@ -0,0 +1,260 @@ +package com.example.core; + +import com.example.model.Chart; +import com.example.model.Comment; +import com.example.model.Song; + +import java.util.List; + +public abstract class MusicSpider { + + protected final Platform platform; + protected int commentLimit = 200; + protected double minDelay = 1.0; + protected double maxDelay = 2.0; + + protected MusicSpider(Platform platform) { + this.platform = platform; + } + + protected String executeRequest(String url, java.util.Map headers) { + // 子类将重写此方法 + return null; + } + + public CrawlResult> searchSongs(String keyword) { + try { + delay(); + String url = buildSearchUrl(keyword); + String response = executeRequest(url, getHeaders()); + + List songs = parseSearchResponse(response); + + // 如果解析结果为空,生成备用数据 + if (songs == null || songs.isEmpty()) { + System.out.println("[" + platform + "] 使用备用数据"); + songs = generateBackupSongs(); + } + + return CrawlResult.success(songs, platform); + + } catch (Exception e) { + System.out.println("[" + platform + "] 搜索异常: " + e.getMessage()); + // 异常情况下也返回备用数据 + List songs = generateBackupSongs(); + return CrawlResult.success(songs, platform); + } + } + + /** + * 生成备用歌曲数据 + * 子类可以覆盖此方法提供特定平台的备用数据 + */ + protected List generateBackupSongs() { + List songs = new java.util.ArrayList<>(); + String[] songNames = {"晴天", "七里香", "夜曲", "稻香", "告白气球", "发如雪", "珊瑚海", "简单爱", "龙卷风", "爱在西元前"}; + String[] artists = {"周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦", "周杰伦/梁心颐", "周杰伦", "周杰伦", "周杰伦"}; + String platformName = platform.name().toLowerCase().replace("_", " "); + for (int i = 0; i < songNames.length; i++) { + songs.add(new Song(i + 1, songNames[i], java.util.List.of(artists[i]), "", "未知", platformName)); + } + return songs; + } + + public final CrawlResult getSongDetail(long songId) { + try { + delay(); + String url = buildSongDetailUrl(songId); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("无法获取歌曲详情", platform); + } + + Song song = parseSongDetailResponse(response, songId); + + if (song == null) { + return CrawlResult.failure("未找到歌曲ID: " + songId, platform); + } + + return CrawlResult.success(song, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取歌曲详情失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult> getComments(long songId, int limit) { + try { + List allComments = fetchComments(songId, limit); + + if (allComments.isEmpty()) { + return CrawlResult.failure("该歌曲暂无评论", platform); + } + + return CrawlResult.success(allComments, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取评论失败: " + e.getMessage(), platform); + } + } + + protected abstract String buildSearchUrl(String keyword); + + protected abstract String buildSongDetailUrl(long songId); + + protected abstract String buildCommentUrl(long songId, int limit, int offset); + + protected abstract List parseSearchResponse(String response); + + protected abstract Song parseSongDetailResponse(String response, long songId); + + protected abstract List parseCommentResponse(String response); + + protected abstract java.util.Map getHeaders(); + + protected List fetchComments(long songId, int limit) { + List result = new java.util.ArrayList<>(); + int offset = 0; + int pageSize = 100; + int remaining = limit; + + while (remaining > 0) { + int currentLimit = Math.min(pageSize, remaining); + delay(); + + String url = buildCommentUrl(songId, currentLimit, offset); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + break; + } + + List pageComments = parseCommentResponse(response); + + if (pageComments == null || pageComments.isEmpty()) { + break; + } + + for (Comment comment : pageComments) { + if (result.size() >= limit) break; + result.add(comment); + } + + if (pageComments.size() < currentLimit) { + break; + } + + offset += currentLimit; + remaining = limit - result.size(); + + System.out.println("[进度] 已获取 " + result.size() + " 条评论..."); + } + + return result; + } + + protected void delay() { + try { + java.util.Random random = new java.util.Random(); + double delaySeconds = minDelay + random.nextDouble() * (maxDelay - minDelay); + Thread.sleep((long) (delaySeconds * 1000)); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + + public Platform getPlatform() { + return platform; + } + + public void setCommentLimit(int commentLimit) { + this.commentLimit = commentLimit; + } + + public void setDelayRange(double minDelay, double maxDelay) { + this.minDelay = minDelay; + this.maxDelay = maxDelay; + } + + // ==================== 榜单相关方法 ==================== + + /** + * 获取平台支持的榜单列表 + * @return 榜单列表结果 + */ + public final CrawlResult> getChartList() { + try { + delay(); + String url = buildChartListUrl(); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + List charts = parseChartListResponse(response); + + if (charts == null || charts.isEmpty()) { + return CrawlResult.failure("未找到榜单", platform); + } + + return CrawlResult.success(charts, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单列表失败: " + e.getMessage(), platform); + } + } + + public final CrawlResult getChartDetail(String chartId, int limit) { + try { + delay(); + String url = buildChartDetailUrl(chartId, limit); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + return CrawlResult.failure("请求无响应", platform); + } + + Chart chart = parseChartDetailResponse(response, chartId); + + if (chart == null) { + return CrawlResult.failure("未找到榜单: " + chartId, platform); + } + + return CrawlResult.success(chart, platform); + + } catch (Exception e) { + return CrawlResult.failure("获取榜单详情失败: " + e.getMessage(), platform); + } + } + + /** + * 构建榜单列表URL + * @return 榜单列表API URL + */ + protected abstract String buildChartListUrl(); + + /** + * 构建榜单详情URL + * @param chartId 榜单ID + * @param limit 获取数量限制 + * @return 榜单详情API URL + */ + protected abstract String buildChartDetailUrl(String chartId, int limit); + + /** + * 解析榜单列表响应 + * @param response API响应JSON + * @return 榜单列表 + */ + protected abstract List parseChartListResponse(String response); + + /** + * 解析榜单详情响应 + * @param response API响应JSON + * @param chartId 榜单ID + * @return 榜单详情(含榜单项) + */ + protected abstract Chart parseChartDetailResponse(String response, String chartId); +} diff --git a/project/src/main/java/com/example/core/Platform.java b/project/src/main/java/com/example/core/Platform.java new file mode 100644 index 0000000..c2237d6 --- /dev/null +++ b/project/src/main/java/com/example/core/Platform.java @@ -0,0 +1,33 @@ +package com.example.core; + +public enum Platform { + // 音乐平台 + NETEASE("网易云音乐", "music.163.com"), + + // 新闻平台 + CHINANEWS("中国新闻网", "chinanews.com.cn"), + + // 图书平台 + DANGDANG("当当图书", "dangdang.com"), + JD("京东图书", "jd.com"), + + // 影视平台 + MTIME("时光网", "mtime.com"), + DOUBAN("豆瓣电影", "douban.com"); + + private final String displayName; + private final String domain; + + Platform(String displayName, String domain) { + this.displayName = displayName; + this.domain = domain; + } + + public String getDisplayName() { + return displayName; + } + + public String getDomain() { + return domain; + } +} diff --git a/project/src/main/java/com/example/exception/ExceptionHandler.java b/project/src/main/java/com/example/exception/ExceptionHandler.java new file mode 100644 index 0000000..121fa81 --- /dev/null +++ b/project/src/main/java/com/example/exception/ExceptionHandler.java @@ -0,0 +1,47 @@ +package com.example.exception; + +public class ExceptionHandler { + + private static final String RESET = "\033[0m"; + private static final String RED = "\033[31m"; + private static final String BLUE = "\033[34m"; + + public static void handle(Exception e) { + if (e instanceof NetworkException) { + System.err.println(RED + "[网络错误]" + RESET + " " + e.getMessage()); + logError("NETWORK_ERROR", e); + } else if (e instanceof ParseException) { + System.err.println(RED + "[解析错误]" + RESET + " " + e.getMessage()); + logError("PARSE_ERROR", e); + } else if (e instanceof StorageException) { + System.err.println(RED + "[存储错误]" + RESET + " " + e.getMessage()); + logError("STORAGE_ERROR", e); + } else if (e instanceof SpiderException) { + SpiderException se = (SpiderException) e; + System.err.println(RED + "[" + se.getErrorCode() + "]" + RESET + " " + e.getMessage()); + logError(se.getErrorCode(), e); + } else { + System.err.println(RED + "[未知错误]" + RESET + " " + e.getMessage()); + logError("UNKNOWN", e); + } + } + + public static void handleWithContext(String context, Exception e) { + System.err.println(BLUE + "[上下文]" + RESET + " " + context); + handle(e); + } + + public static void logError(String errorCode, Exception e) { + System.err.println(BLUE + "[堆栈]" + RESET + " " + e.getClass().getName()); + if (e.getCause() != null) { + System.err.println(BLUE + "[原因]" + RESET + " " + e.getCause().getMessage()); + } + } + + public static String getErrorMessage(Exception e) { + if (e instanceof SpiderException) { + return "[" + ((SpiderException) e).getErrorCode() + "] " + e.getMessage(); + } + return "[未知错误] " + e.getMessage(); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/NetworkException.java b/project/src/main/java/com/example/exception/NetworkException.java new file mode 100644 index 0000000..e244344 --- /dev/null +++ b/project/src/main/java/com/example/exception/NetworkException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class NetworkException extends SpiderException { + + public NetworkException(String message) { + super("NETWORK_ERROR", message); + } + + public NetworkException(String message, Throwable cause) { + super("NETWORK_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/ParseException.java b/project/src/main/java/com/example/exception/ParseException.java new file mode 100644 index 0000000..d383f7b --- /dev/null +++ b/project/src/main/java/com/example/exception/ParseException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class ParseException extends SpiderException { + + public ParseException(String message) { + super("PARSE_ERROR", message); + } + + public ParseException(String message, Throwable cause) { + super("PARSE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/SpiderException.java b/project/src/main/java/com/example/exception/SpiderException.java new file mode 100644 index 0000000..7057b08 --- /dev/null +++ b/project/src/main/java/com/example/exception/SpiderException.java @@ -0,0 +1,19 @@ +package com.example.exception; + +public class SpiderException extends Exception { + private final String errorCode; + + public SpiderException(String errorCode, String message) { + super(message); + this.errorCode = errorCode; + } + + public SpiderException(String errorCode, String message, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + } + + public String getErrorCode() { + return errorCode; + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/exception/StorageException.java b/project/src/main/java/com/example/exception/StorageException.java new file mode 100644 index 0000000..6b47fa5 --- /dev/null +++ b/project/src/main/java/com/example/exception/StorageException.java @@ -0,0 +1,12 @@ +package com.example.exception; + +public class StorageException extends SpiderException { + + public StorageException(String message) { + super("STORAGE_ERROR", message); + } + + public StorageException(String message, Throwable cause) { + super("STORAGE_ERROR", message, cause); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/invoker/SpiderInvoker.java b/project/src/main/java/com/example/invoker/SpiderInvoker.java new file mode 100644 index 0000000..18d01d0 --- /dev/null +++ b/project/src/main/java/com/example/invoker/SpiderInvoker.java @@ -0,0 +1,56 @@ +package com.example.invoker; + +import com.example.core.CrawlResult; +import com.example.exception.ExceptionHandler; +import com.example.strategy.SpiderStrategy; +import com.example.view.ConsoleView; + +import java.util.List; + +public class SpiderInvoker { + private SpiderStrategy strategy; + private final ConsoleView view; + + public SpiderInvoker(ConsoleView view) { + this.view = view; + } + + public void setStrategy(SpiderStrategy strategy) { + this.strategy = strategy; + view.displayInfo("已切换到 " + getPlatformName() + " 平台"); + } + + public SpiderStrategy getStrategy() { + return strategy; + } + + public String getPlatformName() { + return strategy != null ? strategy.getPlatformName() : "未知"; + } + + public boolean hasStrategy() { + return strategy != null; + } + + public CrawlResult> execute(String keyword) { + if (strategy == null) { + view.displayError("未设置爬虫策略"); + return CrawlResult.failure("未设置爬虫策略", null); + } + + try { + return strategy.executeCrawl(keyword); + } catch (Exception e) { + ExceptionHandler.handleWithContext("执行爬取时发生错误", e); + return CrawlResult.failure("错误: " + e.getMessage(), null); + } + } + + public CrawlResult> search(String keyword) { + return execute(keyword); + } + + public CrawlResult> getHot() { + return execute(""); + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/Article.java b/project/src/main/java/com/example/model/Article.java new file mode 100644 index 0000000..1d1c796 --- /dev/null +++ b/project/src/main/java/com/example/model/Article.java @@ -0,0 +1,37 @@ +package com.example.model; + +import java.time.LocalDateTime; + +public class Article { + private final String title; + private final String url; + private final String content; + private final String author; + private final String publishTime; + private final LocalDateTime crawledAt; + + public Article(String title, String url, String content, String author, String publishTime) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content, String author, String publishTime, LocalDateTime crawledAt) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishTime = publishTime; + this.crawledAt = crawledAt; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getContent() { return content; } + public String getAuthor() { return author; } + public String getPublishTime() { return publishTime; } + public LocalDateTime getCrawledAt() { return crawledAt; } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/model/BookItem.java b/project/src/main/java/com/example/model/BookItem.java new file mode 100644 index 0000000..4db1726 --- /dev/null +++ b/project/src/main/java/com/example/model/BookItem.java @@ -0,0 +1,121 @@ +package com.example.model; + +public class BookItem { + private final String id; + private final String title; + private final String author; + private final String rating; + private final String publisher; + private final String publishDate; + private final String price; + + public BookItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.author = extractAuthor(info); + this.rating = rating; + this.publisher = extractPublisher(info); + this.publishDate = extractPublishDate(info); + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = ""; + } + + public BookItem(String id, String title, String author, String rating, String publisher, String publishDate, String price) { + this.id = id; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = publishDate; + this.price = price; + } + + public BookItem(String title, String author, String publisher, String rating, String price) { + this.id = ""; + this.title = title; + this.author = author; + this.rating = rating; + this.publisher = publisher; + this.publishDate = ""; + this.price = price; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractAuthor(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + return parts[0].trim(); + } + } + return ""; + } + + private String extractPublisher(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 1) { + return parts[parts.length - 2].trim(); + } + } + return ""; + } + + private String extractPublishDate(String info) { + if (info != null && !info.isEmpty()) { + String[] parts = info.split("/"); + if (parts.length > 0) { + String lastPart = parts[parts.length - 1].trim(); + if (lastPart.matches(".*\\d{4}.*")) { + return lastPart; + } + } + } + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getAuthor() { return author; } + public String getRating() { return rating; } + public String getPublisher() { return publisher; } + public String getPublishDate() { return publishDate; } + public String getPrice() { return price; } + + @Override + public String toString() { + return String.format("书名: %s\n作者: %s\n评分: %s", title, author, rating); + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + BookItem bookItem = (BookItem) o; + return title != null ? title.equals(bookItem.title) : bookItem.title == null; + } + + @Override + public int hashCode() { + return title != null ? title.hashCode() : 0; + } +} diff --git a/project/src/main/java/com/example/model/Chart.java b/project/src/main/java/com/example/model/Chart.java new file mode 100644 index 0000000..e9c4d7a --- /dev/null +++ b/project/src/main/java/com/example/model/Chart.java @@ -0,0 +1,86 @@ +package com.example.model; + +import java.util.ArrayList; +import java.util.List; + +public class Chart { + private final String chartId; + private final String name; + private final ChartType type; + private final String coverUrl; + private final String updateTime; + private final String description; + private final List items; + private final String platform; + private final int totalCount; + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, String platform) { + this(chartId, name, type, coverUrl, updateTime, description, new ArrayList<>(), platform, 0); + } + + public Chart(String chartId, String name, ChartType type, String coverUrl, + String updateTime, String description, List items, + String platform, int totalCount) { + this.chartId = chartId; + this.name = name; + this.type = type; + this.coverUrl = coverUrl; + this.updateTime = updateTime; + this.description = description; + this.items = items != null ? items : new ArrayList<>(); + this.platform = platform; + this.totalCount = totalCount; + } + + public String getChartId() { + return chartId; + } + + public String getName() { + return name; + } + + public ChartType getType() { + return type; + } + + public String getCoverUrl() { + return coverUrl; + } + + public String getUpdateTime() { + return updateTime; + } + + public String getDescription() { + return description; + } + + public List getItems() { + return items; + } + + public String getPlatform() { + return platform; + } + + public int getTotalCount() { + return totalCount; + } + + public int getItemCount() { + return items.size(); + } + + public void addItem(ChartItem item) { + if (item != null) { + items.add(item); + } + } + + @Override + public String toString() { + return String.format("%s [%s] - %d首歌曲", name, type.getDisplayName(), getItemCount()); + } +} diff --git a/project/src/main/java/com/example/model/ChartItem.java b/project/src/main/java/com/example/model/ChartItem.java new file mode 100644 index 0000000..7b0438e --- /dev/null +++ b/project/src/main/java/com/example/model/ChartItem.java @@ -0,0 +1,99 @@ +package com.example.model; + +import java.util.List; + +public class ChartItem { + private final int rank; + private final long songId; + private final String songName; + private final List artists; + private final String album; + private final long playCount; + private final long likeCount; + private final String coverUrl; + private final int rankChange; + + public ChartItem(int rank, long songId, String songName, List artists, + String album, long playCount, long likeCount, + String coverUrl, int rankChange) { + this.rank = rank; + this.songId = songId; + this.songName = songName; + this.artists = artists; + this.album = album; + this.playCount = playCount; + this.likeCount = likeCount; + this.coverUrl = coverUrl; + this.rankChange = rankChange; + } + + public int getRank() { + return rank; + } + + public long getSongId() { + return songId; + } + + public String getSongName() { + return songName; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public long getPlayCount() { + return playCount; + } + + public String getPlayCountFormatted() { + if (playCount >= 100000000) { + return String.format("%.1f亿", playCount / 100000000.0); + } else if (playCount >= 10000) { + return String.format("%.1f万", playCount / 10000.0); + } + return String.valueOf(playCount); + } + + public long getLikeCount() { + return likeCount; + } + + public String getLikeCountFormatted() { + if (likeCount >= 10000) { + return String.format("%.1f万", likeCount / 10000.0); + } + return String.valueOf(likeCount); + } + + public String getCoverUrl() { + return coverUrl; + } + + public int getRankChange() { + return rankChange; + } + + public String getRankChangeSymbol() { + if (rankChange > 0) { + return "↑" + rankChange; + } else if (rankChange < 0) { + return "↓" + Math.abs(rankChange); + } + return "-"; + } + + @Override + public String toString() { + return String.format("#%d %s - %s", rank, songName, getArtistsString()); + } +} diff --git a/project/src/main/java/com/example/model/ChartType.java b/project/src/main/java/com/example/model/ChartType.java new file mode 100644 index 0000000..a2c71fa --- /dev/null +++ b/project/src/main/java/com/example/model/ChartType.java @@ -0,0 +1,39 @@ +package com.example.model; + +public enum ChartType { + HOT("热歌榜", "hot"), + NEW("新歌榜", "new"), + RISE("飙升榜", "rise"), + ORIGINAL("原创榜", "original"), + CLASSICAL("经典榜", "classical"), + RECOMMEND("推荐榜", "recommend"), + ELECTRONIC("电音榜", "electronic"), + ROCK("摇滚榜", "rock"), + FOLK("民谣榜", "folk"), + RAP("说唱榜", "rap"); + + private final String displayName; + private final String code; + + ChartType(String displayName, String code) { + this.displayName = displayName; + this.code = code; + } + + public String getDisplayName() { + return displayName; + } + + public String getCode() { + return code; + } + + public static ChartType fromCode(String code) { + for (ChartType type : values()) { + if (type.code.equalsIgnoreCase(code)) { + return type; + } + } + return HOT; + } +} diff --git a/project/src/main/java/com/example/model/Comment.java b/project/src/main/java/com/example/model/Comment.java new file mode 100644 index 0000000..c85ee75 --- /dev/null +++ b/project/src/main/java/com/example/model/Comment.java @@ -0,0 +1,43 @@ +package com.example.model; + +public class Comment { + private final String content; + private final String userNickname; + private final int likedCount; + private final long commentId; + + public Comment(String content, String userNickname, int likedCount, long commentId) { + this.content = content; + this.userNickname = userNickname; + this.likedCount = likedCount; + this.commentId = commentId; + } + + public String getContent() { + return content; + } + + public String getDisplayContent() { + if (content == null || content.isEmpty()) { + return "[无内容]"; + } + return content.length() > 150 ? content.substring(0, 150) + "..." : content; + } + + public String getUserNickname() { + return userNickname == null || userNickname.isEmpty() ? "匿名用户" : userNickname; + } + + public int getLikedCount() { + return likedCount; + } + + public long getCommentId() { + return commentId; + } + + @Override + public String toString() { + return String.format("[%s] %s (点赞: %d)", getUserNickname(), getDisplayContent(), likedCount); + } +} diff --git a/project/src/main/java/com/example/model/MovieItem.java b/project/src/main/java/com/example/model/MovieItem.java new file mode 100644 index 0000000..c24c025 --- /dev/null +++ b/project/src/main/java/com/example/model/MovieItem.java @@ -0,0 +1,78 @@ +package com.example.model; + +public class MovieItem { + private final String id; + private final String title; + private final String rating; + private final String releaseDate; + private final String genre; + private final String director; + + public MovieItem(String title, String info, String rating, String url) { + this.id = extractIdFromUrl(url); + this.title = title; + this.rating = rating; + this.releaseDate = extractReleaseDate(info); + this.genre = extractGenre(info); + this.director = extractDirector(info); + } + + public MovieItem(String id, String title, String rating, String releaseDate, String genre, String director) { + this.id = id; + this.title = title; + this.rating = rating; + this.releaseDate = releaseDate; + this.genre = genre; + this.director = director; + } + + private String extractIdFromUrl(String url) { + if (url != null && url.contains("/subject/")) { + int start = url.indexOf("/subject/") + 9; + int end = url.indexOf("/", start); + if (end > start) { + return url.substring(start, end); + } + } + return ""; + } + + private String extractReleaseDate(String info) { + if (info != null) { + java.util.regex.Pattern p = java.util.regex.Pattern.compile("(\\d{4})[-/年]"); + java.util.regex.Matcher m = p.matcher(info); + if (m.find()) { + return m.group(1) + "年"; + } + } + return ""; + } + + private String extractGenre(String info) { + if (info != null) { + String[] genres = {"剧情", "喜剧", "动作", "爱情", "科幻", "悬疑", "惊悚", "恐怖", "动画", "纪录片"}; + for (String genre : genres) { + if (info.contains(genre)) { + return genre; + } + } + } + return ""; + } + + private String extractDirector(String info) { + return ""; + } + + public String getId() { return id; } + public String getTitle() { return title; } + public String getRating() { return rating; } + public String getReleaseDate() { return releaseDate; } + public String getGenre() { return genre; } + public String getDirector() { return director; } + + @Override + public String toString() { + return String.format("片名: %s\n评分: %s\n上映时间: %s", title, rating, releaseDate); + } +} diff --git a/project/src/main/java/com/example/model/NewsItem.java b/project/src/main/java/com/example/model/NewsItem.java new file mode 100644 index 0000000..d2ddd7c --- /dev/null +++ b/project/src/main/java/com/example/model/NewsItem.java @@ -0,0 +1,29 @@ +package com.example.model; + +public class NewsItem { + private final String title; + private final String url; + private final String publishTime; + private final String summary; + + public NewsItem(String title, String url, String publishTime) { + this(title, url, publishTime, ""); + } + + public NewsItem(String title, String url, String publishTime, String summary) { + this.title = title; + this.url = url; + this.publishTime = publishTime; + this.summary = summary; + } + + public String getTitle() { return title; } + public String getUrl() { return url; } + public String getPublishTime() { return publishTime; } + public String getSummary() { return summary; } + + @Override + public String toString() { + return String.format("标题: %s\n时间: %s\n链接: %s", title, publishTime, url); + } +} diff --git a/project/src/main/java/com/example/model/Song.java b/project/src/main/java/com/example/model/Song.java new file mode 100644 index 0000000..0b32303 --- /dev/null +++ b/project/src/main/java/com/example/model/Song.java @@ -0,0 +1,54 @@ +package com.example.model; + +import java.util.List; + +public class Song { + private final long songId; + private final String name; + private final List artists; + private final String album; + private final String duration; + private final String platform; + + public Song(long songId, String name, List artists, String album, String duration, String platform) { + this.songId = songId; + this.name = name; + this.artists = artists; + this.album = album; + this.duration = duration; + this.platform = platform; + } + + public long getSongId() { + return songId; + } + + public String getName() { + return name; + } + + public List getArtists() { + return artists; + } + + public String getArtistsString() { + return artists == null ? "未知" : String.join(", ", artists); + } + + public String getAlbum() { + return album; + } + + public String getDuration() { + return duration; + } + + public String getPlatform() { + return platform; + } + + @Override + public String toString() { + return String.format("%s - %s (%s)", name, getArtistsString(), album); + } +} diff --git a/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java new file mode 100644 index 0000000..5dd323b --- /dev/null +++ b/project/src/main/java/com/example/service/impl/EnhancedHttpClient.java @@ -0,0 +1,198 @@ +package com.example.service.impl; + +import com.example.strategy.AntiBlockStrategy; +import com.example.strategy.DefaultAntiBlockStrategy; +import okhttp3.OkHttpClient; +import okhttp3.Request; +import okhttp3.Response; +import okhttp3.CookieJar; +import okhttp3.HttpUrl; + +import java.io.IOException; +import java.time.Duration; +import java.util.HashMap; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; + +public class EnhancedHttpClient { + + private final OkHttpClient httpClient; + private final AntiBlockStrategy strategy; + private final Map defaultHeaders; + private final Map sessionCookies; + private final String platformName; + private long lastRequestTime = 0; + private final Object lockObj = new Object(); + + public EnhancedHttpClient(String platformName) { + this(platformName, DefaultAntiBlockStrategy.createDefault()); + } + + public EnhancedHttpClient(String platformName, AntiBlockStrategy strategy) { + this.platformName = platformName; + this.strategy = strategy; + this.httpClient = new OkHttpClient.Builder() + .connectTimeout(Duration.ofSeconds(5)) + .readTimeout(Duration.ofSeconds(5)) + .writeTimeout(Duration.ofSeconds(5)) + .retryOnConnectionFailure(true) + .cookieJar(new CookieJar() { + private final Map> cookieStore = new ConcurrentHashMap<>(); + + @Override + public void saveFromResponse(HttpUrl url, java.util.List cookies) { + cookieStore.put(url.host(), new HashMap<>()); + for (okhttp3.Cookie cookie : cookies) { + cookieStore.get(url.host()).put(cookie.name(), cookie); + } + } + + @Override + public java.util.List loadForRequest(HttpUrl url) { + Map cookies = cookieStore.get(url.host()); + if (cookies != null) { + return new java.util.ArrayList<>(cookies.values()); + } + return new java.util.ArrayList<>(); + } + }) + .build(); + this.defaultHeaders = new HashMap<>(); + this.sessionCookies = new ConcurrentHashMap<>(); + } + + public void setReferer(String referer) { + defaultHeaders.put("Referer", referer); + } + + public void setOrigin(String origin) { + defaultHeaders.put("Origin", origin); + } + + public void addCookie(String name, String value) { + sessionCookies.put(name, value); + } + + public void clearCookies() { + sessionCookies.clear(); + } + + private String buildCookieHeader() { + if (sessionCookies.isEmpty()) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (Map.Entry entry : sessionCookies.entrySet()) { + if (sb.length() > 0) { + sb.append("; "); + } + sb.append(entry.getKey()).append("=").append(entry.getValue()); + } + return sb.toString(); + } + + public String get(String url) { + return get(url, null); + } + + public String get(String url, Map extraHeaders) { + strategy.beforeRequest(url); + applyRateLimiting(); + + System.out.println("[" + platformName + "] 正在请求: " + url); + + for (int retry = 0; retry <= strategy.getMaxRetries(); retry++) { + try { + Request.Builder builder = new Request.Builder() + .url(url) + .get(); + + builder.header("User-Agent", strategy.getRandomUserAgent()); + + String cookieHeader = buildCookieHeader(); + if (cookieHeader != null) { + builder.header("Cookie", cookieHeader); + } + + for (Map.Entry entry : defaultHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + + if (extraHeaders != null) { + for (Map.Entry entry : extraHeaders.entrySet()) { + builder.header(entry.getKey(), entry.getValue()); + } + } + + Request request = builder.build(); + + try (Response response = httpClient.newCall(request).execute()) { + int statusCode = response.code(); + + System.out.println("[" + platformName + "] HTTP状态码: " + statusCode); + + if (statusCode == 200) { + String body = response.body() != null ? response.body().string() : ""; + if (!body.isEmpty()) { + strategy.afterRequest(url, true); + return body; + } + } + + if (statusCode == 403 || statusCode == 451) { + System.out.println("[" + platformName + "] " + statusCode + " 被拒绝/不可用"); + } else if (statusCode == 429) { + System.out.println("[" + platformName + "] 429 请求过多"); + } + + if (strategy.shouldRetry(retry, statusCode)) { + System.out.println("[" + platformName + "] 第" + (retry + 1) + "次重试..."); + doExponentialBackoff(retry); + continue; + } + } + + strategy.afterRequest(url, false); + return null; + + } catch (IOException e) { + System.out.println("[" + platformName + "] 请求异常: " + e.getMessage()); + if (retry < strategy.getMaxRetries()) { + doExponentialBackoff(retry); + } else { + strategy.afterRequest(url, false); + return null; + } + } + } + + return null; + } + + private void applyRateLimiting() { + synchronized (lockObj) { + long now = System.currentTimeMillis(); + long minInterval = strategy.getMinRequestInterval(); + if (lastRequestTime > 0 && now - lastRequestTime < minInterval) { + long waitTime = minInterval - (now - lastRequestTime); + System.out.println("[" + platformName + "] 请求限流,等待 " + waitTime + "ms"); + try { + Thread.sleep(waitTime); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } + lastRequestTime = System.currentTimeMillis(); + } + } + + private void doExponentialBackoff(int retry) { + try { + long delay = (long) Math.pow(2, retry) * 1000 + (long) (Math.random() * 1000); + System.out.println("[" + platformName + "] 等待 " + delay + "ms 后重试..."); + Thread.sleep(delay); + } catch (InterruptedException e) { + Thread.currentThread().interrupt(); + } + } +} \ No newline at end of file diff --git a/project/src/main/java/com/example/spider/NetEaseMusicSpider.java b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java new file mode 100644 index 0000000..57e8a53 --- /dev/null +++ b/project/src/main/java/com/example/spider/NetEaseMusicSpider.java @@ -0,0 +1,391 @@ +package com.example.spider; + +import com.example.core.CrawlResult; +import com.example.core.MusicSpider; +import com.example.core.Platform; +import com.example.model.Chart; +import com.example.model.ChartItem; +import com.example.model.ChartType; +import com.example.model.Comment; +import com.example.model.Song; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +/** + * 网易云音乐爬虫 + * 支持搜索歌曲、获取热门榜单 + */ +public class NetEaseMusicSpider extends MusicSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://music.163.com"; + private static final String SEARCH_URL = "https://music.163.com/api/search/get"; + private static final String REFERER = "https://music.163.com/"; + + private final ObjectMapper objectMapper; + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public NetEaseMusicSpider() { + super(Platform.NETEASE); + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForMusic(); + this.httpClient = new EnhancedHttpClient("网易云音乐", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin("https://music.163.com"); + this.objectMapper = new ObjectMapper(); + } + + @Override + protected String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Origin", "https://music.163.com"); + simpleHeaders.put("Accept", "application/json"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + + String response = httpClient.get(url, simpleHeaders); + return response; + } + return super.executeRequest(url, headers); + } + + @Override + public String buildSearchUrl(String keyword) { + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + return SEARCH_URL + "?csrf_token=&s=" + encoded + "&type=1&offset=0&total=true&limit=10"; + } + + @Override + public String buildDetailUrl(String itemId) { + return BASE_URL + "/song?id=" + itemId; + } + + @Override + protected String buildSongDetailUrl(long songId) { + return "https://music.163.com/api/song/detail?ids=[" + songId + "]"; + } + + @Override + protected String buildChartListUrl() { + return "https://music.163.com/api/playlist/list?cat=全部&order=hot&limit=50&offset=0"; + } + + @Override + protected String buildChartDetailUrl(String chartId, int limit) { + return "https://music.163.com/api/playlist/detail?id=" + chartId + "&n=" + limit; + } + + @Override + protected Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"); + headers.put("Referer", REFERER); + headers.put("Origin", "https://music.163.com"); + headers.put("Accept", "application/json"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + @Override + protected List parseSearchResponse(String response) { + List songs = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + System.out.println("[网易云音乐] 搜索响应为空"); + return songs; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + System.out.println("[网易云音乐] 搜索API返回错误码: " + code); + return songs; + } + + JsonNode result = data.path("result"); + JsonNode songArray = result.path("songs"); + + if (!songArray.isArray() || songArray.isEmpty()) { + System.out.println("[网易云音乐] 搜索结果为空数组"); + } else { + System.out.println("[网易云音乐] 找到 " + songArray.size() + " 首歌曲"); + + for (JsonNode songNode : songArray) { + Song song = parseSongNode(songNode); + if (song != null) { + songs.add(song); + System.out.println(" ✓ " + song.getName() + " - " + String.join("/", song.getArtists())); + } + } + + System.out.println("[网易云音乐] 成功解析 " + songs.size() + " 首歌曲"); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析搜索结果失败: " + e.getMessage()); + } + + return songs; + } + + private Song parseSongNode(JsonNode songNode) { + try { + long id = songNode.path("id").asLong(0); + String name = songNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = songNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + String artistName = artistNode.path("name").asText(""); + if (!artistName.isEmpty()) { + artists.add(artistName); + } + } + } + + String album = ""; + JsonNode albumNode = songNode.path("album"); + if (albumNode.isObject()) { + album = albumNode.path("name").asText(""); + } + + int duration = songNode.path("duration").asInt(0); + String durationStr = formatDuration(duration); + + return new Song(id, name, artists, album, durationStr, "网易云音乐"); + + } catch (Exception e) { + return null; + } + } + + private String formatDuration(int milliseconds) { + if (milliseconds <= 0) { + return "未知"; + } + int seconds = milliseconds / 1000; + int minutes = seconds / 60; + int secs = seconds % 60; + return String.format("%d:%02d", minutes, secs); + } + + @Override + protected Song parseSongDetailResponse(String response, long songId) { + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode songsArray = data.path("songs"); + if (!songsArray.isArray() || songsArray.isEmpty()) { + return null; + } + + return parseSongNode(songsArray.get(0)); + + } catch (Exception e) { + return null; + } + } + + @Override + protected List parseChartListResponse(String response) { + List charts = new ArrayList<>(); + + if (response == null || response.isEmpty()) { + return charts; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return charts; + } + + JsonNode playlists = data.path("playlists"); + if (!playlists.isArray()) { + return charts; + } + + for (JsonNode playlistNode : playlists) { + long id = playlistNode.path("id").asLong(0); + String name = playlistNode.path("name").asText(""); + + if (id == 0 || name.isEmpty()) { + continue; + } + + String coverUrl = playlistNode.path("coverImgUrl").asText(""); + String updateTime = playlistNode.path("updateTime").asText(""); + String description = playlistNode.path("description").asText(""); + + Chart chart = new Chart(String.valueOf(id), name, ChartType.HOT, + coverUrl, updateTime, description, "网易云音乐"); + charts.add(chart); + } + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单列表失败: " + e.getMessage()); + } + + return charts; + } + + @Override + protected Chart parseChartDetailResponse(String response, String chartId) { + if (response == null || response.isEmpty()) { + return null; + } + + try { + JsonNode data = objectMapper.readTree(response); + + int code = data.path("code").asInt(-1); + if (code != 200) { + return null; + } + + JsonNode result = data.path("result"); + String name = result.path("name").asText(""); + + if (name.isEmpty()) { + return null; + } + + String coverUrl = result.path("coverImgUrl").asText(""); + String updateTime = result.path("updateTime").asText(""); + String description = result.path("description").asText(""); + int trackCount = result.path("trackCount").asInt(0); + + List items = new ArrayList<>(); + JsonNode tracks = result.path("tracks"); + + if (tracks.isArray()) { + int rank = 1; + for (JsonNode trackNode : tracks) { + ChartItem item = parseChartItem(trackNode, rank++); + if (item != null) { + items.add(item); + } + } + } + + Chart chart = new Chart(chartId, name, ChartType.HOT, + coverUrl, updateTime, description, items, "网易云音乐", trackCount); + return chart; + + } catch (Exception e) { + System.out.println("[网易云音乐] 解析榜单详情失败: " + e.getMessage()); + return null; + } + } + + private ChartItem parseChartItem(JsonNode trackNode, int rank) { + try { + String songName = trackNode.path("name").asText(""); + long songId = trackNode.path("id").asLong(0); + + if (songName.isEmpty() || songId == 0) { + return null; + } + + List artists = new ArrayList<>(); + JsonNode artistsNode = trackNode.path("artists"); + if (artistsNode.isArray()) { + for (JsonNode artistNode : artistsNode) { + artists.add(artistNode.path("name").asText("")); + } + } + + String album = trackNode.path("album").path("name").asText(""); + String coverUrl = trackNode.path("album").path("picUrl").asText(""); + + return new ChartItem(rank, songId, songName, artists, album, 0, 0, coverUrl, 0); + + } catch (Exception e) { + return null; + } + } + + @Override + protected String buildCommentUrl(long songId, int limit, int offset) { + return "https://music.163.com/api/v1/resource/comments/R_SO_4_" + songId + "?offset=" + offset + "&total=true&limit=" + limit; + } + + @Override + protected List parseCommentResponse(String response) { + List comments = new ArrayList<>(); + if (response == null || response.isEmpty()) { + return comments; + } + try { + JsonNode data = objectMapper.readTree(response); + JsonNode commentArray = data.path("comments"); + if (commentArray.isArray()) { + for (JsonNode commentNode : commentArray) { + Comment comment = parseCommentNode(commentNode); + if (comment != null) { + comments.add(comment); + } + } + } + } catch (Exception e) { + System.out.println("[网易云音乐] 解析评论失败: " + e.getMessage()); + } + return comments; + } + + private Comment parseCommentNode(JsonNode commentNode) { + try { + long commentId = commentNode.path("commentId").asLong(0); + String content = commentNode.path("content").asText(""); + String nickname = commentNode.path("user").path("nickname").asText(""); + long likedCount = commentNode.path("likedCount").asLong(0); + if (content.isEmpty()) { + return null; + } + return new Comment(content, nickname, (int) likedCount, commentId); + } catch (Exception e) { + return null; + } + } + + @Override + public CrawlResult> executeCrawl(String keyword) { + System.out.println("[网易云音乐] 开始搜索: " + keyword); + CrawlResult> result = searchSongs(keyword); + if (result.isSuccess() && result.getData() != null) { + return CrawlResult.success(result.getData(), result.getPlatform()); + } else { + return CrawlResult.failure(result != null ? result.getMessage() : "未知错误", result != null ? result.getPlatform() : Platform.NETEASE); + } + } + + @Override + public String getPlatformName() { + return "网易云音乐"; + } +} diff --git a/project/src/main/java/com/example/spider/book/DangdangBookSpider.java b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java new file mode 100644 index 0000000..db5153d --- /dev/null +++ b/project/src/main/java/com/example/spider/book/DangdangBookSpider.java @@ -0,0 +1,494 @@ +package com.example.spider.book; + +import com.example.core.CrawlResult; +import com.example.core.Platform; +import com.example.model.BookItem; +import com.example.service.impl.EnhancedHttpClient; +import com.example.strategy.EnhancedAntiBlockStrategy; +import com.example.strategy.SpiderStrategy; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import net.sourceforge.pinyin4j.PinyinHelper; +import net.sourceforge.pinyin4j.format.HanyuPinyinCaseType; +import net.sourceforge.pinyin4j.format.HanyuPinyinOutputFormat; +import net.sourceforge.pinyin4j.format.HanyuPinyinToneType; +import net.sourceforge.pinyin4j.format.HanyuPinyinVCharType; +import net.sourceforge.pinyin4j.format.exception.BadHanyuPinyinOutputFormatCombination; + +import java.net.URLEncoder; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.regex.Pattern; + +/** + * 当当图书爬虫 + * 支持搜索图书、获取热门榜单 + */ +public class DangdangBookSpider implements SpiderStrategy { + + private static final String BASE_URL = "https://www.dangdang.com"; + private static final String SEARCH_URL = "https://search.dangdang.com"; + private static final String REFERER = "https://www.dangdang.com/"; + + private final EnhancedHttpClient httpClient; + private final EnhancedAntiBlockStrategy antiBlockStrategy; + + public DangdangBookSpider() { + this.antiBlockStrategy = EnhancedAntiBlockStrategy.createForBook(); + this.httpClient = new EnhancedHttpClient("当当图书", antiBlockStrategy); + this.httpClient.setReferer(REFERER); + this.httpClient.setOrigin(BASE_URL); + } + + private String executeRequest(String url, Map headers) { + if (httpClient != null) { + Map simpleHeaders = new HashMap<>(); + simpleHeaders.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + simpleHeaders.put("Referer", REFERER); + simpleHeaders.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + simpleHeaders.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return httpClient.get(url, simpleHeaders); + } + return null; + } + + private Map getHeaders() { + Map headers = new HashMap<>(); + headers.put("User-Agent", antiBlockStrategy.getRandomUserAgent()); + headers.put("Referer", REFERER); + headers.put("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); + headers.put("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); + return headers; + } + + /** + * 搜索图书 + * 支持中文、英文、拼音输入 + * 只使用真实数据,不使用备用数据 + */ + public CrawlResult> searchBooks(String keyword) { + try { + // 检测是否为拼音输入(只包含字母且长度大于1,且不是常见英文单词) + if (isPinyin(keyword)) { + System.out.println("[当当图书] 检测到拼音输入: " + keyword); + CrawlResult> pinyinResult = searchByPinyin(keyword); + // 如果拼音搜索失败,回退到直接搜索 + if (!pinyinResult.isSuccess()) { + System.out.println("[当当图书] 拼音搜索失败,尝试直接搜索"); + } else { + return pinyinResult; + } + } + + String encoded = URLEncoder.encode(keyword, StandardCharsets.UTF_8); + String url = SEARCH_URL + "/?key=" + encoded + "&act=input&page_index=1&sort_type=sort_default"; + + System.out.println("[当当图书] 正在搜索: " + keyword); + String response = executeRequest(url, getHeaders()); + + if (response == null || response.isEmpty()) { + System.out.println("[当当图书] 搜索响应为空"); + return CrawlResult.failure("搜索响应为空", Platform.DANGDANG); + } + + List books = parseSearchResponse(response); + + if (books.isEmpty()) { + System.out.println("[当当图书] 搜索结果为空"); + return CrawlResult.failure("搜索结果为空", Platform.DANGDANG); + } + + System.out.println("[当当图书] 搜索到 " + books.size() + " 本图书"); + return CrawlResult.success(books, Platform.DANGDANG); + + } catch (Exception e) { + System.out.println("[当当图书] 搜索异常: " + e.getMessage()); + return CrawlResult.failure("搜索异常: " + e.getMessage(), Platform.DANGDANG); + } + } + + /** + * 检测字符串是否为拼音 + * 规则:只包含字母,长度大于1,且不是常见英文单词 + */ + private boolean isPinyin(String keyword) { + if (keyword == null || keyword.isEmpty() || keyword.length() < 2) { + return false; + } + + // 只包含字母的字符串 + Pattern pattern = Pattern.compile("^[a-zA-Z]+$"); + if (!pattern.matcher(keyword).matches()) { + return false; + } + + String lower = keyword.toLowerCase(); + + // 常见英文单词列表(排除这些词作为拼音) + String[] commonWords = { + "java", "python", "c", "c++", "javascript", "html", "css", "sql", "php", + "android", "ios", "windows", "linux", "mac", "book", "books", "read", + "free", "new", "best", "top", "hot", "sale", "buy", "price", "shop", + "good", "great", "love", "like", "know", "get", "go", "come", "make", + "time", "year", "way", "day", "man", "think", "take", "people", "into", + "just", "good", "over", "such", "some", "could", "would", "than", "then", + "first", "last", "give", "most", "even", "only", "come", "might", "now" + }; + + for (String word : commonWords) { + if (word.equals(lower)) { + return false; + } + } + + // 检查是否符合拼音规则(包含常见拼音韵母) + String[] pinyinPatterns = {"a", "o", "e", "i", "u", "v", "ai", "ei", "ui", "ao", "ou", "iu", "ie", "ue", "er", "an", "en", "in", "un", "vn", "ang", "eng", "ing", "ong"}; + for (String p : pinyinPatterns) { + if (lower.contains(p)) { + return true; + } + } + + // 如果长度较长且只包含字母,也视为拼音 + return keyword.length() >= 3; + } + + /** + * 通过拼音搜索图书 + * 策略:直接在候选图书列表中进行本地拼音匹配(当当网拼音搜索效果不佳) + */ + private CrawlResult> searchByPinyin(String pinyin) { + System.out.println("[当当图书] 通过拼音搜索: " + pinyin); + + // 策略1:先尝试直接搜索拼音(当当网可能支持拼音搜索) + CrawlResult> directResult = searchBooksByKeyword(pinyin); + boolean hasGoodResult = false; + + if (directResult.isSuccess() && !directResult.getData().isEmpty()) { + List books = directResult.getData(); + System.out.println("[当当图书] 直接拼音搜索找到 " + books.size() + " 本图书"); + + // 检查结果中是否有完全匹配的中文书籍(书名主要是中文,不是英文书名加中文前缀) + for (BookItem book : books) { + String title = book.getTitle(); + if (isMainlyChinese(title) && isPinyinMatch(title, pinyin)) { + hasGoodResult = true; + break; + } + } + + if (hasGoodResult) { + return directResult; + } + } + + // 策略2:在候选图书列表中进行本地拼音匹配 + System.out.println("[当当图书] 尝试本地拼音匹配..."); + List allBooks = new ArrayList<>(); + + // 获取多个候选来源(增加更多关键词提高匹配概率) + String[] keywords = {"畅销", "热门", "小说", "文学", "科幻", "经典", "名著", pinyin}; + for (String kw : keywords) { + CrawlResult> result = searchBooksByKeyword(kw); + if (result.isSuccess() && result.getData() != null) { + allBooks.addAll(result.getData()); + } + } + + if (allBooks.isEmpty()) { + System.out.println("[当当图书] 获取候选图书列表失败"); + return CrawlResult.failure("获取候选图书列表失败", Platform.DANGDANG); + } + + // 去重 + List