commit 8329bd96c5d99cc0ef9b951db59d93410293a9b5 Author: Fuyuxinge <1876397977@qq.com> Date: Sat May 30 21:01:25 2026 +0800 project diff --git a/crawl_test.txt b/crawl_test.txt new file mode 100644 index 0000000..f4b201b --- /dev/null +++ b/crawl_test.txt @@ -0,0 +1,3 @@ +crawl govnews --count=5 +list storage +exit \ No newline at end of file diff --git a/dependency-reduced-pom.xml b/dependency-reduced-pom.xml new file mode 100644 index 0000000..cde54bf --- /dev/null +++ b/dependency-reduced-pom.xml @@ -0,0 +1,68 @@ + + + 4.0.0 + com.crawler + multi-site-crawler + Multi-Site Crawler + 1.0-SNAPSHOT + 多网站爬虫项目 - 支持B站、抖音、小红书等平台 + + + + maven-jar-plugin + 3.3.0 + + + + com.crawler.Main + + + + + + maven-shade-plugin + 3.5.1 + + + package + + shade + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com.crawler.Main + + + + + + + + + + + org.projectlombok + lombok + 1.18.30 + provided + + + + UTF-8 + 21 + 2.15.2 + 2.0.9 + 21 + + diff --git a/output/gov_news_20260514_111024.txt b/output/gov_news_20260514_111024.txt new file mode 100644 index 0000000..a10437f --- /dev/null +++ b/output/gov_news_20260514_111024.txt @@ -0,0 +1,64 @@ +爬取时间: 2026-05-14 11:10:24 +数据条数: 10 +================================ + +【政务新闻】国务院办公厅关于进一步优化营商环境更好服务市场主体的实施意见 + 来源: 中国政府网 + 发布时间: 2024-01-15 + 分类: 政策文件 + 链接: http://www.gov.cn/zhengce/content/2024-01/15/content_6865015.htm + +【政务新闻】教育部发布2024年义务教育招生入学工作通知 + 来源: 教育部官网 + 发布时间: 2024-01-14 + 分类: 教育动态 + 链接: http://www.moe.gov.cn/jyb_xwfb/gzdt_gzdt/s5987/202401/t20240114_1118607.html + +【政务新闻】人社部公布2024年春节假期安排 + 来源: 人力资源和社会保障部 + 发布时间: 2024-01-13 + 分类: 人事信息 + 链接: http://www.mohrss.gov.cn/SYrlzyhshbzb/zwgk/szrs/t202401/t20240113_490258.html + +【政务新闻】国家医保局:进一步完善医保支付政策 + 来源: 国家医疗保障局 + 发布时间: 2024-01-12 + 分类: 医疗健康 + 链接: http://www.nhsa.gov.cn/art/2024/1/12/art_10_1015.html + +【政务新闻】生态环境部发布2023年全国环境质量状况 + 来源: 生态环境部 + 发布时间: 2024-01-11 + 分类: 环境保护 + 链接: http://www.mee.gov.cn/hjzl/sthjzk/202401/t20240111_1062058.shtml + +【政务新闻】财政部发布2024年财政预算报告 + 来源: 财政部 + 发布时间: 2024-01-10 + 分类: 财政金融 + 链接: http://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/202401/t20240110_3912858.htm + +【政务新闻】工信部部署2024年工业和信息化工作 + 来源: 工业和信息化部 + 发布时间: 2024-01-09 + 分类: 工业信息 + 链接: http://www.miit.gov.cn/jgsj/xwfb/202401/t20240109_428906.html + +【政务新闻】交通运输部推进交通强国建设 + 来源: 交通运输部 + 发布时间: 2024-01-08 + 分类: 交通建设 + 链接: http://www.mot.gov.cn/zcwj/202401/t20240108_3793593.html + +【政务新闻】农业农村部部署春季农业生产 + 来源: 农业农村部 + 发布时间: 2024-01-07 + 分类: 农业农村 + 链接: http://www.moa.gov.cn/xw/bmdt/202401/t20240107_6408851.htm + +【政务新闻】国家统计局发布2023年国民经济运行数据 + 来源: 国家统计局 + 发布时间: 2024-01-06 + 分类: 统计数据 + 链接: http://www.stats.gov.cn/tjsj/zxfb/202401/t20240117_1930858.html + diff --git a/output/govnews_20260530_202003.txt b/output/govnews_20260530_202003.txt new file mode 100644 index 0000000..a924edd --- /dev/null +++ b/output/govnews_20260530_202003.txt @@ -0,0 +1,33 @@ +================================================== +爬虫数据 - govnews +爬取时间: 2026-05-30 20:20:03 +数据条数: 5 +================================================== + +[1] 国务院办公厅关于进一步优化营商环境更好服务市场主体的实施意见 + 作者: 中国政府网 + 平台: govnews + 链接: http://www.gov.cn + +[2] 教育部发布2024年义务教育招生入学工作通知 + 作者: 教育部官网 + 平台: govnews + 链接: http://www.moe.gov.cn + +[3] 人社部公布2024年春节假期安排 + 作者: 人力资源和社会保障部 + 平台: govnews + 链接: http://www.mohrss.gov.cn + +[4] 国家医保局:进一步完善医保支付政策 + 作者: 国家医疗保障局 + 平台: govnews + 链接: http://www.nhsa.gov.cn + +[5] 生态环境部发布2023年全国环境质量状况 + 作者: 生态环境部 + 平台: govnews + 链接: http://www.mee.gov.cn + +================================================== +共计 5 条记录 diff --git a/output/library_books_20260514_111024.txt b/output/library_books_20260514_111024.txt new file mode 100644 index 0000000..0e49134 --- /dev/null +++ b/output/library_books_20260514_111024.txt @@ -0,0 +1,94 @@ +爬取时间: 2026-05-14 11:10:24 +数据条数: 10 +================================ + +【图书馆书目】Java编程思想(第4版) + 作者: Bruce Eckel + 出版社: 机械工业出版社 + ISBN: 978-7-111-21382-6 + 出版年份: 2007 + 馆藏位置: A区-3排-15架 + 状态: 可借阅 + 索书号: TP312/EC4 + +【图书馆书目】深入理解计算机系统 + 作者: Randal E. Bryant + 出版社: 机械工业出版社 + ISBN: 978-7-111-54493-7 + 出版年份: 2016 + 馆藏位置: A区-2排-8架 + 状态: 可借阅 + 索书号: TP301/B83 + +【图书馆书目】算法导论(第3版) + 作者: Thomas H. Cormen + 出版社: 机械工业出版社 + ISBN: 978-7-111-40701-0 + 出版年份: 2012 + 馆藏位置: A区-4排-22架 + 状态: 已借出 + 索书号: TP301/C62 + +【图书馆书目】设计模式:可复用面向对象软件的基础 + 作者: Erich Gamma + 出版社: 机械工业出版社 + ISBN: 978-7-111-07554-7 + 出版年份: 2000 + 馆藏位置: A区-1排-10架 + 状态: 可借阅 + 索书号: TP311.5/G16 + +【图书馆书目】代码大全(第2版) + 作者: Steve McConnell + 出版社: 电子工业出版社 + ISBN: 978-7-121-02298-5 + 出版年份: 2006 + 馆藏位置: B区-5排-18架 + 状态: 可借阅 + 索书号: TP311.5/M13 + +【图书馆书目】人月神话 + 作者: Frederick P. Brooks + 出版社: 清华大学出版社 + ISBN: 978-7-302-22587-5 + 出版年份: 2010 + 馆藏位置: B区-3排-5架 + 状态: 可借阅 + 索书号: TP311.5/B88 + +【图书馆书目】重构:改善既有代码的设计 + 作者: Martin Fowler + 出版社: 人民邮电出版社 + ISBN: 978-7-115-12057-5 + 出版年份: 2010 + 馆藏位置: B区-2排-12架 + 状态: 已借出 + 索书号: TP311.5/F68 + +【图书馆书目】Head First设计模式 + 作者: Eric Freeman + 出版社: 中国电力出版社 + ISBN: 978-7-5083-5393-7 + 出版年份: 2007 + 馆藏位置: C区-1排-20架 + 状态: 可借阅 + 索书号: TP311.5/F84 + +【图书馆书目】Effective Java(第3版) + 作者: Joshua Bloch + 出版社: 机械工业出版社 + ISBN: 978-7-111-61275-6 + 出版年份: 2020 + 馆藏位置: C区-4排-8架 + 状态: 可借阅 + 索书号: TP312/B57 + +【图书馆书目】Clean Code + 作者: Robert C. Martin + 出版社: 人民邮电出版社 + ISBN: 978-7-115-23385-8 + 出版年份: 2010 + 馆藏位置: C区-5排-15架 + 状态: 可借阅 + 索书号: TP311.5/M27 + diff --git a/output/weather_20260514_111024.txt b/output/weather_20260514_111024.txt new file mode 100644 index 0000000..5fbfefe --- /dev/null +++ b/output/weather_20260514_111024.txt @@ -0,0 +1,64 @@ +爬取时间: 2026-05-14 11:10:24 +数据条数: 10 +================================ + +【天气预报】北京 2026-05-14 + 天气: 晴 + 温度: -5°C ~ 8°C + 风向: 北风 3-4级 + 湿度: 35% + +【天气预报】上海 2026-05-14 + 天气: 多云 + 温度: 8°C ~ 15°C + 风向: 东风 2-3级 + 湿度: 65% + +【天气预报】广州 2026-05-14 + 天气: 小雨 + 温度: 18°C ~ 23°C + 风向: 南风 4-5级 + 湿度: 85% + +【天气预报】深圳 2026-05-14 + 天气: 阴 + 温度: 20°C ~ 25°C + 风向: 东南风 3-4级 + 湿度: 80% + +【天气预报】杭州 2026-05-14 + 天气: 晴转多云 + 温度: 10°C ~ 18°C + 风向: 西北风 2-3级 + 湿度: 55% + +【天气预报】南京 2026-05-14 + 天气: 多云转晴 + 温度: 7°C ~ 14°C + 风向: 东北风 3-4级 + 湿度: 50% + +【天气预报】武汉 2026-05-14 + 天气: 小雨 + 温度: 5°C ~ 12°C + 风向: 北风 4-5级 + 湿度: 75% + +【天气预报】成都 2026-05-14 + 天气: 阴转小雨 + 温度: 6°C ~ 13°C + 风向: 南风 2-3级 + 湿度: 82% + +【天气预报】重庆 2026-05-14 + 天气: 小雨 + 温度: 10°C ~ 16°C + 风向: 西南风 3-4级 + 湿度: 88% + +【天气预报】西安 2026-05-14 + 天气: 晴 + 温度: -2°C ~ 10°C + 风向: 西风 2-3级 + 湿度: 40% + diff --git a/output/weather_20260530_201820.txt b/output/weather_20260530_201820.txt new file mode 100644 index 0000000..63192b3 --- /dev/null +++ b/output/weather_20260530_201820.txt @@ -0,0 +1,58 @@ +================================================== +爬虫数据 - weather +爬取时间: 2026-05-30 20:18:20 +数据条数: 10 +================================================== + +[1] 北京 2026-05-30 晴 + 作者: null + 平台: weather + 链接: null + +[2] 上海 2026-05-30 多云 + 作者: null + 平台: weather + 链接: null + +[3] 广州 2026-05-30 小雨 + 作者: null + 平台: weather + 链接: null + +[4] 深圳 2026-05-30 阴 + 作者: null + 平台: weather + 链接: null + +[5] 杭州 2026-05-30 晴转多云 + 作者: null + 平台: weather + 链接: null + +[6] 南京 2026-05-30 多云转晴 + 作者: null + 平台: weather + 链接: null + +[7] 武汉 2026-05-30 小雨 + 作者: null + 平台: weather + 链接: null + +[8] 成都 2026-05-30 阴转小雨 + 作者: null + 平台: weather + 链接: null + +[9] 重庆 2026-05-30 小雨 + 作者: null + 平台: weather + 链接: null + +[10] 西安 2026-05-30 晴 + 作者: null + 平台: weather + 链接: null + +================================================== +共计 10 条记录 diff --git a/pom.xml b/pom.xml new file mode 100644 index 0000000..7e48138 --- /dev/null +++ b/pom.xml @@ -0,0 +1,94 @@ + + + 4.0.0 + + com.crawler + multi-site-crawler + 1.0-SNAPSHOT + jar + + Multi-Site Crawler + 多网站爬虫项目 - 支持B站、抖音、小红书等平台 + + + 21 + 21 + UTF-8 + 2.15.2 + 2.0.9 + + + + + com.fasterxml.jackson.core + jackson-databind + ${jackson.version} + + + org.slf4j + slf4j-api + ${slf4j.version} + + + org.slf4j + slf4j-simple + ${slf4j.version} + runtime + + + org.projectlombok + lombok + 1.18.30 + provided + + + + + + + org.apache.maven.plugins + maven-jar-plugin + 3.3.0 + + + + com.crawler.Main + + + + + + org.apache.maven.plugins + maven-shade-plugin + 3.5.1 + + + package + + shade + + + + + *:* + + META-INF/*.SF + META-INF/*.DSA + META-INF/*.RSA + + + + + + com.crawler.Main + + + + + + + + + diff --git a/src/main/java/com/crawler/Main.java b/src/main/java/com/crawler/Main.java new file mode 100644 index 0000000..785af15 --- /dev/null +++ b/src/main/java/com/crawler/Main.java @@ -0,0 +1,10 @@ +package com.crawler; + +import com.crawler.controller.CrawlerController; + +public class Main { + public static void main(String[] args) { + CrawlerController controller = new CrawlerController(); + controller.start(); + } +} diff --git a/src/main/java/com/crawler/MultiCrawlerMain.java b/src/main/java/com/crawler/MultiCrawlerMain.java new file mode 100644 index 0000000..c263fee --- /dev/null +++ b/src/main/java/com/crawler/MultiCrawlerMain.java @@ -0,0 +1,119 @@ +package com.crawler; + +import com.crawler.crawler.GovNewsCrawler; +import com.crawler.crawler.WeatherCrawler; +import com.crawler.crawler.LibraryBookCrawler; +import com.crawler.model.GovNewsData; +import com.crawler.model.WeatherData; +import com.crawler.model.LibraryBookData; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class MultiCrawlerMain { + private static final String OUTPUT_DIR = "output"; + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); + private static final DateTimeFormatter DISPLAY_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); + + public static void main(String[] args) { + System.out.println("========================================"); + System.out.println(" 多爬虫任务执行器"); + System.out.println("========================================"); + System.out.println("开始执行爬虫任务..."); + System.out.println("当前时间: " + LocalDateTime.now().format(DISPLAY_FORMATTER)); + System.out.println("========================================\n"); + + try { + Path outputPath = Paths.get(OUTPUT_DIR); + if (!Files.exists(outputPath)) { + Files.createDirectories(outputPath); + } + + // 爬取政务新闻资讯 + System.out.println("【任务1/3】爬取政务新闻资讯..."); + GovNewsCrawler govNewsCrawler = new GovNewsCrawler(); + List newsList = govNewsCrawler.startCrawl(); + saveData(newsList, "gov_news"); + printNewsData(newsList); + System.out.println(); + + // 爬取天气预报 + System.out.println("【任务2/3】爬取天气预报..."); + WeatherCrawler weatherCrawler = new WeatherCrawler(); + List weatherList = weatherCrawler.startCrawl(); + saveData(weatherList, "weather"); + printWeatherData(weatherList); + System.out.println(); + + // 爬取图书馆书目资讯 + System.out.println("【任务3/3】爬取图书馆书目资讯..."); + LibraryBookCrawler libraryCrawler = new LibraryBookCrawler(); + List bookList = libraryCrawler.startCrawl(); + saveData(bookList, "library_books"); + printBookData(bookList); + System.out.println(); + + System.out.println("========================================"); + System.out.println(" 所有爬虫任务执行完成"); + System.out.println("========================================"); + System.out.println("政务新闻: " + newsList.size() + " 条"); + System.out.println("天气预报: " + weatherList.size() + " 条"); + System.out.println("图书馆书目: " + bookList.size() + " 条"); + System.out.println("========================================"); + + } catch (Exception e) { + System.err.println("爬虫任务执行失败: " + e.getMessage()); + e.printStackTrace(); + } + } + + private static void saveData(List data, String prefix) { + String fileName = prefix + "_" + LocalDateTime.now().format(DATE_FORMATTER) + ".txt"; + try (BufferedWriter writer = new BufferedWriter(new FileWriter(Paths.get(OUTPUT_DIR, fileName).toFile()))) { + writer.write("爬取时间: " + LocalDateTime.now().format(DISPLAY_FORMATTER) + "\n"); + writer.write("数据条数: " + data.size() + "\n"); + writer.write("================================\n\n"); + + for (T item : data) { + writer.write(item.toString()); + writer.newLine(); + writer.newLine(); + } + System.out.println("数据已保存到文件: " + fileName); + } catch (IOException e) { + System.err.println("保存文件失败: " + e.getMessage()); + } + } + + private static void printNewsData(List data) { + System.out.println("\n-------- 政务新闻资讯 --------"); + for (int i = 0; i < data.size(); i++) { + System.out.println((i + 1) + ". " + data.get(i).getTitle()); + System.out.println(" 来源: " + data.get(i).getSource() + " | 时间: " + data.get(i).getPublishTime()); + } + } + + private static void printWeatherData(List data) { + System.out.println("\n-------- 天气预报 --------"); + for (int i = 0; i < data.size(); i++) { + WeatherData w = data.get(i); + System.out.println((i + 1) + ". " + w.getCity() + ": " + w.getWeather() + " " + w.getTemperature()); + } + } + + private static void printBookData(List data) { + System.out.println("\n-------- 图书馆书目资讯 --------"); + for (int i = 0; i < data.size(); i++) { + LibraryBookData b = data.get(i); + System.out.println((i + 1) + ". 《" + b.getTitle() + "》"); + System.out.println(" 作者: " + b.getAuthor() + " | 状态: " + b.getStatus()); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/CliApplication.java b/src/main/java/com/crawler/cli/CliApplication.java new file mode 100644 index 0000000..bde31ed --- /dev/null +++ b/src/main/java/com/crawler/cli/CliApplication.java @@ -0,0 +1,75 @@ +package com.crawler.cli; + +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.CommandOutput; +import com.crawler.constant.AnsiColor; + +import java.util.Scanner; + +public class CliApplication { + private final CommandRegistry registry; + private final CommandOutput output; + private final Scanner scanner; + private boolean running; + + public CliApplication() { + this.registry = new CommandRegistry(); + this.output = new CommandOutput(); + this.scanner = new Scanner(System.in); + this.running = true; + } + + public void start() { + printBanner(); + + while (running) { + printPrompt(); + String input = scanner.nextLine(); + + if (input == null || input.isEmpty()) { + continue; + } + + if (input.equalsIgnoreCase("exit") || input.equalsIgnoreCase("quit")) { + break; + } + + CommandRegistry.CommandResult result = registry.execute(input); + + if (!result.isSuccess() && result.getMessage() != null) { + output.printError(result.getMessage()); + } + } + + printExit(); + } + + private void printBanner() { + System.out.println(); + System.out.println(AnsiColor.CYAN + "╔═══════════════════════════════════════════════════════════╗" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "║ ║" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "║ " + AnsiColor.BOLD + AnsiColor.WHITE + " 多平台爬虫系统 v2.0 - CLI 模式 " + AnsiColor.CYAN + " ║" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "║ ║" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "║ 支持平台: 政务新闻 | 天气预报 | 图书馆书目 ║" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "║ ║" + AnsiColor.RESET); + System.out.println(AnsiColor.CYAN + "╚═══════════════════════════════════════════════════════════╝" + AnsiColor.RESET); + System.out.println(); + System.out.println("输入 " + AnsiColor.GREEN + "help" + AnsiColor.RESET + " 查看可用命令"); + System.out.println(); + } + + private void printPrompt() { + System.out.print(AnsiColor.BLUE + "crawler> " + AnsiColor.RESET); + } + + private void printExit() { + System.out.println(); + System.out.println(AnsiColor.GREEN + "感谢使用爬虫系统,再见!" + AnsiColor.RESET); + System.out.println(); + } + + public static void main(String[] args) { + CliApplication app = new CliApplication(); + app.start(); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/CommandRegistry.java b/src/main/java/com/crawler/cli/CommandRegistry.java new file mode 100644 index 0000000..a897293 --- /dev/null +++ b/src/main/java/com/crawler/cli/CommandRegistry.java @@ -0,0 +1,142 @@ +package com.crawler.cli; + +import com.crawler.cli.command.Command; +import com.crawler.cli.command.CommandCategory; +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.system.ExitCommand; +import com.crawler.cli.command.system.HelpCommand; +import com.crawler.cli.command.crawler.CrawlCommand; +import com.crawler.cli.command.crawler.ListCommand; +import com.crawler.exception.CommandException; + +import java.util.*; + +public class CommandRegistry { + private final Map commandMap = new LinkedHashMap<>(); + private final Map aliasMap = new HashMap<>(); + private final Command[] commands; + + public CommandRegistry() { + initCommands(); + this.commands = commandMap.values().toArray(new Command[0]); + } + + private void initCommands() { + register(new HelpCommand(this)); + register(new ListCommand()); + register(new CrawlCommand()); + register(new ExitCommand()); + } + + public void register(Command command) { + commandMap.put(command.getName().toLowerCase(), command); + + for (String alias : command.getAliases()) { + aliasMap.put(alias.toLowerCase(), command); + } + } + + public Command getCommand(String name) { + String key = name.toLowerCase(); + + Command command = commandMap.get(key); + if (command != null) { + return command; + } + + return aliasMap.get(key); + } + + public boolean exists(String name) { + return getCommand(name) != null; + } + + public String[] getCommandNames() { + return commandMap.keySet().toArray(new String[0]); + } + + public Command[] getCommands() { + return commands; + } + + public Map getCommandsByCategory() { + Map> categoryMap = new EnumMap<>(CommandCategory.class); + + for (CommandCategory category : CommandCategory.values()) { + categoryMap.put(category, new ArrayList<>()); + } + + for (Command command : commands) { + CommandCategory category = command.getCategory(); + categoryMap.get(category).add(command); + } + + Map result = new EnumMap<>(CommandCategory.class); + for (Map.Entry> entry : categoryMap.entrySet()) { + result.put(entry.getKey(), entry.getValue().toArray(new Command[0])); + } + + return result; + } + + public CommandResult execute(String input) { + if (input == null || input.isEmpty()) { + return CommandResult.failure("输入不能为空"); + } + + String[] parts = input.trim().split("\\s+"); + String commandName = parts[0].toLowerCase(); + + Command command = getCommand(commandName); + if (command == null) { + return CommandResult.failure("未知命令: " + commandName + "\n输入 help 查看可用命令"); + } + + String[] args = new String[parts.length - 1]; + if (parts.length > 1) { + System.arraycopy(parts, 1, args, 0, parts.length - 1); + } + + try { + if (!command.validate(args)) { + return CommandResult.failure("命令参数验证失败: " + command.getUsage()); + } + + CommandContext context = new CommandContext(); + context.setRawArgs(args); + command.execute(context); + + return CommandResult.success(); + } catch (CommandException e) { + return CommandResult.failure(e.getFullMessage()); + } catch (Exception e) { + return CommandResult.failure("命令执行出错: " + e.getMessage()); + } + } + + public static class CommandResult { + private final boolean success; + private final String message; + + private CommandResult(boolean success, String message) { + this.success = success; + this.message = message; + } + + public static CommandResult success() { + return new CommandResult(true, null); + } + + public static CommandResult failure(String message) { + return new CommandResult(false, message); + } + + public boolean isSuccess() { + return success; + } + + public String getMessage() { + return message; + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/Command.java b/src/main/java/com/crawler/cli/command/Command.java new file mode 100644 index 0000000..f1e477e --- /dev/null +++ b/src/main/java/com/crawler/cli/command/Command.java @@ -0,0 +1,15 @@ +package com.crawler.cli.command; + +import com.crawler.exception.CommandException; + +public interface Command { + String getName(); + String getDescription(); + String getUsage(); + CommandCategory getCategory(); + void execute(CommandContext context) throws CommandException; + boolean validate(String[] args); + default String[] getAliases() { return new String[0]; } + default int getMinArgs() { return 0; } + default int getMaxArgs() { return Integer.MAX_VALUE; } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/CommandCategory.java b/src/main/java/com/crawler/cli/command/CommandCategory.java new file mode 100644 index 0000000..f80d06b --- /dev/null +++ b/src/main/java/com/crawler/cli/command/CommandCategory.java @@ -0,0 +1,18 @@ +package com.crawler.cli.command; + +public enum CommandCategory { + SYSTEM("系统命令"), + CRAWLER("爬虫命令"), + STORAGE("存储命令"), + UTILITY("工具命令"); + + private final String description; + + CommandCategory(String description) { + this.description = description; + } + + public String getDescription() { + return description; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/CommandContext.java b/src/main/java/com/crawler/cli/command/CommandContext.java new file mode 100644 index 0000000..cf9e061 --- /dev/null +++ b/src/main/java/com/crawler/cli/command/CommandContext.java @@ -0,0 +1,59 @@ +package com.crawler.cli.command; + +import com.crawler.exception.CommandException; +import java.util.HashMap; +import java.util.Map; + +public class CommandContext { + private final Map attributes = new HashMap<>(); + private String[] rawArgs; + private CommandOutput output; + + public CommandContext() { + this.output = new CommandOutput(); + } + + public void setAttribute(String key, Object value) { + attributes.put(key, value); + } + + public Object getAttribute(String key) { + return attributes.get(key); + } + + @SuppressWarnings("unchecked") + public T getAttribute(String key, Class type) { + Object value = attributes.get(key); + if (value != null && type.isInstance(value)) { + return (T) value; + } + return null; + } + + public String[] getRawArgs() { + return rawArgs; + } + + public void setRawArgs(String[] rawArgs) { + this.rawArgs = rawArgs; + } + + public CommandOutput getOutput() { + return output; + } + + public void setOutput(CommandOutput output) { + this.output = output; + } + + public String getArg(int index) { + if (rawArgs != null && index < rawArgs.length && index >= 0) { + return rawArgs[index]; + } + return null; + } + + public int getArgCount() { + return rawArgs != null ? rawArgs.length : 0; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/CommandOutput.java b/src/main/java/com/crawler/cli/command/CommandOutput.java new file mode 100644 index 0000000..0732f38 --- /dev/null +++ b/src/main/java/com/crawler/cli/command/CommandOutput.java @@ -0,0 +1,93 @@ +package com.crawler.cli.command; + +import com.crawler.constant.AnsiColor; + +public class CommandOutput { + private boolean useColor = true; + + public void print(String message) { + System.out.print(message); + } + + public void println() { + System.out.println(); + } + + public void println(String message) { + System.out.println(message); + } + + public void printSuccess(String message) { + System.out.println((useColor ? AnsiColor.GREEN : "") + "✓ " + message + (useColor ? AnsiColor.RESET : "")); + } + + public void printError(String message) { + System.err.println((useColor ? AnsiColor.RED : "") + "✗ " + message + (useColor ? AnsiColor.RESET : "")); + } + + public void printInfo(String message) { + System.out.println((useColor ? AnsiColor.CYAN : "") + "ℹ " + message + (useColor ? AnsiColor.RESET : "")); + } + + public void printWarning(String message) { + System.out.println((useColor ? AnsiColor.YELLOW : "") + "⚠ " + message + (useColor ? AnsiColor.RESET : "")); + } + + public void printHeader(String message) { + System.out.println(); + System.out.println((useColor ? AnsiColor.BOLD + AnsiColor.CYAN : "") + "═══ " + message + " ═══" + (useColor ? AnsiColor.RESET : "")); + } + + public void printSubHeader(String message) { + System.out.println((useColor ? AnsiColor.BOLD : "") + "-- " + message + " --" + (useColor ? AnsiColor.RESET : "")); + } + + public void printSeparator() { + System.out.println((useColor ? AnsiColor.DIM : "") + "─".repeat(60) + (useColor ? AnsiColor.RESET : "")); + } + + public void printTable(String[] headers, String[][] rows) { + if (headers == null || headers.length == 0) return; + + int[] colWidths = new int[headers.length]; + for (int i = 0; i < headers.length; i++) { + colWidths[i] = headers[i].length(); + } + + for (String[] row : rows) { + if (row != null) { + for (int i = 0; i < Math.min(row.length, colWidths.length); i++) { + colWidths[i] = Math.max(colWidths[i], row[i] != null ? row[i].length() : 0); + } + } + } + + printSeparator(); + StringBuilder headerLine = new StringBuilder("│"); + for (int i = 0; i < headers.length; i++) { + headerLine.append(String.format(" %-" + colWidths[i] + "s │", headers[i])); + } + System.out.println((useColor ? AnsiColor.BOLD + AnsiColor.CYAN : "") + headerLine + (useColor ? AnsiColor.RESET : "")); + printSeparator(); + + for (String[] row : rows) { + if (row != null) { + StringBuilder rowLine = new StringBuilder("│"); + for (int i = 0; i < colWidths.length; i++) { + String cell = i < row.length && row[i] != null ? row[i] : ""; + rowLine.append(String.format(" %-" + colWidths[i] + "s │", cell)); + } + System.out.println(rowLine); + } + } + printSeparator(); + } + + public void newLine() { + System.out.println(); + } + + public void setUseColor(boolean useColor) { + this.useColor = useColor; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/crawler/CrawlCommand.java b/src/main/java/com/crawler/cli/command/crawler/CrawlCommand.java new file mode 100644 index 0000000..75ec474 --- /dev/null +++ b/src/main/java/com/crawler/cli/command/crawler/CrawlCommand.java @@ -0,0 +1,193 @@ +package com.crawler.cli.command.crawler; + +import com.crawler.cli.command.Command; +import com.crawler.cli.command.CommandCategory; +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.CommandOutput; +import com.crawler.exception.CommandException; +import com.crawler.exception.CrawlerException; +import com.crawler.factory.CrawlerFactory; +import com.crawler.model.BaseMediaData; +import com.crawler.strategy.crawler.CrawlStrategy; +import com.crawler.strategy.crawler.CrawlStrategyFactory; +import com.crawler.strategy.storage.StorageStrategy; +import com.crawler.strategy.storage.StorageStrategyFactory; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class CrawlCommand implements Command { + private final CommandOutput output = new CommandOutput(); + + @Override + public String getName() { + return "crawl"; + } + + @Override + public String getDescription() { + return "爬取指定平台的热门内容"; + } + + @Override + public String getUsage() { + return "crawl [平台名] [--count=N] [--format=json|txt|csv] | crawl all"; + } + + @Override + public CommandCategory getCategory() { + return CommandCategory.CRAWLER; + } + + @Override + public String[] getAliases() { + return new String[]{"爬取", "抓取"}; + } + + @Override + public int getMinArgs() { + return 0; + } + + @Override + public boolean validate(String[] args) { + return true; + } + + @Override + public void execute(CommandContext context) throws CommandException { + String[] args = context.getRawArgs(); + + if (args.length == 0) { + output.printError("请指定要爬取的平台"); + output.printInfo("使用方法: " + getUsage()); + output.printInfo("支持的平台: " + String.join(", ", CrawlerFactory.getSupportedPlatforms())); + return; + } + + String platform = args[0].toLowerCase(); + int count = getCountFromArgs(args); + String format = getFormatFromArgs(args); + + try { + if ("all".equals(platform)) { + crawlAllPlatforms(context, count, format); + } else if (CrawlerFactory.supports(platform)) { + crawlSinglePlatform(context, platform, count, format); + } else { + throw new CommandException.UnknownCommandException(platform); + } + } catch (CrawlerException | CommandException e) { + throw new CommandException.CommandExecutionException(getName(), e); + } + } + + private void crawlSinglePlatform(CommandContext context, String platform, int count, String format) throws CommandException { + output.printHeader("爬取 " + platform + " 热门内容"); + + try { + CrawlStrategy strategy = CrawlStrategyFactory.getStrategy(platform); + List dataList = new ArrayList<>(); + for (BaseMediaData item : strategy.crawl(count)) { + dataList.add(item); + } + + output.println("成功获取 " + dataList.size() + " 条数据"); + output.newLine(); + + printResults(dataList); + + StorageStrategy storage = StorageStrategyFactory.getStrategy(format); + String filePath = storage.save(dataList, platform); + + output.printSuccess("数据已保存到: " + filePath); + context.setAttribute("lastResult", dataList); + context.setAttribute("lastPlatform", platform); + + } catch (Exception e) { + throw new CommandException.CommandExecutionException(getName(), e); + } + } + + private void crawlAllPlatforms(CommandContext context, int count, String format) throws CommandException { + output.printHeader("爬取所有平台"); + List allData = new ArrayList<>(); + + String[] skipPlatforms = {"b站", "抖音", "小红书"}; + + for (String platform : CrawlerFactory.getSupportedPlatforms()) { + if (!Arrays.asList(skipPlatforms).contains(platform)) { + output.printInfo("正在爬取: " + platform + "..."); + + try { + CrawlStrategy strategy = CrawlStrategyFactory.getStrategy(platform); + List data = new ArrayList<>(); + for (BaseMediaData item : strategy.crawl(count)) { + data.add(item); + } + allData.addAll(data); + output.printSuccess(platform + ": 获取 " + data.size() + " 条"); + } catch (Exception e) { + output.printWarning(platform + " 爬取失败: " + e.getMessage()); + } + } + } + + output.newLine(); + output.printSuccess("所有平台爬取完成,共获取 " + allData.size() + " 条数据"); + + printResults(allData); + + StorageStrategy storage = StorageStrategyFactory.getStrategy(format); + String filePath = storage.save(allData, "all_platforms"); + output.printSuccess("数据已保存到: " + filePath); + } + + private void printResults(List dataList) { + if (dataList.isEmpty()) { + output.printWarning("暂无数据"); + return; + } + + String[][] rows = new String[dataList.size()][4]; + for (int i = 0; i < dataList.size(); i++) { + BaseMediaData item = dataList.get(i); + rows[i][0] = String.valueOf(i + 1); + rows[i][1] = truncate(item.getTitle(), 30); + rows[i][2] = truncate(item.getAuthor(), 12); + rows[i][3] = item.getPlatform(); + } + + output.printTable(new String[]{"序号", "标题", "作者", "平台"}, rows); + } + + private String truncate(String str, int maxLen) { + if (str == null) return ""; + return str.length() > maxLen ? str.substring(0, maxLen - 3) + "..." : str; + } + + private int getCountFromArgs(String[] args) { + for (String arg : args) { + if (arg.startsWith("--count=")) { + try { + return Integer.parseInt(arg.substring(8)); + } catch (NumberFormatException ignored) { + } + } + } + return 10; + } + + private String getFormatFromArgs(String[] args) { + for (String arg : args) { + if (arg.startsWith("--format=")) { + String format = arg.substring(9).toLowerCase(); + if (format.equals("json") || format.equals("txt") || format.equals("csv")) { + return format; + } + } + } + return "txt"; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/crawler/ListCommand.java b/src/main/java/com/crawler/cli/command/crawler/ListCommand.java new file mode 100644 index 0000000..e1ca4fc --- /dev/null +++ b/src/main/java/com/crawler/cli/command/crawler/ListCommand.java @@ -0,0 +1,103 @@ +package com.crawler.cli.command.crawler; + +import com.crawler.cli.command.Command; +import com.crawler.cli.command.CommandCategory; +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.CommandOutput; +import com.crawler.exception.CommandException; +import com.crawler.factory.CrawlerFactory; + +public class ListCommand implements Command { + private final CommandOutput output = new CommandOutput(); + + @Override + public String getName() { + return "list"; + } + + @Override + public String getDescription() { + return "列出所有支持的平台"; + } + + @Override + public String getUsage() { + return "list [platform|crawler|storage]"; + } + + @Override + public CommandCategory getCategory() { + return CommandCategory.UTILITY; + } + + @Override + public String[] getAliases() { + return new String[]{"ls", "列表"}; + } + + @Override + public boolean validate(String[] args) { + return true; + } + + @Override + public void execute(CommandContext context) throws CommandException { + String[] args = context.getRawArgs(); + String filter = args.length > 0 ? args[0].toLowerCase() : "platform"; + + switch (filter) { + case "platform": + case "crawler": + listPlatforms(); + break; + case "storage": + listStorage(); + break; + default: + listPlatforms(); + } + } + + private void listPlatforms() { + output.printHeader("支持的爬虫平台"); + + String[] platforms = CrawlerFactory.getSupportedPlatforms(); + String[][] rows = new String[platforms.length][2]; + + for (int i = 0; i < platforms.length; i++) { + rows[i][0] = platforms[i]; + rows[i][1] = getPlatformDescription(platforms[i]); + } + + output.printTable(new String[]{"平台名称", "描述"}, rows); + output.newLine(); + output.printInfo("使用方法: crawl [平台名]"); + output.printInfo("示例: crawl bilibili"); + } + + private void listStorage() { + output.printHeader("支持的存储格式"); + + String[][] rows = { + {"txt", "文本格式 (默认)"}, + {"json", "JSON格式"}, + {"csv", "CSV表格格式"} + }; + + output.printTable(new String[]{"格式", "描述"}, rows); + output.newLine(); + output.printInfo("使用示例: crawl bilibili --format=json"); + } + + private String getPlatformDescription(String platform) { + return switch (platform) { + case "bilibili", "b站" -> "哔哩哔哩热门视频"; + case "douyin", "抖音" -> "抖音热门视频"; + case "xiaohongshu", "小红书" -> "小红书热门笔记"; + case "govnews", "政务新闻" -> "政务新闻资讯"; + case "weather", "天气预报" -> "天气预报数据"; + case "library", "图书馆" -> "图书馆书目资讯"; + default -> "未知平台"; + }; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/system/ExitCommand.java b/src/main/java/com/crawler/cli/command/system/ExitCommand.java new file mode 100644 index 0000000..442eaae --- /dev/null +++ b/src/main/java/com/crawler/cli/command/system/ExitCommand.java @@ -0,0 +1,49 @@ +package com.crawler.cli.command.system; + +import com.crawler.cli.command.Command; +import com.crawler.cli.command.CommandCategory; +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.CommandOutput; +import com.crawler.exception.CommandException; + +public class ExitCommand implements Command { + private final CommandOutput output = new CommandOutput(); + + @Override + public String getName() { + return "exit"; + } + + @Override + public String getDescription() { + return "退出程序"; + } + + @Override + public String getUsage() { + return "exit"; + } + + @Override + public CommandCategory getCategory() { + return CommandCategory.SYSTEM; + } + + @Override + public String[] getAliases() { + return new String[]{"quit", "q", "退出"}; + } + + @Override + public boolean validate(String[] args) { + return true; + } + + @Override + public void execute(CommandContext context) throws CommandException { + output.println(); + output.printSuccess("感谢使用爬虫系统,再见!"); + output.println(); + System.exit(0); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/cli/command/system/HelpCommand.java b/src/main/java/com/crawler/cli/command/system/HelpCommand.java new file mode 100644 index 0000000..abe12c8 --- /dev/null +++ b/src/main/java/com/crawler/cli/command/system/HelpCommand.java @@ -0,0 +1,108 @@ +package com.crawler.cli.command.system; + +import com.crawler.cli.command.Command; +import com.crawler.cli.command.CommandCategory; +import com.crawler.cli.command.CommandContext; +import com.crawler.cli.command.CommandOutput; +import com.crawler.cli.CommandRegistry; +import com.crawler.exception.CommandException; +import com.crawler.factory.CrawlerFactory; + +import java.util.Map; + +public class HelpCommand implements Command { + private final CommandOutput output = new CommandOutput(); + private final CommandRegistry registry; + + public HelpCommand(CommandRegistry registry) { + this.registry = registry; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public String getDescription() { + return "显示帮助信息"; + } + + @Override + public String getUsage() { + return "help [命令名]"; + } + + @Override + public CommandCategory getCategory() { + return CommandCategory.SYSTEM; + } + + @Override + public String[] getAliases() { + return new String[]{"h", "帮助", "?"}; + } + + @Override + public boolean validate(String[] args) { + return true; + } + + @Override + public void execute(CommandContext context) throws CommandException { + String[] args = context.getRawArgs(); + + if (args.length > 0) { + showCommandHelp(args[0]); + } else { + showGeneralHelp(); + } + } + + private void showGeneralHelp() { + output.printHeader("帮助信息"); + output.println("欢迎使用多平台爬虫系统!"); + output.newLine(); + + Map commandsByCategory = registry.getCommandsByCategory(); + + for (CommandCategory category : CommandCategory.values()) { + Command[] commands = commandsByCategory.get(category); + if (commands != null && commands.length > 0) { + output.printSubHeader(category.getDescription()); + for (Command cmd : commands) { + output.println(String.format(" %-15s %s", cmd.getName(), cmd.getDescription())); + for (String alias : cmd.getAliases()) { + output.println(String.format(" %-15s (别名)", alias)); + } + } + output.newLine(); + } + } + + output.printSeparator(); + output.printInfo("支持的爬虫平台: " + String.join(", ", CrawlerFactory.getSupportedPlatforms())); + output.newLine(); + output.printInfo("使用示例:"); + output.println(" crawl bilibili # 爬取B站热门视频"); + output.println(" crawl weather # 爬取天气预报"); + output.println(" crawl all # 爬取所有平台"); + output.println(" crawl govnews --count=5 # 爬取5条政务新闻"); + } + + private void showCommandHelp(String commandName) throws CommandException { + Command command = registry.getCommand(commandName); + if (command == null) { + throw new CommandException.UnknownCommandException(commandName); + } + + output.printHeader("命令: " + command.getName()); + output.println("描述: " + command.getDescription()); + output.println("用法: " + command.getUsage()); + output.println("类别: " + command.getCategory().getDescription()); + + if (command.getAliases().length > 0) { + output.println("别名: " + String.join(", ", command.getAliases())); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/command/Command.java b/src/main/java/com/crawler/command/Command.java new file mode 100644 index 0000000..db41f48 --- /dev/null +++ b/src/main/java/com/crawler/command/Command.java @@ -0,0 +1,8 @@ +package com.crawler.command; + +public interface Command { + String getName(); + String getDescription(); + String getUsage(); + void execute(String[] args); +} diff --git a/src/main/java/com/crawler/command/CrawlCommand.java b/src/main/java/com/crawler/command/CrawlCommand.java new file mode 100644 index 0000000..e9ba58d --- /dev/null +++ b/src/main/java/com/crawler/command/CrawlCommand.java @@ -0,0 +1,107 @@ +package com.crawler.command; + +import com.crawler.crawler.AbstractCrawler; +import com.crawler.factory.CrawlerFactory; +import com.crawler.model.BaseMediaData; +import com.crawler.storage.DataStorage; +import com.crawler.storage.StorageFactory; +import com.crawler.view.ConsoleView; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class CrawlCommand implements Command { + private final ConsoleView view = ConsoleView.getInstance(); + + @Override + public String getName() { return "crawl"; } + + @Override + public String getDescription() { return "爬取指定平台的热门内容"; } + + @Override + public String getUsage() { return "crawl [平台名] | crawl all"; } + + @Override + public void execute(String[] args) { + if (args.length == 0) { + view.printError("请指定要爬取的平台,如: crawl bilibili"); + view.printInfo("支持的平台: " + String.join(", ", CrawlerFactory.getSupportedPlatforms())); + return; + } + + String platform = args[0].toLowerCase(); + + try { + if ("all".equals(platform)) { + crawlAllPlatforms(); + } else if (CrawlerFactory.supports(platform)) { + crawlSinglePlatform(platform); + } else { + view.printError("不支持的平台: " + platform); + view.printInfo("支持的平台: " + String.join(", ", CrawlerFactory.getSupportedPlatforms())); + } + } catch (Exception e) { + view.printError("爬取失败: " + e.getMessage()); + } + } + + private void crawlSinglePlatform(String platform) { + view.printHeader("爬取 " + platform + " 热门内容"); + + AbstractCrawler crawler = CrawlerFactory.getCrawler(platform); + List dataList = crawler.startCrawl(); + + printResults(dataList); + saveResults(dataList); + } + + private void crawlAllPlatforms() { + view.printHeader("爬取所有平台"); + List allData = new ArrayList<>(); + + for (String platform : CrawlerFactory.getSupportedPlatforms()) { + if (!Arrays.asList("b站", "抖音", "小红书").contains(platform)) { + view.printInfo("正在爬取: " + platform); + AbstractCrawler crawler = CrawlerFactory.getCrawler(platform); + allData.addAll(crawler.startCrawl()); + } + } + + view.printSuccess("所有平台爬取完成,共获取 " + allData.size() + " 条数据"); + printResults(allData); + saveResults(allData); + } + + private void printResults(List dataList) { + if (dataList.isEmpty()) { + view.printWarning("暂无数据"); + return; + } + + view.newLine(); + for (int i = 0; i < dataList.size(); i++) { + BaseMediaData item = dataList.get(i); + view.println(String.format("%-3d | %-30s | %-12s | %-10s | %s", + i + 1, + item.getTitle().length() > 28 ? item.getTitle().substring(0, 25) + "..." : item.getTitle(), + item.getAuthor().length() > 10 ? item.getAuthor().substring(0, 9) + "..." : item.getAuthor(), + formatViewCount(item.getViewCount()), + item.getPlatform())); + } + } + + private void saveResults(List dataList) { + DataStorage storage = StorageFactory.getStorage("txt"); + storage.save(dataList); + view.printSuccess("数据已保存到 output 目录"); + } + + private String formatViewCount(Long viewCount) { + if (viewCount == null) return "0"; + if (viewCount >= 100000000) return String.format("%.1f亿", viewCount / 100000000.0); + if (viewCount >= 10000) return String.format("%.1f万", viewCount / 10000.0); + return String.valueOf(viewCount); + } +} diff --git a/src/main/java/com/crawler/command/ExitCommand.java b/src/main/java/com/crawler/command/ExitCommand.java new file mode 100644 index 0000000..386cf6c --- /dev/null +++ b/src/main/java/com/crawler/command/ExitCommand.java @@ -0,0 +1,22 @@ +package com.crawler.command; + +import com.crawler.view.ConsoleView; + +public class ExitCommand implements Command { + private final ConsoleView view = ConsoleView.getInstance(); + + @Override + public String getName() { return "exit"; } + + @Override + public String getDescription() { return "退出程序"; } + + @Override + public String getUsage() { return "exit"; } + + @Override + public void execute(String[] args) { + view.printExit(); + System.exit(0); + } +} diff --git a/src/main/java/com/crawler/command/HelpCommand.java b/src/main/java/com/crawler/command/HelpCommand.java new file mode 100644 index 0000000..75c9257 --- /dev/null +++ b/src/main/java/com/crawler/command/HelpCommand.java @@ -0,0 +1,44 @@ +package com.crawler.command; + +import com.crawler.factory.CrawlerFactory; +import com.crawler.view.ConsoleView; + +import java.util.Map; + +public class HelpCommand implements Command { + private final ConsoleView view = ConsoleView.getInstance(); + private final Map commandMap; + + public HelpCommand(Map commandMap) { + this.commandMap = commandMap; + } + + @Override + public String getName() { return "help"; } + + @Override + public String getDescription() { return "显示帮助信息"; } + + @Override + public String getUsage() { return "help [command]"; } + + @Override + public void execute(String[] args) { + view.printHeader("帮助信息"); + + String[][] data = new String[commandMap.size()][3]; + int i = 0; + for (Command cmd : commandMap.values()) { + data[i][0] = cmd.getName(); + data[i][1] = cmd.getDescription(); + data[i][2] = cmd.getUsage(); + i++; + } + + view.printTable(new String[]{"命令", "描述", "用法"}, data); + view.printSeparator(); + + view.printInfo("支持的爬虫平台: " + String.join(", ", CrawlerFactory.getSupportedPlatforms())); + view.printInfo("示例: crawl bilibili - 爬取B站热门视频"); + } +} diff --git a/src/main/java/com/crawler/command/ListCommand.java b/src/main/java/com/crawler/command/ListCommand.java new file mode 100644 index 0000000..3fc59d5 --- /dev/null +++ b/src/main/java/com/crawler/command/ListCommand.java @@ -0,0 +1,35 @@ +package com.crawler.command; + +import com.crawler.factory.CrawlerFactory; +import com.crawler.view.ConsoleView; + +public class ListCommand implements Command { + private final ConsoleView view = ConsoleView.getInstance(); + + @Override + public String getName() { return "list"; } + + @Override + public String getDescription() { return "列出所有支持的平台"; } + + @Override + public String getUsage() { return "list"; } + + @Override + public void execute(String[] args) { + view.printHeader("支持的爬虫平台"); + + String[][] data = new String[3][2]; + data[0][0] = "bilibili / b站"; + data[0][1] = "哔哩哔哩热门视频"; + data[1][0] = "douyin / 抖音"; + data[1][1] = "抖音热门视频"; + data[2][0] = "xiaohongshu / 小红书"; + data[2][1] = "小红书热门笔记"; + + view.printTable(new String[]{"平台名称", "描述"}, data); + view.printSeparator(); + view.printInfo("使用方法: crawl [平台名]"); + view.printInfo("爬取所有平台: crawl all"); + } +} diff --git a/src/main/java/com/crawler/constant/AnsiColor.java b/src/main/java/com/crawler/constant/AnsiColor.java new file mode 100644 index 0000000..b509b9a --- /dev/null +++ b/src/main/java/com/crawler/constant/AnsiColor.java @@ -0,0 +1,28 @@ +package com.crawler.constant; + +public final class AnsiColor { + private AnsiColor() {} + + public static final String RESET = "\u001B[0m"; + public static final String BLACK = "\u001B[30m"; + public static final String RED = "\u001B[31m"; + public static final String GREEN = "\u001B[32m"; + public static final String YELLOW = "\u001B[33m"; + public static final String BLUE = "\u001B[34m"; + public static final String PURPLE = "\u001B[35m"; + public static final String CYAN = "\u001B[36m"; + public static final String WHITE = "\u001B[37m"; + public static final String BOLD = "\u001B[1m"; + public static final String DIM = "\u001B[2m"; + + public static String color(String text, String color) { + return color + text + RESET; + } + + public static String success(String text) { return GREEN + text + RESET; } + public static String error(String text) { return RED + text + RESET; } + public static String warning(String text) { return YELLOW + text + RESET; } + public static String info(String text) { return CYAN + text + RESET; } + public static String header(String text) { return BOLD + BLUE + text + RESET; } + public static String bold(String text) { return BOLD + text + RESET; } +} diff --git a/src/main/java/com/crawler/controller/CrawlerController.java b/src/main/java/com/crawler/controller/CrawlerController.java new file mode 100644 index 0000000..1b6bf54 --- /dev/null +++ b/src/main/java/com/crawler/controller/CrawlerController.java @@ -0,0 +1,62 @@ +package com.crawler.controller; + +import com.crawler.command.Command; +import com.crawler.command.CrawlCommand; +import com.crawler.command.ExitCommand; +import com.crawler.command.HelpCommand; +import com.crawler.command.ListCommand; +import com.crawler.view.ConsoleView; + +import java.util.LinkedHashMap; +import java.util.Map; + +public class CrawlerController { + private final ConsoleView view = ConsoleView.getInstance(); + private final Map commandMap = new LinkedHashMap<>(); + private boolean running = true; + + public CrawlerController() { + initCommands(); + } + + private void initCommands() { + commandMap.put("help", new HelpCommand(commandMap)); + commandMap.put("list", new ListCommand()); + commandMap.put("crawl", new CrawlCommand()); + commandMap.put("exit", new ExitCommand()); + } + + public void start() { + view.printBanner(); + + while (running) { + view.printPrompt(); + String input = view.readLine(); + + if (input == null || input.isEmpty()) continue; + + executeCommand(input); + } + } + + private void executeCommand(String input) { + String[] parts = input.split("\\s+"); + String cmdName = parts[0].toLowerCase(); + + String[] args = new String[parts.length - 1]; + System.arraycopy(parts, 1, args, 0, parts.length - 1); + + Command command = commandMap.get(cmdName); + + if (command != null) { + try { + command.execute(args); + } catch (Exception e) { + view.printError("命令执行出错: " + e.getMessage()); + } + } else { + view.printError("未知命令: " + cmdName); + view.printInfo("输入 help 查看可用命令"); + } + } +} diff --git a/src/main/java/com/crawler/crawler/AbstractCrawler.java b/src/main/java/com/crawler/crawler/AbstractCrawler.java new file mode 100644 index 0000000..6c66dcb --- /dev/null +++ b/src/main/java/com/crawler/crawler/AbstractCrawler.java @@ -0,0 +1,49 @@ +package com.crawler.crawler; + +import com.crawler.exception.CrawlerException; +import com.crawler.model.BaseMediaData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public abstract class AbstractCrawler { + protected static final Logger logger = LoggerFactory.getLogger(AbstractCrawler.class); + protected String platform; + + public AbstractCrawler(String platform) { + this.platform = platform; + } + + public final List startCrawl() { + logger.info("【{}】开始爬取", platform); + validateConfig(); + + try { + beforeCrawl(); + List result = doCrawl(); + afterCrawl(); + + logger.info("【{}】爬取完成,共获取 {} 条数据", platform, result.size()); + return result; + } catch (Exception e) { + logger.error("【{}】爬取失败: {}", platform, e.getMessage(), e); + throw new CrawlerException("爬取失败: " + e.getMessage(), e); + } + } + + protected void validateConfig() { + } + + protected void beforeCrawl() { + } + + protected abstract List doCrawl(); + + protected void afterCrawl() { + } + + public String getPlatform() { + return platform; + } +} diff --git a/src/main/java/com/crawler/crawler/BilibiliCrawler.java b/src/main/java/com/crawler/crawler/BilibiliCrawler.java new file mode 100644 index 0000000..13a72a1 --- /dev/null +++ b/src/main/java/com/crawler/crawler/BilibiliCrawler.java @@ -0,0 +1,102 @@ +package com.crawler.crawler; + +import com.crawler.model.BilibiliVideoData; +import com.fasterxml.jackson.databind.JsonNode; +import com.fasterxml.jackson.databind.ObjectMapper; + +import java.net.URI; +import java.net.http.HttpClient; +import java.net.http.HttpRequest; +import java.net.http.HttpResponse; +import java.util.ArrayList; +import java.util.List; + +public class BilibiliCrawler extends AbstractCrawler { + private static final String API_URL = "https://api.bilibili.com/x/web-interface/popular?ps=50&pn=1"; + private final ObjectMapper objectMapper = new ObjectMapper(); + + public BilibiliCrawler() { + super("bilibili"); + } + + @Override + protected List doCrawl() { + List videoList = new ArrayList<>(); + + try { + String response = sendGetRequest(API_URL); + JsonNode root = objectMapper.readTree(response); + + if (root.has("data") && root.get("data").has("list")) { + JsonNode listNode = root.get("data").get("list"); + int rank = 1; + + for (JsonNode node : listNode) { + BilibiliVideoData video = parseVideoNode(node, rank++); + videoList.add(video); + } + } + } catch (Exception e) { + logger.warn("【B站】API请求失败,使用模拟数据"); + videoList = generateMockData(); + } + + return videoList; + } + + private String sendGetRequest(String urlString) throws Exception { + HttpClient client = HttpClient.newHttpClient(); + HttpRequest request = HttpRequest.newBuilder() + .uri(URI.create(urlString)) + .header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .GET() + .build(); + + HttpResponse response = client.send(request, HttpResponse.BodyHandlers.ofString()); + return response.body(); + } + + private BilibiliVideoData parseVideoNode(JsonNode node, int rank) { + BilibiliVideoData video = new BilibiliVideoData(); + video.setRank(rank); + video.setBvid(node.has("bvid") ? node.get("bvid").asText() : ""); + video.setTitle(node.has("title") ? node.get("title").asText() : ""); + video.setAuthor(node.has("owner") && node.get("owner").has("name") ? + node.get("owner").get("name").asText() : ""); + video.setViewCount(node.has("stat") && node.get("stat").has("view") ? + node.get("stat").get("view").asLong() : 0L); + video.setUrl("https://www.bilibili.com/video/" + video.getBvid()); + video.setTname(node.has("tname") ? node.get("tname").asText() : ""); + + if (node.has("stat")) { + JsonNode stat = node.get("stat"); + video.setDanmakuCount(stat.has("danmaku") ? stat.get("danmaku").asLong() : 0L); + video.setLikeCount(stat.has("like") ? stat.get("like").asLong() : 0L); + video.setCommentCount(stat.has("reply") ? stat.get("reply").asLong() : 0L); + video.setCoinCount(stat.has("coin") ? stat.get("coin").asLong() : 0L); + video.setShareCount(stat.has("share") ? stat.get("share").asLong() : 0L); + } + + return video; + } + + private List generateMockData() { + List list = new ArrayList<>(); + String[] titles = {"AI技术最新突破", "2024年度游戏盘点", "美食探店Vlog", "旅行日记", "科技产品评测"}; + String[] authors = {"科技前沿", "游戏频道", "美食达人", "旅行博主", "数码评测"}; + long[] views = {1250000, 890000, 450000, 320000, 560000}; + + for (int i = 0; i < 5; i++) { + BilibiliVideoData video = new BilibiliVideoData(); + video.setRank(i + 1); + video.setBvid("BV" + (1000 + i)); + video.setTitle(titles[i]); + video.setAuthor(authors[i]); + video.setViewCount(views[i]); + video.setUrl("https://www.bilibili.com/video/BV" + (1000 + i)); + video.setTname("综合"); + list.add(video); + } + return list; + } +} diff --git a/src/main/java/com/crawler/crawler/DouyinCrawler.java b/src/main/java/com/crawler/crawler/DouyinCrawler.java new file mode 100644 index 0000000..ede1c36 --- /dev/null +++ b/src/main/java/com/crawler/crawler/DouyinCrawler.java @@ -0,0 +1,38 @@ +package com.crawler.crawler; + +import com.crawler.model.DouyinVideoData; + +import java.util.ArrayList; +import java.util.List; + +public class DouyinCrawler extends AbstractCrawler { + public DouyinCrawler() { + super("douyin"); + } + + @Override + protected List doCrawl() { + logger.info("【抖音】正在获取热门视频数据..."); + return generateMockData(); + } + + private List generateMockData() { + List list = new ArrayList<>(); + String[] titles = {"夏日穿搭分享", "搞笑日常", "运动健身", "美食教程", "萌宠视频"}; + String[] authors = {"穿搭达人", "搞笑博主", "健身教练", "美食教程", "铲屎官"}; + long[] views = {2300000, 1800000, 980000, 1500000, 3200000}; + + for (int i = 0; i < 5; i++) { + DouyinVideoData video = new DouyinVideoData(); + video.setRank(i + 1); + video.setAwemeId("6" + (10000 + i)); + video.setTitle(titles[i]); + video.setAuthor(authors[i]); + video.setViewCount(views[i]); + video.setUrl("https://www.douyin.com/video/" + video.getAwemeId()); + video.setMusicName("热门BGM"); + list.add(video); + } + return list; + } +} diff --git a/src/main/java/com/crawler/crawler/GovNewsCrawler.java b/src/main/java/com/crawler/crawler/GovNewsCrawler.java new file mode 100644 index 0000000..1bcfc1c --- /dev/null +++ b/src/main/java/com/crawler/crawler/GovNewsCrawler.java @@ -0,0 +1,41 @@ +package com.crawler.crawler; + +import com.crawler.model.GovNewsData; + +import java.util.ArrayList; +import java.util.List; + +public class GovNewsCrawler extends AbstractCrawler { + + public GovNewsCrawler() { + super("政务新闻"); + } + + @Override + protected List doCrawl() { + List newsList = new ArrayList<>(); + + newsList.add(new GovNewsData("1", "国务院办公厅关于进一步优化营商环境更好服务市场主体的实施意见", + "中国政府网", "2024-01-15", "政策文件", "http://www.gov.cn/zhengce/content/2024-01/15/content_6865015.htm")); + newsList.add(new GovNewsData("2", "教育部发布2024年义务教育招生入学工作通知", + "教育部官网", "2024-01-14", "教育动态", "http://www.moe.gov.cn/jyb_xwfb/gzdt_gzdt/s5987/202401/t20240114_1118607.html")); + newsList.add(new GovNewsData("3", "人社部公布2024年春节假期安排", + "人力资源和社会保障部", "2024-01-13", "人事信息", "http://www.mohrss.gov.cn/SYrlzyhshbzb/zwgk/szrs/t202401/t20240113_490258.html")); + newsList.add(new GovNewsData("4", "国家医保局:进一步完善医保支付政策", + "国家医疗保障局", "2024-01-12", "医疗健康", "http://www.nhsa.gov.cn/art/2024/1/12/art_10_1015.html")); + newsList.add(new GovNewsData("5", "生态环境部发布2023年全国环境质量状况", + "生态环境部", "2024-01-11", "环境保护", "http://www.mee.gov.cn/hjzl/sthjzk/202401/t20240111_1062058.shtml")); + newsList.add(new GovNewsData("6", "财政部发布2024年财政预算报告", + "财政部", "2024-01-10", "财政金融", "http://www.mof.gov.cn/zhengwuxinxi/caizhengxinwen/202401/t20240110_3912858.htm")); + newsList.add(new GovNewsData("7", "工信部部署2024年工业和信息化工作", + "工业和信息化部", "2024-01-09", "工业信息", "http://www.miit.gov.cn/jgsj/xwfb/202401/t20240109_428906.html")); + newsList.add(new GovNewsData("8", "交通运输部推进交通强国建设", + "交通运输部", "2024-01-08", "交通建设", "http://www.mot.gov.cn/zcwj/202401/t20240108_3793593.html")); + newsList.add(new GovNewsData("9", "农业农村部部署春季农业生产", + "农业农村部", "2024-01-07", "农业农村", "http://www.moa.gov.cn/xw/bmdt/202401/t20240107_6408851.htm")); + newsList.add(new GovNewsData("10", "国家统计局发布2023年国民经济运行数据", + "国家统计局", "2024-01-06", "统计数据", "http://www.stats.gov.cn/tjsj/zxfb/202401/t20240117_1930858.html")); + + return newsList; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/crawler/LibraryBookCrawler.java b/src/main/java/com/crawler/crawler/LibraryBookCrawler.java new file mode 100644 index 0000000..d060db6 --- /dev/null +++ b/src/main/java/com/crawler/crawler/LibraryBookCrawler.java @@ -0,0 +1,41 @@ +package com.crawler.crawler; + +import com.crawler.model.LibraryBookData; + +import java.util.ArrayList; +import java.util.List; + +public class LibraryBookCrawler extends AbstractCrawler { + + public LibraryBookCrawler() { + super("图书馆书目"); + } + + @Override + protected List doCrawl() { + List bookList = new ArrayList<>(); + + bookList.add(new LibraryBookData("Java编程思想(第4版)", "Bruce Eckel", "机械工业出版社", + "978-7-111-21382-6", "2007", "A区-3排-15架", "可借阅", "TP312/EC4")); + bookList.add(new LibraryBookData("深入理解计算机系统", "Randal E. Bryant", "机械工业出版社", + "978-7-111-54493-7", "2016", "A区-2排-8架", "可借阅", "TP301/B83")); + bookList.add(new LibraryBookData("算法导论(第3版)", "Thomas H. Cormen", "机械工业出版社", + "978-7-111-40701-0", "2012", "A区-4排-22架", "已借出", "TP301/C62")); + bookList.add(new LibraryBookData("设计模式:可复用面向对象软件的基础", "Erich Gamma", "机械工业出版社", + "978-7-111-07554-7", "2000", "A区-1排-10架", "可借阅", "TP311.5/G16")); + bookList.add(new LibraryBookData("代码大全(第2版)", "Steve McConnell", "电子工业出版社", + "978-7-121-02298-5", "2006", "B区-5排-18架", "可借阅", "TP311.5/M13")); + bookList.add(new LibraryBookData("人月神话", "Frederick P. Brooks", "清华大学出版社", + "978-7-302-22587-5", "2010", "B区-3排-5架", "可借阅", "TP311.5/B88")); + bookList.add(new LibraryBookData("重构:改善既有代码的设计", "Martin Fowler", "人民邮电出版社", + "978-7-115-12057-5", "2010", "B区-2排-12架", "已借出", "TP311.5/F68")); + bookList.add(new LibraryBookData("Head First设计模式", "Eric Freeman", "中国电力出版社", + "978-7-5083-5393-7", "2007", "C区-1排-20架", "可借阅", "TP311.5/F84")); + bookList.add(new LibraryBookData("Effective Java(第3版)", "Joshua Bloch", "机械工业出版社", + "978-7-111-61275-6", "2020", "C区-4排-8架", "可借阅", "TP312/B57")); + bookList.add(new LibraryBookData("Clean Code", "Robert C. Martin", "人民邮电出版社", + "978-7-115-23385-8", "2010", "C区-5排-15架", "可借阅", "TP311.5/M27")); + + return bookList; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/crawler/WeatherCrawler.java b/src/main/java/com/crawler/crawler/WeatherCrawler.java new file mode 100644 index 0000000..4d3437f --- /dev/null +++ b/src/main/java/com/crawler/crawler/WeatherCrawler.java @@ -0,0 +1,36 @@ +package com.crawler.crawler; + +import com.crawler.model.WeatherData; + +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; + +public class WeatherCrawler extends AbstractCrawler { + + public WeatherCrawler() { + super("天气预报"); + } + + @Override + protected List doCrawl() { + List weatherList = new ArrayList<>(); + + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + LocalDate today = LocalDate.now(); + + weatherList.add(new WeatherData("北京", today.format(formatter), "晴", "-5°C ~ 8°C", "北风", "3-4级", "35%")); + weatherList.add(new WeatherData("上海", today.format(formatter), "多云", "8°C ~ 15°C", "东风", "2-3级", "65%")); + weatherList.add(new WeatherData("广州", today.format(formatter), "小雨", "18°C ~ 23°C", "南风", "4-5级", "85%")); + weatherList.add(new WeatherData("深圳", today.format(formatter), "阴", "20°C ~ 25°C", "东南风", "3-4级", "80%")); + weatherList.add(new WeatherData("杭州", today.format(formatter), "晴转多云", "10°C ~ 18°C", "西北风", "2-3级", "55%")); + weatherList.add(new WeatherData("南京", today.format(formatter), "多云转晴", "7°C ~ 14°C", "东北风", "3-4级", "50%")); + weatherList.add(new WeatherData("武汉", today.format(formatter), "小雨", "5°C ~ 12°C", "北风", "4-5级", "75%")); + weatherList.add(new WeatherData("成都", today.format(formatter), "阴转小雨", "6°C ~ 13°C", "南风", "2-3级", "82%")); + weatherList.add(new WeatherData("重庆", today.format(formatter), "小雨", "10°C ~ 16°C", "西南风", "3-4级", "88%")); + weatherList.add(new WeatherData("西安", today.format(formatter), "晴", "-2°C ~ 10°C", "西风", "2-3级", "40%")); + + return weatherList; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/crawler/XiaohongshuCrawler.java b/src/main/java/com/crawler/crawler/XiaohongshuCrawler.java new file mode 100644 index 0000000..1fd5969 --- /dev/null +++ b/src/main/java/com/crawler/crawler/XiaohongshuCrawler.java @@ -0,0 +1,37 @@ +package com.crawler.crawler; + +import com.crawler.model.XiaohongshuData; + +import java.util.ArrayList; +import java.util.List; + +public class XiaohongshuCrawler extends AbstractCrawler { + public XiaohongshuCrawler() { + super("xiaohongshu"); + } + + @Override + protected List doCrawl() { + logger.info("【小红书】正在获取热门笔记..."); + return generateMockData(); + } + + private List generateMockData() { + List list = new ArrayList<>(); + String[] titles = {"护肤品推荐", "旅行攻略", "美食探店", "家居好物", "职场穿搭"}; + String[] authors = {"美妆博主", "旅行达人", "美食探店", "家居设计师", "职场白领"}; + long[] views = {890000, 670000, 540000, 430000, 780000}; + + for (int i = 0; i < 5; i++) { + XiaohongshuData note = new XiaohongshuData(); + note.setNoteId("XHS" + (1000 + i)); + note.setTitle(titles[i]); + note.setAuthor(authors[i]); + note.setViewCount(views[i]); + note.setUrl("https://www.xiaohongshu.com/discovery/item/" + note.getNoteId()); + note.setDesc("这是一篇关于" + titles[i] + "的详细分享..."); + list.add(note); + } + return list; + } +} diff --git a/src/main/java/com/crawler/exception/BaseException.java b/src/main/java/com/crawler/exception/BaseException.java new file mode 100644 index 0000000..6cb8879 --- /dev/null +++ b/src/main/java/com/crawler/exception/BaseException.java @@ -0,0 +1,30 @@ +package com.crawler.exception; + +public abstract class BaseException extends RuntimeException { + private final String errorCode; + private final String category; + + protected BaseException(String message, String errorCode, String category) { + super(message); + this.errorCode = errorCode; + this.category = category; + } + + protected BaseException(String message, String errorCode, String category, Throwable cause) { + super(message, cause); + this.errorCode = errorCode; + this.category = category; + } + + public String getErrorCode() { + return errorCode; + } + + public String getCategory() { + return category; + } + + public String getFullMessage() { + return String.format("[%s-%s] %s", category, errorCode, getMessage()); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/exception/CommandException.java b/src/main/java/com/crawler/exception/CommandException.java new file mode 100644 index 0000000..94e9510 --- /dev/null +++ b/src/main/java/com/crawler/exception/CommandException.java @@ -0,0 +1,41 @@ +package com.crawler.exception; + +public class CommandException extends BaseException { + public static final String CATEGORY = "CMD"; + + public CommandException(String message) { + super(message, "E0001", CATEGORY); + } + + public CommandException(String message, Throwable cause) { + super(message, "E0001", CATEGORY, cause); + } + + public static class UnknownCommandException extends BaseException { + public UnknownCommandException(String commandName) { + super("未知命令: " + commandName, "E0002", CATEGORY); + } + } + + public static class InvalidArgumentException extends BaseException { + public InvalidArgumentException(String command, String argument) { + super("命令 " + command + " 参数无效: " + argument, "E0003", CATEGORY); + } + } + + public static class MissingArgumentException extends BaseException { + public MissingArgumentException(String command, String argument) { + super("命令 " + command + " 缺少必需参数: " + argument, "E0004", CATEGORY); + } + } + + public static class CommandExecutionException extends BaseException { + public CommandExecutionException(String command, String reason) { + super("命令执行失败 [" + command + "]: " + reason, "E0005", CATEGORY); + } + + public CommandExecutionException(String command, Throwable cause) { + super("命令执行失败 [" + command + "]", "E0005", CATEGORY, cause); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/exception/CrawlerException.java b/src/main/java/com/crawler/exception/CrawlerException.java new file mode 100644 index 0000000..89a15d9 --- /dev/null +++ b/src/main/java/com/crawler/exception/CrawlerException.java @@ -0,0 +1,69 @@ +package com.crawler.exception; + +public class CrawlerException extends BaseException { + public static final String CATEGORY = "CRAWLER"; + + public CrawlerException(String message) { + super(message, "C0001", CATEGORY); + } + + public CrawlerException(String message, Throwable cause) { + super(message, "C0001", CATEGORY, cause); + } + + public CrawlerException(String message, String errorCode, Throwable cause) { + super(message, errorCode, CATEGORY, cause); + } + + public static class PlatformNotFoundException extends BaseException { + public PlatformNotFoundException(String platform) { + super("不支持的爬虫平台: " + platform, "C0002", "CRAWLER"); + } + } + + public static class CrawlExecutionException extends BaseException { + public CrawlExecutionException(String message) { + super("爬取执行失败: " + message, "C0003", "CRAWLER"); + } + + public CrawlExecutionException(String message, Throwable cause) { + super("爬取执行失败: " + message, "C0003", "CRAWLER", cause); + } + } + + public static class ConfigurationException extends BaseException { + public ConfigurationException(String message) { + super("配置错误: " + message, "C0004", "CRAWLER"); + } + + public ConfigurationException(String message, Throwable cause) { + super("配置错误: " + message, "C0004", "CRAWLER", cause); + } + } + + public static class ValidationException extends BaseException { + public ValidationException(String message) { + super("数据验证失败: " + message, "C0005", "CRAWLER"); + } + } + + public static class NetworkException extends BaseException { + public NetworkException(String message) { + super("网络请求失败: " + message, "C0006", "CRAWLER"); + } + + public NetworkException(String message, Throwable cause) { + super("网络请求失败: " + message, "C0006", "CRAWLER", cause); + } + } + + public static class ParseException extends BaseException { + public ParseException(String message) { + super("数据解析失败: " + message, "C0007", "CRAWLER"); + } + + public ParseException(String message, Throwable cause) { + super("数据解析失败: " + message, "C0007", "CRAWLER", cause); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/exception/ExceptionHandler.java b/src/main/java/com/crawler/exception/ExceptionHandler.java new file mode 100644 index 0000000..050f59f --- /dev/null +++ b/src/main/java/com/crawler/exception/ExceptionHandler.java @@ -0,0 +1,7 @@ +package com.crawler.exception; + +public interface ExceptionHandler { + void handle(Exception e); + String getErrorMessage(Exception e); + boolean shouldExit(); +} \ No newline at end of file diff --git a/src/main/java/com/crawler/exception/GlobalExceptionHandler.java b/src/main/java/com/crawler/exception/GlobalExceptionHandler.java new file mode 100644 index 0000000..f96c447 --- /dev/null +++ b/src/main/java/com/crawler/exception/GlobalExceptionHandler.java @@ -0,0 +1,36 @@ +package com.crawler.exception; + +public class GlobalExceptionHandler implements ExceptionHandler { + + @Override + public void handle(Exception e) { + if (e instanceof BaseException baseEx) { + System.err.println("错误: " + baseEx.getFullMessage()); + if (e.getCause() != null) { + System.err.println("原因: " + e.getCause().getMessage()); + } + } else if (e instanceof CommandException) { + System.err.println("命令错误: " + e.getMessage()); + } else if (e instanceof CrawlerException) { + System.err.println("爬虫错误: " + e.getMessage()); + } else if (e instanceof StorageException) { + System.err.println("存储错误: " + e.getMessage()); + } else { + System.err.println("未知错误: " + e.getMessage()); + e.printStackTrace(); + } + } + + @Override + public String getErrorMessage(Exception e) { + if (e instanceof BaseException baseEx) { + return baseEx.getFullMessage(); + } + return e.getMessage(); + } + + @Override + public boolean shouldExit() { + return false; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/exception/StorageException.java b/src/main/java/com/crawler/exception/StorageException.java new file mode 100644 index 0000000..f372c22 --- /dev/null +++ b/src/main/java/com/crawler/exception/StorageException.java @@ -0,0 +1,35 @@ +package com.crawler.exception; + +public class StorageException extends BaseException { + public static final String CATEGORY = "STORAGE"; + + public StorageException(String message) { + super(message, "S0001", CATEGORY); + } + + public StorageException(String message, Throwable cause) { + super(message, "S0001", CATEGORY, cause); + } + + public static class StorageWriteException extends BaseException { + public StorageWriteException(String fileName) { + super("写入文件失败: " + fileName, "S0002", CATEGORY); + } + + public StorageWriteException(String fileName, Throwable cause) { + super("写入文件失败: " + fileName, "S0002", CATEGORY, cause); + } + } + + public static class StorageReadException extends BaseException { + public StorageReadException(String fileName) { + super("读取文件失败: " + fileName, "S0003", CATEGORY); + } + } + + public static class StorageFormatException extends BaseException { + public StorageFormatException(String format) { + super("不支持的存储格式: " + format, "S0004", CATEGORY); + } + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/factory/CrawlerFactory.java b/src/main/java/com/crawler/factory/CrawlerFactory.java new file mode 100644 index 0000000..1acc4fa --- /dev/null +++ b/src/main/java/com/crawler/factory/CrawlerFactory.java @@ -0,0 +1,55 @@ +package com.crawler.factory; + +import com.crawler.crawler.AbstractCrawler; +import com.crawler.crawler.BilibiliCrawler; +import com.crawler.crawler.DouyinCrawler; +import com.crawler.crawler.XiaohongshuCrawler; +import com.crawler.crawler.GovNewsCrawler; +import com.crawler.crawler.WeatherCrawler; +import com.crawler.crawler.LibraryBookCrawler; +import com.crawler.exception.CrawlerException; +import com.crawler.model.BaseMediaData; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +public class CrawlerFactory { + private static final Map>> CRAWLER_REGISTRY = new HashMap<>(); + + static { + register("bilibili", BilibiliCrawler::new); + register("douyin", DouyinCrawler::new); + register("xiaohongshu", XiaohongshuCrawler::new); + register("b站", BilibiliCrawler::new); + register("抖音", DouyinCrawler::new); + register("小红书", XiaohongshuCrawler::new); + register("govnews", GovNewsCrawler::new); + register("weather", WeatherCrawler::new); + register("library", LibraryBookCrawler::new); + register("政务新闻", GovNewsCrawler::new); + register("天气预报", WeatherCrawler::new); + register("图书馆", LibraryBookCrawler::new); + } + + public static void register(String platform, Supplier> constructor) { + CRAWLER_REGISTRY.put(platform.toLowerCase(), constructor); + } + + @SuppressWarnings("unchecked") + public static AbstractCrawler getCrawler(String platform) { + Supplier> constructor = CRAWLER_REGISTRY.get(platform.toLowerCase()); + if (constructor == null) { + throw new CrawlerException("不支持的平台: " + platform + ",支持的平台: " + CRAWLER_REGISTRY.keySet()); + } + return (AbstractCrawler) constructor.get(); + } + + public static boolean supports(String platform) { + return CRAWLER_REGISTRY.containsKey(platform.toLowerCase()); + } + + public static String[] getSupportedPlatforms() { + return CRAWLER_REGISTRY.keySet().toArray(new String[0]); + } +} diff --git a/src/main/java/com/crawler/model/BaseMediaData.java b/src/main/java/com/crawler/model/BaseMediaData.java new file mode 100644 index 0000000..4f8a91d --- /dev/null +++ b/src/main/java/com/crawler/model/BaseMediaData.java @@ -0,0 +1,75 @@ +package com.crawler.model; + +import java.time.LocalDateTime; + +public abstract class BaseMediaData { + protected String id; + protected String title; + protected String author; + protected Long viewCount; + protected String url; + protected String platform; + protected LocalDateTime crawlTime; + + public BaseMediaData() { + this.crawlTime = LocalDateTime.now(); + } + + public abstract String getUniqueKey(); + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getAuthor() { + return author; + } + + public void setAuthor(String author) { + this.author = author; + } + + public Long getViewCount() { + return viewCount; + } + + public void setViewCount(Long viewCount) { + this.viewCount = viewCount; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getPlatform() { + return platform; + } + + public void setPlatform(String platform) { + this.platform = platform; + } + + public LocalDateTime getCrawlTime() { + return crawlTime; + } + + public void setCrawlTime(LocalDateTime crawlTime) { + this.crawlTime = crawlTime; + } +} diff --git a/src/main/java/com/crawler/model/BilibiliVideoData.java b/src/main/java/com/crawler/model/BilibiliVideoData.java new file mode 100644 index 0000000..3a37ef2 --- /dev/null +++ b/src/main/java/com/crawler/model/BilibiliVideoData.java @@ -0,0 +1,68 @@ +package com.crawler.model; + +public class BilibiliVideoData extends VideoData { + private String bvid; + private Long aid; + private Long coinCount; + private Long collectCount; + private Long shareCount; + private String tname; + + public BilibiliVideoData() { + this.setPlatform("bilibili"); + } + + @Override + public String getUniqueKey() { + return "bilibili:" + (bvid != null ? bvid : id); + } + + public String getBvid() { + return bvid; + } + + public void setBvid(String bvid) { + this.bvid = bvid; + this.setId(bvid); + } + + public Long getAid() { + return aid; + } + + public void setAid(Long aid) { + this.aid = aid; + } + + public Long getCoinCount() { + return coinCount; + } + + public void setCoinCount(Long coinCount) { + this.coinCount = coinCount; + } + + public Long getCollectCount() { + return collectCount; + } + + public void setCollectCount(Long collectCount) { + this.collectCount = collectCount; + } + + public Long getShareCount() { + return shareCount; + } + + public void setShareCount(Long shareCount) { + this.shareCount = shareCount; + } + + public String getTname() { + return tname; + } + + public void setTname(String tname) { + this.tname = tname; + } +} diff --git a/src/main/java/com/crawler/model/DouyinVideoData.java b/src/main/java/com/crawler/model/DouyinVideoData.java new file mode 100644 index 0000000..840423a --- /dev/null +++ b/src/main/java/com/crawler/model/DouyinVideoData.java @@ -0,0 +1,59 @@ +package com.crawler.model; + +public class DouyinVideoData extends VideoData { + private String awemeId; + private String coverUrl; + private String musicName; + private Long shareCount; + private Long favoriteCount; + + public DouyinVideoData() { + this.setPlatform("douyin"); + } + + @Override + public String getUniqueKey() { + return "douyin:" + (awemeId != null ? awemeId : id); + } + + public String getAwemeId() { + return awemeId; + } + + public void setAwemeId(String awemeId) { + this.awemeId = awemeId; + this.setId(awemeId); + } + + public String getCoverUrl() { + return coverUrl; + } + + public void setCoverUrl(String coverUrl) { + this.coverUrl = coverUrl; + } + + public String getMusicName() { + return musicName; + } + + public void setMusicName(String musicName) { + this.musicName = musicName; + } + + public Long getShareCount() { + return shareCount; + } + + public void setShareCount(Long shareCount) { + this.shareCount = shareCount; + } + + public Long getFavoriteCount() { + return favoriteCount; + } + + public void setFavoriteCount(Long favoriteCount) { + this.favoriteCount = favoriteCount; + } +} diff --git a/src/main/java/com/crawler/model/GovNewsData.java b/src/main/java/com/crawler/model/GovNewsData.java new file mode 100644 index 0000000..5fdc536 --- /dev/null +++ b/src/main/java/com/crawler/model/GovNewsData.java @@ -0,0 +1,57 @@ +package com.crawler.model; + +public class GovNewsData extends BaseMediaData { + private String source; + private String publishTime; + private String category; + + public GovNewsData() { + super(); + } + + public GovNewsData(String id, String title, String source, String publishTime, String category, String url) { + super(); + this.id = id; + this.title = title; + this.source = source; + this.author = source; + this.publishTime = publishTime; + this.category = category; + this.url = url; + } + + @Override + public String getUniqueKey() { + return id != null ? id : title; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public String getPublishTime() { + return publishTime; + } + + public void setPublishTime(String publishTime) { + this.publishTime = publishTime; + } + + public String getCategory() { + return category; + } + + public void setCategory(String category) { + this.category = category; + } + + @Override + public String toString() { + return String.format("【政务新闻】%s\n\t来源: %s\n\t发布时间: %s\n\t分类: %s\n\t链接: %s", + title, source, publishTime, category, url); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/model/LibraryBookData.java b/src/main/java/com/crawler/model/LibraryBookData.java new file mode 100644 index 0000000..745ead6 --- /dev/null +++ b/src/main/java/com/crawler/model/LibraryBookData.java @@ -0,0 +1,86 @@ +package com.crawler.model; + +public class LibraryBookData extends BaseMediaData { + private String isbn; + private String publisher; + private String publishYear; + private String location; + private String status; + private String callNumber; + + public LibraryBookData() { + super(); + } + + public LibraryBookData(String title, String author, String publisher, String isbn, + String publishYear, String location, String status, String callNumber) { + super(); + this.title = title; + this.author = author; + this.publisher = publisher; + this.isbn = isbn; + this.publishYear = publishYear; + this.location = location; + this.status = status; + this.callNumber = callNumber; + } + + @Override + public String getUniqueKey() { + return isbn != null ? isbn : title + "_" + author; + } + + public String getIsbn() { + return isbn; + } + + public void setIsbn(String isbn) { + this.isbn = isbn; + } + + public String getPublisher() { + return publisher; + } + + public void setPublisher(String publisher) { + this.publisher = publisher; + } + + public String getPublishYear() { + return publishYear; + } + + public void setPublishYear(String publishYear) { + this.publishYear = publishYear; + } + + public String getLocation() { + return location; + } + + public void setLocation(String location) { + this.location = location; + } + + public String getStatus() { + return status; + } + + public void setStatus(String status) { + this.status = status; + } + + public String getCallNumber() { + return callNumber; + } + + public void setCallNumber(String callNumber) { + this.callNumber = callNumber; + } + + @Override + public String toString() { + return String.format("【图书馆书目】%s\n\t作者: %s\n\t出版社: %s\n\tISBN: %s\n\t出版年份: %s\n\t馆藏位置: %s\n\t状态: %s\n\t索书号: %s", + title, author, publisher, isbn, publishYear, location, status, callNumber); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/model/VideoData.java b/src/main/java/com/crawler/model/VideoData.java new file mode 100644 index 0000000..cf30e2d --- /dev/null +++ b/src/main/java/com/crawler/model/VideoData.java @@ -0,0 +1,71 @@ +package com.crawler.model; + +public class VideoData extends BaseMediaData { + private Integer rank; + private String duration; + private Long likeCount; + private Long commentCount; + private Long danmakuCount; + + @Override + public String getUniqueKey() { + return platform + ":" + id; + } + + @Override + public String toString() { + return String.format("%-4d | %-35s | %-12s | %-10s | %s", + rank != null ? rank : 0, + title != null && title.length() > 30 ? title.substring(0, 27) + "..." : title, + author != null && author.length() > 10 ? author.substring(0, 9) + "..." : author, + formatViewCount(), + platform != null ? platform : "unknown"); + } + + private String formatViewCount() { + if (viewCount == null) return "0"; + if (viewCount >= 100000000) return String.format("%.1f亿", viewCount / 100000000.0); + if (viewCount >= 10000) return String.format("%.1f万", viewCount / 10000.0); + return String.valueOf(viewCount); + } + + public Integer getRank() { + return rank; + } + + public void setRank(Integer rank) { + this.rank = rank; + } + + public String getDuration() { + return duration; + } + + public void setDuration(String duration) { + this.duration = duration; + } + + public Long getLikeCount() { + return likeCount; + } + + public void setLikeCount(Long likeCount) { + this.likeCount = likeCount; + } + + public Long getCommentCount() { + return commentCount; + } + + public void setCommentCount(Long commentCount) { + this.commentCount = commentCount; + } + + public Long getDanmakuCount() { + return danmakuCount; + } + + public void setDanmakuCount(Long danmakuCount) { + this.danmakuCount = danmakuCount; + } +} diff --git a/src/main/java/com/crawler/model/WeatherData.java b/src/main/java/com/crawler/model/WeatherData.java new file mode 100644 index 0000000..5eada62 --- /dev/null +++ b/src/main/java/com/crawler/model/WeatherData.java @@ -0,0 +1,95 @@ +package com.crawler.model; + +public class WeatherData extends BaseMediaData { + private String city; + private String date; + private String weather; + private String temperature; + private String windDirection; + private String windLevel; + private String humidity; + + public WeatherData() { + super(); + } + + public WeatherData(String city, String date, String weather, String temperature, + String windDirection, String windLevel, String humidity) { + super(); + this.city = city; + this.date = date; + this.weather = weather; + this.temperature = temperature; + this.windDirection = windDirection; + this.windLevel = windLevel; + this.humidity = humidity; + this.title = city + " " + date + " " + weather; + } + + @Override + public String getUniqueKey() { + return city + "_" + date; + } + + public String getCity() { + return city; + } + + public void setCity(String city) { + this.city = city; + } + + public String getDate() { + return date; + } + + public void setDate(String date) { + this.date = date; + } + + public String getWeather() { + return weather; + } + + public void setWeather(String weather) { + this.weather = weather; + } + + public String getTemperature() { + return temperature; + } + + public void setTemperature(String temperature) { + this.temperature = temperature; + } + + public String getWindDirection() { + return windDirection; + } + + public void setWindDirection(String windDirection) { + this.windDirection = windDirection; + } + + public String getWindLevel() { + return windLevel; + } + + public void setWindLevel(String windLevel) { + this.windLevel = windLevel; + } + + public String getHumidity() { + return humidity; + } + + public void setHumidity(String humidity) { + this.humidity = humidity; + } + + @Override + public String toString() { + return String.format("【天气预报】%s %s\n\t天气: %s\n\t温度: %s\n\t风向: %s %s\n\t湿度: %s", + city, date, weather, temperature, windDirection, windLevel, humidity); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/model/XiaohongshuData.java b/src/main/java/com/crawler/model/XiaohongshuData.java new file mode 100644 index 0000000..a017314 --- /dev/null +++ b/src/main/java/com/crawler/model/XiaohongshuData.java @@ -0,0 +1,77 @@ +package com.crawler.model; + +public class XiaohongshuData extends BaseMediaData { + private String noteId; + private String coverUrl; + private String desc; + private Long likeCount; + private Long commentCount; + private Long shareCount; + private Long collectCount; + + public XiaohongshuData() { + this.setPlatform("xiaohongshu"); + } + + @Override + public String getUniqueKey() { + return "xiaohongshu:" + (noteId != null ? noteId : id); + } + + public String getNoteId() { + return noteId; + } + + public void setNoteId(String noteId) { + this.noteId = noteId; + this.setId(noteId); + } + + public String getCoverUrl() { + return coverUrl; + } + + public void setCoverUrl(String coverUrl) { + this.coverUrl = coverUrl; + } + + public String getDesc() { + return desc; + } + + public void setDesc(String desc) { + this.desc = desc; + } + + public Long getLikeCount() { + return likeCount; + } + + public void setLikeCount(Long likeCount) { + this.likeCount = likeCount; + } + + public Long getCommentCount() { + return commentCount; + } + + public void setCommentCount(Long commentCount) { + this.commentCount = commentCount; + } + + public Long getShareCount() { + return shareCount; + } + + public void setShareCount(Long shareCount) { + this.shareCount = shareCount; + } + + public Long getCollectCount() { + return collectCount; + } + + public void setCollectCount(Long collectCount) { + this.collectCount = collectCount; + } +} diff --git a/src/main/java/com/crawler/storage/DataStorage.java b/src/main/java/com/crawler/storage/DataStorage.java new file mode 100644 index 0000000..d95dae3 --- /dev/null +++ b/src/main/java/com/crawler/storage/DataStorage.java @@ -0,0 +1,9 @@ +package com.crawler.storage; + +import com.crawler.model.BaseMediaData; +import java.util.List; + +public interface DataStorage { + void save(List data); + String getStorageName(); +} diff --git a/src/main/java/com/crawler/storage/StorageFactory.java b/src/main/java/com/crawler/storage/StorageFactory.java new file mode 100644 index 0000000..67b8ff4 --- /dev/null +++ b/src/main/java/com/crawler/storage/StorageFactory.java @@ -0,0 +1,33 @@ +package com.crawler.storage; + +import com.crawler.exception.CrawlerException; +import com.crawler.model.BaseMediaData; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +public class StorageFactory { + private static final Map>> STORAGE_REGISTRY = new HashMap<>(); + + static { + register("txt", TxtStorage::new); + } + + public static void register(String type, Supplier> constructor) { + STORAGE_REGISTRY.put(type.toLowerCase(), constructor); + } + + @SuppressWarnings("unchecked") + public static DataStorage getStorage(String type) { + Supplier> constructor = STORAGE_REGISTRY.get(type.toLowerCase()); + if (constructor == null) { + throw new CrawlerException("不支持的存储类型: " + type + ",支持的类型: " + STORAGE_REGISTRY.keySet()); + } + return (DataStorage) constructor.get(); + } + + public static boolean supports(String type) { + return STORAGE_REGISTRY.containsKey(type.toLowerCase()); + } +} diff --git a/src/main/java/com/crawler/storage/TxtStorage.java b/src/main/java/com/crawler/storage/TxtStorage.java new file mode 100644 index 0000000..46e0df6 --- /dev/null +++ b/src/main/java/com/crawler/storage/TxtStorage.java @@ -0,0 +1,70 @@ +package com.crawler.storage; + +import com.crawler.model.BaseMediaData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class TxtStorage implements DataStorage { + private static final Logger logger = LoggerFactory.getLogger(TxtStorage.class); + private static final String OUTPUT_DIR = "output"; + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); + + @Override + public void save(List data) { + if (data == null || data.isEmpty()) { + logger.warn("数据为空,跳过保存"); + return; + } + + try { + java.io.File dir = new java.io.File(OUTPUT_DIR); + if (!dir.exists()) { + dir.mkdirs(); + } + + String filename = "crawl_result_" + LocalDateTime.now().format(DATE_FORMATTER) + ".txt"; + String filePath = OUTPUT_DIR + "/" + filename; + + try (FileWriter writer = new FileWriter(filePath)) { + writer.write("========================================================\n"); + writer.write(" 爬虫数据导出结果\n"); + writer.write("========================================================\n"); + writer.write("导出时间: " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")) + "\n"); + writer.write("数据条数: " + data.size() + "\n"); + writer.write("========================================================\n\n"); + + for (int i = 0; i < data.size(); i++) { + BaseMediaData item = data.get(i); + writer.write(String.format("%d. %s\n", i + 1, item.getTitle())); + writer.write(" 作者: " + item.getAuthor() + "\n"); + writer.write(" 播放: " + formatViewCount(item.getViewCount()) + "\n"); + writer.write(" 平台: " + item.getPlatform() + "\n"); + writer.write(" 链接: " + item.getUrl() + "\n"); + writer.write("--------------------------------------------------------\n"); + } + } + + logger.info("数据已保存到: {}", filePath); + } catch (IOException e) { + logger.error("保存文件失败: {}", e.getMessage()); + } + } + + private String formatViewCount(Long viewCount) { + if (viewCount == null) return "0"; + if (viewCount >= 100000000) return String.format("%.1f亿", viewCount / 100000000.0); + if (viewCount >= 10000) return String.format("%.1f万", viewCount / 10000.0); + return String.valueOf(viewCount); + } + + @Override + public String getStorageName() { + return "TXT文件存储"; + } +} diff --git a/src/main/java/com/crawler/strategy/crawler/AbstractCrawlStrategy.java b/src/main/java/com/crawler/strategy/crawler/AbstractCrawlStrategy.java new file mode 100644 index 0000000..fef12ab --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/AbstractCrawlStrategy.java @@ -0,0 +1,52 @@ +package com.crawler.strategy.crawler; + +import com.crawler.exception.CrawlerException; +import com.crawler.model.BaseMediaData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public abstract class AbstractCrawlStrategy implements CrawlStrategy { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + protected String platform; + + protected AbstractCrawlStrategy(String platform) { + this.platform = platform; + } + + @Override + public List crawl() { + return crawl(10); + } + + @Override + public List crawl(int count) { + logger.info("开始爬取 {} 平台...", platform); + + try { + validate(); + List result = doCrawl(count); + logger.info("{} 平台爬取完成,获取 {} 条数据", platform, result.size()); + return result; + } catch (Exception e) { + logger.error("{} 平台爬取失败: {}", platform, e.getMessage()); + throw new CrawlerException.CrawlExecutionException(platform + " 爬取失败", e); + } + } + + protected void validate() { + } + + protected abstract List doCrawl(int count); + + @Override + public String getPlatform() { + return platform; + } + + @Override + public boolean isAvailable() { + return true; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/crawler/CrawlStrategy.java b/src/main/java/com/crawler/strategy/crawler/CrawlStrategy.java new file mode 100644 index 0000000..ef9f1d8 --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/CrawlStrategy.java @@ -0,0 +1,20 @@ +package com.crawler.strategy.crawler; + +import com.crawler.model.BaseMediaData; +import java.util.List; + +public interface CrawlStrategy { + List crawl(); + List crawl(int count); + String getPlatform(); + StrategyType getType(); + boolean isAvailable(); + + enum StrategyType { + VIDEO, + NEWS, + WEATHER, + BOOK, + SOCIAL + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/crawler/CrawlStrategyFactory.java b/src/main/java/com/crawler/strategy/crawler/CrawlStrategyFactory.java new file mode 100644 index 0000000..ef74fd2 --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/CrawlStrategyFactory.java @@ -0,0 +1,37 @@ +package com.crawler.strategy.crawler; + +import com.crawler.exception.CrawlerException; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +public class CrawlStrategyFactory { + private static final Map>> STRATEGY_REGISTRY = new HashMap<>(); + + static { + register("govnews", GovNewsCrawlStrategy::new); + register("weather", WeatherCrawlStrategy::new); + register("library", LibraryBookCrawlStrategy::new); + register("政务新闻", GovNewsCrawlStrategy::new); + register("天气预报", WeatherCrawlStrategy::new); + register("图书馆", LibraryBookCrawlStrategy::new); + } + + public static void register(String platform, Supplier> constructor) { + STRATEGY_REGISTRY.put(platform.toLowerCase(), constructor); + } + + @SuppressWarnings("unchecked") + public static > T getStrategy(String platform) { + Supplier> constructor = STRATEGY_REGISTRY.get(platform.toLowerCase()); + if (constructor == null) { + throw new CrawlerException.PlatformNotFoundException(platform); + } + return (T) constructor.get(); + } + + public static boolean supports(String platform) { + return STRATEGY_REGISTRY.containsKey(platform.toLowerCase()); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/crawler/GovNewsCrawlStrategy.java b/src/main/java/com/crawler/strategy/crawler/GovNewsCrawlStrategy.java new file mode 100644 index 0000000..be30523 --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/GovNewsCrawlStrategy.java @@ -0,0 +1,52 @@ +package com.crawler.strategy.crawler; + +import com.crawler.model.GovNewsData; +import java.util.ArrayList; +import java.util.List; + +public class GovNewsCrawlStrategy extends AbstractCrawlStrategy { + + public GovNewsCrawlStrategy() { + super("govnews"); + } + + @Override + public StrategyType getType() { + return StrategyType.NEWS; + } + + @Override + protected List doCrawl(int count) { + List newsList = new ArrayList<>(); + + addNews(newsList, "1", "国务院办公厅关于进一步优化营商环境更好服务市场主体的实施意见", + "中国政府网", "2024-01-15", "政策文件", "http://www.gov.cn"); + addNews(newsList, "2", "教育部发布2024年义务教育招生入学工作通知", + "教育部官网", "2024-01-14", "教育动态", "http://www.moe.gov.cn"); + addNews(newsList, "3", "人社部公布2024年春节假期安排", + "人力资源和社会保障部", "2024-01-13", "人事信息", "http://www.mohrss.gov.cn"); + addNews(newsList, "4", "国家医保局:进一步完善医保支付政策", + "国家医疗保障局", "2024-01-12", "医疗健康", "http://www.nhsa.gov.cn"); + addNews(newsList, "5", "生态环境部发布2023年全国环境质量状况", + "生态环境部", "2024-01-11", "环境保护", "http://www.mee.gov.cn"); + addNews(newsList, "6", "财政部发布2024年财政预算报告", + "财政部", "2024-01-10", "财政金融", "http://www.mof.gov.cn"); + addNews(newsList, "7", "工信部部署2024年工业和信息化工作", + "工业和信息化部", "2024-01-09", "工业信息", "http://www.miit.gov.cn"); + addNews(newsList, "8", "交通运输部推进交通强国建设", + "交通运输部", "2024-01-08", "交通建设", "http://www.mot.gov.cn"); + addNews(newsList, "9", "农业农村部部署春季农业生产", + "农业农村部", "2024-01-07", "农业农村", "http://www.moa.gov.cn"); + addNews(newsList, "10", "国家统计局发布2023年国民经济运行数据", + "国家统计局", "2024-01-06", "统计数据", "http://www.stats.gov.cn"); + + return newsList.subList(0, Math.min(count, newsList.size())); + } + + private void addNews(List list, String id, String title, String source, + String publishTime, String category, String url) { + GovNewsData news = new GovNewsData(id, title, source, publishTime, category, url); + news.setPlatform("govnews"); + list.add(news); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.java b/src/main/java/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.java new file mode 100644 index 0000000..9f2839c --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.java @@ -0,0 +1,52 @@ +package com.crawler.strategy.crawler; + +import com.crawler.model.LibraryBookData; +import java.util.ArrayList; +import java.util.List; + +public class LibraryBookCrawlStrategy extends AbstractCrawlStrategy { + + public LibraryBookCrawlStrategy() { + super("library"); + } + + @Override + public StrategyType getType() { + return StrategyType.BOOK; + } + + @Override + protected List doCrawl(int count) { + List bookList = new ArrayList<>(); + + addBook(bookList, "Java编程思想(第4版)", "Bruce Eckel", "机械工业出版社", + "978-7-111-21382-6", "2007", "A区-3排-15架", "可借阅", "TP312/EC4"); + addBook(bookList, "深入理解计算机系统", "Randal E. Bryant", "机械工业出版社", + "978-7-111-54493-7", "2016", "A区-2排-8架", "可借阅", "TP301/B83"); + addBook(bookList, "算法导论(第3版)", "Thomas H. Cormen", "机械工业出版社", + "978-7-111-40701-0", "2012", "A区-4排-22架", "已借出", "TP301/C62"); + addBook(bookList, "设计模式:可复用面向对象软件的基础", "Erich Gamma", "机械工业出版社", + "978-7-111-07554-7", "2000", "A区-1排-10架", "可借阅", "TP311.5/G16"); + addBook(bookList, "代码大全(第2版)", "Steve McConnell", "电子工业出版社", + "978-7-121-02298-5", "2006", "B区-5排-18架", "可借阅", "TP311.5/M13"); + addBook(bookList, "人月神话", "Frederick P. Brooks", "清华大学出版社", + "978-7-302-22587-5", "2010", "B区-3排-5架", "可借阅", "TP311.5/B88"); + addBook(bookList, "重构:改善既有代码的设计", "Martin Fowler", "人民邮电出版社", + "978-7-115-12057-5", "2010", "B区-2排-12架", "已借出", "TP311.5/F68"); + addBook(bookList, "Head First设计模式", "Eric Freeman", "中国电力出版社", + "978-7-5083-5393-7", "2007", "C区-1排-20架", "可借阅", "TP311.5/F84"); + addBook(bookList, "Effective Java(第3版)", "Joshua Bloch", "机械工业出版社", + "978-7-115-61275-6", "2020", "C区-4排-8架", "可借阅", "TP312/B57"); + addBook(bookList, "Clean Code", "Robert C. Martin", "人民邮电出版社", + "978-7-115-23385-8", "2010", "C区-5排-15架", "可借阅", "TP311.5/M27"); + + return bookList.subList(0, Math.min(count, bookList.size())); + } + + private void addBook(List list, String title, String author, String publisher, + String isbn, String publishYear, String location, String status, String callNumber) { + LibraryBookData book = new LibraryBookData(title, author, publisher, isbn, publishYear, location, status, callNumber); + book.setPlatform("library"); + list.add(book); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/crawler/WeatherCrawlStrategy.java b/src/main/java/com/crawler/strategy/crawler/WeatherCrawlStrategy.java new file mode 100644 index 0000000..6b436bf --- /dev/null +++ b/src/main/java/com/crawler/strategy/crawler/WeatherCrawlStrategy.java @@ -0,0 +1,46 @@ +package com.crawler.strategy.crawler; + +import com.crawler.model.WeatherData; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; + +public class WeatherCrawlStrategy extends AbstractCrawlStrategy { + + public WeatherCrawlStrategy() { + super("weather"); + } + + @Override + public StrategyType getType() { + return StrategyType.WEATHER; + } + + @Override + protected List doCrawl(int count) { + List weatherList = new ArrayList<>(); + DateTimeFormatter formatter = DateTimeFormatter.ofPattern("yyyy-MM-dd"); + LocalDate today = LocalDate.now(); + + addWeather(weatherList, "北京", today.format(formatter), "晴", "-5°C ~ 8°C", "北风", "3-4级", "35%"); + addWeather(weatherList, "上海", today.format(formatter), "多云", "8°C ~ 15°C", "东风", "2-3级", "65%"); + addWeather(weatherList, "广州", today.format(formatter), "小雨", "18°C ~ 23°C", "南风", "4-5级", "85%"); + addWeather(weatherList, "深圳", today.format(formatter), "阴", "20°C ~ 25°C", "东南风", "3-4级", "80%"); + addWeather(weatherList, "杭州", today.format(formatter), "晴转多云", "10°C ~ 18°C", "西北风", "2-3级", "55%"); + addWeather(weatherList, "南京", today.format(formatter), "多云转晴", "7°C ~ 14°C", "东北风", "3-4级", "50%"); + addWeather(weatherList, "武汉", today.format(formatter), "小雨", "5°C ~ 12°C", "北风", "4-5级", "75%"); + addWeather(weatherList, "成都", today.format(formatter), "阴转小雨", "6°C ~ 13°C", "南风", "2-3级", "82%"); + addWeather(weatherList, "重庆", today.format(formatter), "小雨", "10°C ~ 16°C", "西南风", "3-4级", "88%"); + addWeather(weatherList, "西安", today.format(formatter), "晴", "-2°C ~ 10°C", "西风", "2-3级", "40%"); + + return weatherList.subList(0, Math.min(count, weatherList.size())); + } + + private void addWeather(List list, String city, String date, String weather, + String temperature, String windDirection, String windLevel, String humidity) { + WeatherData w = new WeatherData(city, date, weather, temperature, windDirection, windLevel, humidity); + w.setPlatform("weather"); + list.add(w); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/AbstractStorageStrategy.java b/src/main/java/com/crawler/strategy/storage/AbstractStorageStrategy.java new file mode 100644 index 0000000..93947c0 --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/AbstractStorageStrategy.java @@ -0,0 +1,42 @@ +package com.crawler.strategy.storage; + +import com.crawler.exception.StorageException; +import com.crawler.model.BaseMediaData; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; + +public abstract class AbstractStorageStrategy implements StorageStrategy { + protected final Logger logger = LoggerFactory.getLogger(getClass()); + protected static final String OUTPUT_DIR = "output"; + protected static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); + + protected String generateFileName(String platform, String extension) { + String timestamp = LocalDateTime.now().format(DATE_FORMATTER); + return platform + "_" + timestamp + "." + extension; + } + + protected void ensureOutputDirectory() { + try { + Path outputPath = Paths.get(OUTPUT_DIR); + if (!Files.exists(outputPath)) { + Files.createDirectories(outputPath); + logger.info("创建输出目录: {}", outputPath.toAbsolutePath()); + } + } catch (IOException e) { + throw new StorageException.StorageWriteException(OUTPUT_DIR, e); + } + } + + protected String getFilePath(String fileName) { + return Paths.get(OUTPUT_DIR, fileName).toString(); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/CsvStorageStrategy.java b/src/main/java/com/crawler/strategy/storage/CsvStorageStrategy.java new file mode 100644 index 0000000..09bb01f --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/CsvStorageStrategy.java @@ -0,0 +1,65 @@ +package com.crawler.strategy.storage; + +import com.crawler.exception.StorageException; +import com.crawler.model.BaseMediaData; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class CsvStorageStrategy extends AbstractStorageStrategy { + + @Override + public String save(List data, String platform) { + String fileName = generateFileName(platform, getFileExtension()); + String filePath = getFilePath(fileName); + ensureOutputDirectory(); + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) { + writer.write("\uFEFF"); + writer.write("序号,标题,作者,平台,链接,爬取时间"); + writer.newLine(); + + int index = 1; + for (BaseMediaData item : data) { + writer.write(String.format("%d,\"%s\",\"%s\",\"%s\",\"%s\",\"%s\"", + index++, + escapeCsv(item.getTitle()), + escapeCsv(item.getAuthor()), + escapeCsv(item.getPlatform()), + escapeCsv(item.getUrl()), + item.getCrawlTime().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss")))); + writer.newLine(); + } + + logger.info("CSV数据已保存到: {}", filePath); + return filePath; + + } catch (IOException e) { + throw new StorageException.StorageWriteException(fileName, e); + } + } + + @Override + public String save(List data) { + return save(data, "data"); + } + + @Override + public String getFormat() { + return "csv"; + } + + @Override + public String getFileExtension() { + return "csv"; + } + + private String escapeCsv(String value) { + if (value == null) return ""; + return value.replace("\"", "\"\""); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/JsonStorageStrategy.java b/src/main/java/com/crawler/strategy/storage/JsonStorageStrategy.java new file mode 100644 index 0000000..1383391 --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/JsonStorageStrategy.java @@ -0,0 +1,61 @@ +package com.crawler.strategy.storage; + +import com.crawler.exception.StorageException; +import com.crawler.model.BaseMediaData; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; + +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class JsonStorageStrategy extends AbstractStorageStrategy { + private final ObjectMapper objectMapper; + + public JsonStorageStrategy() { + this.objectMapper = new ObjectMapper(); + this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + } + + @Override + public String save(List data, String platform) { + String fileName = generateFileName(platform, getFileExtension()); + String filePath = getFilePath(fileName); + ensureOutputDirectory(); + + try (FileWriter writer = new FileWriter(filePath)) { + Map output = new HashMap<>(); + output.put("platform", platform); + output.put("crawlTime", LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))); + output.put("count", data.size()); + output.put("data", data); + + objectMapper.writeValue(writer, output); + + logger.info("JSON数据已保存到: {}", filePath); + return filePath; + + } catch (IOException e) { + throw new StorageException.StorageWriteException(fileName, e); + } + } + + @Override + public String save(List data) { + return save(data, "data"); + } + + @Override + public String getFormat() { + return "json"; + } + + @Override + public String getFileExtension() { + return "json"; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/StorageStrategy.java b/src/main/java/com/crawler/strategy/storage/StorageStrategy.java new file mode 100644 index 0000000..7719929 --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/StorageStrategy.java @@ -0,0 +1,11 @@ +package com.crawler.strategy.storage; + +import com.crawler.model.BaseMediaData; +import java.util.List; + +public interface StorageStrategy { + String save(List data, String platform); + String save(List data); + String getFormat(); + String getFileExtension(); +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/StorageStrategyFactory.java b/src/main/java/com/crawler/strategy/storage/StorageStrategyFactory.java new file mode 100644 index 0000000..a25b9d2 --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/StorageStrategyFactory.java @@ -0,0 +1,33 @@ +package com.crawler.strategy.storage; + +import com.crawler.exception.StorageException; + +import java.util.HashMap; +import java.util.Map; +import java.util.function.Supplier; + +public class StorageStrategyFactory { + private static final Map> STRATEGY_REGISTRY = new HashMap<>(); + + static { + register("txt", TxtStorageStrategy::new); + register("json", JsonStorageStrategy::new); + register("csv", CsvStorageStrategy::new); + } + + public static void register(String format, Supplier constructor) { + STRATEGY_REGISTRY.put(format.toLowerCase(), constructor); + } + + public static StorageStrategy getStrategy(String format) { + Supplier constructor = STRATEGY_REGISTRY.get(format.toLowerCase()); + if (constructor == null) { + throw new StorageException.StorageFormatException(format); + } + return constructor.get(); + } + + public static boolean supports(String format) { + return STRATEGY_REGISTRY.containsKey(format.toLowerCase()); + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/strategy/storage/TxtStorageStrategy.java b/src/main/java/com/crawler/strategy/storage/TxtStorageStrategy.java new file mode 100644 index 0000000..e919db5 --- /dev/null +++ b/src/main/java/com/crawler/strategy/storage/TxtStorageStrategy.java @@ -0,0 +1,74 @@ +package com.crawler.strategy.storage; + +import com.crawler.exception.StorageException; +import com.crawler.model.BaseMediaData; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class TxtStorageStrategy extends AbstractStorageStrategy { + + @Override + public String save(List data, String platform) { + String fileName = generateFileName(platform, getFileExtension()); + String filePath = getFilePath(fileName); + ensureOutputDirectory(); + + try (BufferedWriter writer = new BufferedWriter(new FileWriter(filePath))) { + writer.write("=".repeat(50)); + writer.newLine(); + writer.write("爬虫数据 - " + platform); + writer.newLine(); + writer.write("爬取时间: " + LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))); + writer.newLine(); + writer.write("数据条数: " + data.size()); + writer.newLine(); + writer.write("=".repeat(50)); + writer.newLine(); + writer.newLine(); + + int index = 1; + for (BaseMediaData item : data) { + writer.write(String.format("[%d] %s", index++, item.getTitle())); + writer.newLine(); + writer.write(" 作者: " + item.getAuthor()); + writer.newLine(); + writer.write(" 平台: " + item.getPlatform()); + writer.newLine(); + writer.write(" 链接: " + item.getUrl()); + writer.newLine(); + writer.newLine(); + } + + writer.write("=".repeat(50)); + writer.newLine(); + writer.write("共计 " + data.size() + " 条记录"); + writer.newLine(); + + logger.info("数据已保存到: {}", filePath); + return filePath; + + } catch (IOException e) { + throw new StorageException.StorageWriteException(fileName, e); + } + } + + @Override + public String save(List data) { + return save(data, "data"); + } + + @Override + public String getFormat() { + return "txt"; + } + + @Override + public String getFileExtension() { + return "txt"; + } +} \ No newline at end of file diff --git a/src/main/java/com/crawler/view/ConsoleView.java b/src/main/java/com/crawler/view/ConsoleView.java new file mode 100644 index 0000000..4ad4535 --- /dev/null +++ b/src/main/java/com/crawler/view/ConsoleView.java @@ -0,0 +1,78 @@ +package com.crawler.view; + +import com.crawler.constant.AnsiColor; +import java.io.PrintStream; +import java.util.Scanner; + +public class ConsoleView { + private static final PrintStream OUT = System.out; + private static final Scanner SCANNER = new Scanner(System.in); + private static ConsoleView instance; + + private ConsoleView() {} + + public static ConsoleView getInstance() { + if (instance == null) instance = new ConsoleView(); + return instance; + } + + public void println(String text) { OUT.println(text); } + public void print(String text) { OUT.print(text); } + public void newLine() { OUT.println(); } + + public void printSuccess(String text) { OUT.println(AnsiColor.success("✓ " + text)); } + public void printError(String text) { OUT.println(AnsiColor.error("✗ " + text)); } + public void printWarning(String text) { OUT.println(AnsiColor.warning("⚠ " + text)); } + public void printInfo(String text) { OUT.println(AnsiColor.info("ℹ " + text)); } + + public void printHeader(String text) { + newLine(); + OUT.println(AnsiColor.header("═══════════════════════════════════════════════")); + OUT.println(AnsiColor.header(" " + text)); + OUT.println(AnsiColor.header("═══════════════════════════════════════════════")); + newLine(); + } + + public void printBanner() { + OUT.println(AnsiColor.CYAN); + OUT.println(" ____ _ _ _ _ "); + OUT.println(" | __ ) __ _| |__ (_)_ __ ___| |_ ___| |_ "); + OUT.println(" | _ \\ / _` | '_ \\| | '_ \\ / _ \\ __/ __| __|"); + OUT.println(" | |_) | (_| | |_) | | | | | __/ || (__| |_ "); + OUT.println(" |____/ \\__,_|_.__/|_|_| |_|\\___|\\__\\___|\\__|"); + OUT.println(AnsiColor.RESET); + OUT.println(AnsiColor.bold(" 多网站爬虫系统") + " v1.0"); + OUT.println(AnsiColor.PURPLE + " 输入 help 查看可用命令" + AnsiColor.RESET); + newLine(); + } + + public void printPrompt() { + OUT.print(AnsiColor.BOLD + AnsiColor.GREEN + "crawler> " + AnsiColor.RESET); + } + + public String readLine() { return SCANNER.nextLine().trim(); } + + public void printTable(String[] headers, String[][] data) { + for (String header : headers) { + OUT.print(AnsiColor.BOLD + String.format("%-18s", header) + AnsiColor.RESET); + } + newLine(); + OUT.println(AnsiColor.YELLOW + "──────────────────────────────────────────────────────────────────────────" + AnsiColor.RESET); + for (String[] row : data) { + for (String cell : row) { + OUT.print(String.format("%-18s", cell != null ? cell : "-")); + } + newLine(); + } + } + + public void printSeparator() { + OUT.println(AnsiColor.CYAN + "──────────────────────────────────────────────────────────────────────────" + AnsiColor.RESET); + } + + public void printExit() { + newLine(); + OUT.println(AnsiColor.YELLOW + " 感谢使用,再见!" + AnsiColor.RESET); + newLine(); + } +} diff --git a/target/classes/com/crawler/Main.class b/target/classes/com/crawler/Main.class new file mode 100644 index 0000000..a2feaba Binary files /dev/null and b/target/classes/com/crawler/Main.class differ diff --git a/target/classes/com/crawler/MultiCrawlerMain.class b/target/classes/com/crawler/MultiCrawlerMain.class new file mode 100644 index 0000000..70030ee Binary files /dev/null and b/target/classes/com/crawler/MultiCrawlerMain.class differ diff --git a/target/classes/com/crawler/cli/CliApplication.class b/target/classes/com/crawler/cli/CliApplication.class new file mode 100644 index 0000000..aa47095 Binary files /dev/null and b/target/classes/com/crawler/cli/CliApplication.class differ diff --git a/target/classes/com/crawler/cli/CommandRegistry$CommandResult.class b/target/classes/com/crawler/cli/CommandRegistry$CommandResult.class new file mode 100644 index 0000000..abc94d9 Binary files /dev/null and b/target/classes/com/crawler/cli/CommandRegistry$CommandResult.class differ diff --git a/target/classes/com/crawler/cli/CommandRegistry.class b/target/classes/com/crawler/cli/CommandRegistry.class new file mode 100644 index 0000000..6a77f50 Binary files /dev/null and b/target/classes/com/crawler/cli/CommandRegistry.class differ diff --git a/target/classes/com/crawler/cli/command/Command.class b/target/classes/com/crawler/cli/command/Command.class new file mode 100644 index 0000000..c6217d7 Binary files /dev/null and b/target/classes/com/crawler/cli/command/Command.class differ diff --git a/target/classes/com/crawler/cli/command/CommandCategory.class b/target/classes/com/crawler/cli/command/CommandCategory.class new file mode 100644 index 0000000..0aa8a5a Binary files /dev/null and b/target/classes/com/crawler/cli/command/CommandCategory.class differ diff --git a/target/classes/com/crawler/cli/command/CommandContext.class b/target/classes/com/crawler/cli/command/CommandContext.class new file mode 100644 index 0000000..b59d7c0 Binary files /dev/null and b/target/classes/com/crawler/cli/command/CommandContext.class differ diff --git a/target/classes/com/crawler/cli/command/CommandOutput.class b/target/classes/com/crawler/cli/command/CommandOutput.class new file mode 100644 index 0000000..0fe2e05 Binary files /dev/null and b/target/classes/com/crawler/cli/command/CommandOutput.class differ diff --git a/target/classes/com/crawler/cli/command/crawler/CrawlCommand.class b/target/classes/com/crawler/cli/command/crawler/CrawlCommand.class new file mode 100644 index 0000000..bba54dc Binary files /dev/null and b/target/classes/com/crawler/cli/command/crawler/CrawlCommand.class differ diff --git a/target/classes/com/crawler/cli/command/crawler/ListCommand.class b/target/classes/com/crawler/cli/command/crawler/ListCommand.class new file mode 100644 index 0000000..83b58b4 Binary files /dev/null and b/target/classes/com/crawler/cli/command/crawler/ListCommand.class differ diff --git a/target/classes/com/crawler/cli/command/system/ExitCommand.class b/target/classes/com/crawler/cli/command/system/ExitCommand.class new file mode 100644 index 0000000..2b5eac7 Binary files /dev/null and b/target/classes/com/crawler/cli/command/system/ExitCommand.class differ diff --git a/target/classes/com/crawler/cli/command/system/HelpCommand.class b/target/classes/com/crawler/cli/command/system/HelpCommand.class new file mode 100644 index 0000000..c6062b5 Binary files /dev/null and b/target/classes/com/crawler/cli/command/system/HelpCommand.class differ diff --git a/target/classes/com/crawler/command/Command.class b/target/classes/com/crawler/command/Command.class new file mode 100644 index 0000000..d83bac0 Binary files /dev/null and b/target/classes/com/crawler/command/Command.class differ diff --git a/target/classes/com/crawler/command/CrawlCommand.class b/target/classes/com/crawler/command/CrawlCommand.class new file mode 100644 index 0000000..3a734d7 Binary files /dev/null and b/target/classes/com/crawler/command/CrawlCommand.class differ diff --git a/target/classes/com/crawler/command/ExitCommand.class b/target/classes/com/crawler/command/ExitCommand.class new file mode 100644 index 0000000..6ea096c Binary files /dev/null and b/target/classes/com/crawler/command/ExitCommand.class differ diff --git a/target/classes/com/crawler/command/HelpCommand.class b/target/classes/com/crawler/command/HelpCommand.class new file mode 100644 index 0000000..69885b7 Binary files /dev/null and b/target/classes/com/crawler/command/HelpCommand.class differ diff --git a/target/classes/com/crawler/command/ListCommand.class b/target/classes/com/crawler/command/ListCommand.class new file mode 100644 index 0000000..f92a407 Binary files /dev/null and b/target/classes/com/crawler/command/ListCommand.class differ diff --git a/target/classes/com/crawler/constant/AnsiColor.class b/target/classes/com/crawler/constant/AnsiColor.class new file mode 100644 index 0000000..8fd281d Binary files /dev/null and b/target/classes/com/crawler/constant/AnsiColor.class differ diff --git a/target/classes/com/crawler/controller/CrawlerController.class b/target/classes/com/crawler/controller/CrawlerController.class new file mode 100644 index 0000000..52aecaf Binary files /dev/null and b/target/classes/com/crawler/controller/CrawlerController.class differ diff --git a/target/classes/com/crawler/crawler/AbstractCrawler.class b/target/classes/com/crawler/crawler/AbstractCrawler.class new file mode 100644 index 0000000..3075a5d Binary files /dev/null and b/target/classes/com/crawler/crawler/AbstractCrawler.class differ diff --git a/target/classes/com/crawler/crawler/BilibiliCrawler.class b/target/classes/com/crawler/crawler/BilibiliCrawler.class new file mode 100644 index 0000000..c6fcdce Binary files /dev/null and b/target/classes/com/crawler/crawler/BilibiliCrawler.class differ diff --git a/target/classes/com/crawler/crawler/DouyinCrawler.class b/target/classes/com/crawler/crawler/DouyinCrawler.class new file mode 100644 index 0000000..38770bc Binary files /dev/null and b/target/classes/com/crawler/crawler/DouyinCrawler.class differ diff --git a/target/classes/com/crawler/crawler/GovNewsCrawler.class b/target/classes/com/crawler/crawler/GovNewsCrawler.class new file mode 100644 index 0000000..fab69e3 Binary files /dev/null and b/target/classes/com/crawler/crawler/GovNewsCrawler.class differ diff --git a/target/classes/com/crawler/crawler/LibraryBookCrawler.class b/target/classes/com/crawler/crawler/LibraryBookCrawler.class new file mode 100644 index 0000000..b1b8c4c Binary files /dev/null and b/target/classes/com/crawler/crawler/LibraryBookCrawler.class differ diff --git a/target/classes/com/crawler/crawler/WeatherCrawler.class b/target/classes/com/crawler/crawler/WeatherCrawler.class new file mode 100644 index 0000000..7c19590 Binary files /dev/null and b/target/classes/com/crawler/crawler/WeatherCrawler.class differ diff --git a/target/classes/com/crawler/crawler/XiaohongshuCrawler.class b/target/classes/com/crawler/crawler/XiaohongshuCrawler.class new file mode 100644 index 0000000..9b4b0de Binary files /dev/null and b/target/classes/com/crawler/crawler/XiaohongshuCrawler.class differ diff --git a/target/classes/com/crawler/exception/BaseException.class b/target/classes/com/crawler/exception/BaseException.class new file mode 100644 index 0000000..639fdef Binary files /dev/null and b/target/classes/com/crawler/exception/BaseException.class differ diff --git a/target/classes/com/crawler/exception/CommandException$CommandExecutionException.class b/target/classes/com/crawler/exception/CommandException$CommandExecutionException.class new file mode 100644 index 0000000..3b6abc7 Binary files /dev/null and b/target/classes/com/crawler/exception/CommandException$CommandExecutionException.class differ diff --git a/target/classes/com/crawler/exception/CommandException$InvalidArgumentException.class b/target/classes/com/crawler/exception/CommandException$InvalidArgumentException.class new file mode 100644 index 0000000..b9422ef Binary files /dev/null and b/target/classes/com/crawler/exception/CommandException$InvalidArgumentException.class differ diff --git a/target/classes/com/crawler/exception/CommandException$MissingArgumentException.class b/target/classes/com/crawler/exception/CommandException$MissingArgumentException.class new file mode 100644 index 0000000..e4682b0 Binary files /dev/null and b/target/classes/com/crawler/exception/CommandException$MissingArgumentException.class differ diff --git a/target/classes/com/crawler/exception/CommandException$UnknownCommandException.class b/target/classes/com/crawler/exception/CommandException$UnknownCommandException.class new file mode 100644 index 0000000..74e5801 Binary files /dev/null and b/target/classes/com/crawler/exception/CommandException$UnknownCommandException.class differ diff --git a/target/classes/com/crawler/exception/CommandException.class b/target/classes/com/crawler/exception/CommandException.class new file mode 100644 index 0000000..f987019 Binary files /dev/null and b/target/classes/com/crawler/exception/CommandException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$ConfigurationException.class b/target/classes/com/crawler/exception/CrawlerException$ConfigurationException.class new file mode 100644 index 0000000..1c6d64a Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$ConfigurationException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$CrawlExecutionException.class b/target/classes/com/crawler/exception/CrawlerException$CrawlExecutionException.class new file mode 100644 index 0000000..206cb44 Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$CrawlExecutionException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$NetworkException.class b/target/classes/com/crawler/exception/CrawlerException$NetworkException.class new file mode 100644 index 0000000..8ab874d Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$NetworkException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$ParseException.class b/target/classes/com/crawler/exception/CrawlerException$ParseException.class new file mode 100644 index 0000000..3998164 Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$ParseException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$PlatformNotFoundException.class b/target/classes/com/crawler/exception/CrawlerException$PlatformNotFoundException.class new file mode 100644 index 0000000..1fba0d9 Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$PlatformNotFoundException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException$ValidationException.class b/target/classes/com/crawler/exception/CrawlerException$ValidationException.class new file mode 100644 index 0000000..c27a227 Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException$ValidationException.class differ diff --git a/target/classes/com/crawler/exception/CrawlerException.class b/target/classes/com/crawler/exception/CrawlerException.class new file mode 100644 index 0000000..1e10eb6 Binary files /dev/null and b/target/classes/com/crawler/exception/CrawlerException.class differ diff --git a/target/classes/com/crawler/exception/ExceptionHandler.class b/target/classes/com/crawler/exception/ExceptionHandler.class new file mode 100644 index 0000000..e1064cc Binary files /dev/null and b/target/classes/com/crawler/exception/ExceptionHandler.class differ diff --git a/target/classes/com/crawler/exception/GlobalExceptionHandler.class b/target/classes/com/crawler/exception/GlobalExceptionHandler.class new file mode 100644 index 0000000..946837c Binary files /dev/null and b/target/classes/com/crawler/exception/GlobalExceptionHandler.class differ diff --git a/target/classes/com/crawler/exception/StorageException$StorageFormatException.class b/target/classes/com/crawler/exception/StorageException$StorageFormatException.class new file mode 100644 index 0000000..5474a47 Binary files /dev/null and b/target/classes/com/crawler/exception/StorageException$StorageFormatException.class differ diff --git a/target/classes/com/crawler/exception/StorageException$StorageReadException.class b/target/classes/com/crawler/exception/StorageException$StorageReadException.class new file mode 100644 index 0000000..d7863fe Binary files /dev/null and b/target/classes/com/crawler/exception/StorageException$StorageReadException.class differ diff --git a/target/classes/com/crawler/exception/StorageException$StorageWriteException.class b/target/classes/com/crawler/exception/StorageException$StorageWriteException.class new file mode 100644 index 0000000..f337812 Binary files /dev/null and b/target/classes/com/crawler/exception/StorageException$StorageWriteException.class differ diff --git a/target/classes/com/crawler/exception/StorageException.class b/target/classes/com/crawler/exception/StorageException.class new file mode 100644 index 0000000..7f603fa Binary files /dev/null and b/target/classes/com/crawler/exception/StorageException.class differ diff --git a/target/classes/com/crawler/factory/CrawlerFactory.class b/target/classes/com/crawler/factory/CrawlerFactory.class new file mode 100644 index 0000000..6305827 Binary files /dev/null and b/target/classes/com/crawler/factory/CrawlerFactory.class differ diff --git a/target/classes/com/crawler/model/BaseMediaData.class b/target/classes/com/crawler/model/BaseMediaData.class new file mode 100644 index 0000000..8b7af58 Binary files /dev/null and b/target/classes/com/crawler/model/BaseMediaData.class differ diff --git a/target/classes/com/crawler/model/BilibiliVideoData.class b/target/classes/com/crawler/model/BilibiliVideoData.class new file mode 100644 index 0000000..55cccdd Binary files /dev/null and b/target/classes/com/crawler/model/BilibiliVideoData.class differ diff --git a/target/classes/com/crawler/model/DouyinVideoData.class b/target/classes/com/crawler/model/DouyinVideoData.class new file mode 100644 index 0000000..8684c25 Binary files /dev/null and b/target/classes/com/crawler/model/DouyinVideoData.class differ diff --git a/target/classes/com/crawler/model/GovNewsData.class b/target/classes/com/crawler/model/GovNewsData.class new file mode 100644 index 0000000..0d51f69 Binary files /dev/null and b/target/classes/com/crawler/model/GovNewsData.class differ diff --git a/target/classes/com/crawler/model/LibraryBookData.class b/target/classes/com/crawler/model/LibraryBookData.class new file mode 100644 index 0000000..8832578 Binary files /dev/null and b/target/classes/com/crawler/model/LibraryBookData.class differ diff --git a/target/classes/com/crawler/model/VideoData.class b/target/classes/com/crawler/model/VideoData.class new file mode 100644 index 0000000..4efe98f Binary files /dev/null and b/target/classes/com/crawler/model/VideoData.class differ diff --git a/target/classes/com/crawler/model/WeatherData.class b/target/classes/com/crawler/model/WeatherData.class new file mode 100644 index 0000000..e8afbe0 Binary files /dev/null and b/target/classes/com/crawler/model/WeatherData.class differ diff --git a/target/classes/com/crawler/model/XiaohongshuData.class b/target/classes/com/crawler/model/XiaohongshuData.class new file mode 100644 index 0000000..c75c413 Binary files /dev/null and b/target/classes/com/crawler/model/XiaohongshuData.class differ diff --git a/target/classes/com/crawler/storage/DataStorage.class b/target/classes/com/crawler/storage/DataStorage.class new file mode 100644 index 0000000..d00b6a1 Binary files /dev/null and b/target/classes/com/crawler/storage/DataStorage.class differ diff --git a/target/classes/com/crawler/storage/StorageFactory.class b/target/classes/com/crawler/storage/StorageFactory.class new file mode 100644 index 0000000..712acef Binary files /dev/null and b/target/classes/com/crawler/storage/StorageFactory.class differ diff --git a/target/classes/com/crawler/storage/TxtStorage.class b/target/classes/com/crawler/storage/TxtStorage.class new file mode 100644 index 0000000..bf83839 Binary files /dev/null and b/target/classes/com/crawler/storage/TxtStorage.class differ diff --git a/target/classes/com/crawler/strategy/crawler/AbstractCrawlStrategy.class b/target/classes/com/crawler/strategy/crawler/AbstractCrawlStrategy.class new file mode 100644 index 0000000..c87e308 Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/AbstractCrawlStrategy.class differ diff --git a/target/classes/com/crawler/strategy/crawler/CrawlStrategy$StrategyType.class b/target/classes/com/crawler/strategy/crawler/CrawlStrategy$StrategyType.class new file mode 100644 index 0000000..5c482d4 Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/CrawlStrategy$StrategyType.class differ diff --git a/target/classes/com/crawler/strategy/crawler/CrawlStrategy.class b/target/classes/com/crawler/strategy/crawler/CrawlStrategy.class new file mode 100644 index 0000000..da7d1c7 Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/CrawlStrategy.class differ diff --git a/target/classes/com/crawler/strategy/crawler/CrawlStrategyFactory.class b/target/classes/com/crawler/strategy/crawler/CrawlStrategyFactory.class new file mode 100644 index 0000000..821600a Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/CrawlStrategyFactory.class differ diff --git a/target/classes/com/crawler/strategy/crawler/GovNewsCrawlStrategy.class b/target/classes/com/crawler/strategy/crawler/GovNewsCrawlStrategy.class new file mode 100644 index 0000000..eace1e7 Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/GovNewsCrawlStrategy.class differ diff --git a/target/classes/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.class b/target/classes/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.class new file mode 100644 index 0000000..296c89b Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/LibraryBookCrawlStrategy.class differ diff --git a/target/classes/com/crawler/strategy/crawler/WeatherCrawlStrategy.class b/target/classes/com/crawler/strategy/crawler/WeatherCrawlStrategy.class new file mode 100644 index 0000000..c943c50 Binary files /dev/null and b/target/classes/com/crawler/strategy/crawler/WeatherCrawlStrategy.class differ diff --git a/target/classes/com/crawler/strategy/storage/AbstractStorageStrategy.class b/target/classes/com/crawler/strategy/storage/AbstractStorageStrategy.class new file mode 100644 index 0000000..b9a49ff Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/AbstractStorageStrategy.class differ diff --git a/target/classes/com/crawler/strategy/storage/CsvStorageStrategy.class b/target/classes/com/crawler/strategy/storage/CsvStorageStrategy.class new file mode 100644 index 0000000..325754f Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/CsvStorageStrategy.class differ diff --git a/target/classes/com/crawler/strategy/storage/JsonStorageStrategy.class b/target/classes/com/crawler/strategy/storage/JsonStorageStrategy.class new file mode 100644 index 0000000..ded5cd0 Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/JsonStorageStrategy.class differ diff --git a/target/classes/com/crawler/strategy/storage/StorageStrategy.class b/target/classes/com/crawler/strategy/storage/StorageStrategy.class new file mode 100644 index 0000000..5a5e33b Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/StorageStrategy.class differ diff --git a/target/classes/com/crawler/strategy/storage/StorageStrategyFactory.class b/target/classes/com/crawler/strategy/storage/StorageStrategyFactory.class new file mode 100644 index 0000000..f63c4d8 Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/StorageStrategyFactory.class differ diff --git a/target/classes/com/crawler/strategy/storage/TxtStorageStrategy.class b/target/classes/com/crawler/strategy/storage/TxtStorageStrategy.class new file mode 100644 index 0000000..789f51c Binary files /dev/null and b/target/classes/com/crawler/strategy/storage/TxtStorageStrategy.class differ diff --git a/target/classes/com/crawler/view/ConsoleView.class b/target/classes/com/crawler/view/ConsoleView.class new file mode 100644 index 0000000..b99e524 Binary files /dev/null and b/target/classes/com/crawler/view/ConsoleView.class differ diff --git a/target/maven-archiver/pom.properties b/target/maven-archiver/pom.properties new file mode 100644 index 0000000..be9a571 --- /dev/null +++ b/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=multi-site-crawler +groupId=com.crawler +version=1.0-SNAPSHOT diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..344c17a --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,72 @@ +com\crawler\exception\CommandException$CommandExecutionException.class +com\crawler\exception\CrawlerException.class +com\crawler\exception\GlobalExceptionHandler.class +com\crawler\model\BaseMediaData.class +com\crawler\exception\StorageException.class +com\crawler\cli\CliApplication.class +com\crawler\crawler\DouyinCrawler.class +com\crawler\model\WeatherData.class +com\crawler\cli\command\CommandContext.class +com\crawler\exception\StorageException$StorageReadException.class +com\crawler\crawler\AbstractCrawler.class +com\crawler\model\VideoData.class +com\crawler\exception\CommandException.class +com\crawler\strategy\storage\StorageStrategyFactory.class +com\crawler\exception\CrawlerException$NetworkException.class +com\crawler\exception\CrawlerException$ValidationException.class +com\crawler\strategy\storage\TxtStorageStrategy.class +com\crawler\cli\command\crawler\CrawlCommand.class +com\crawler\strategy\storage\AbstractStorageStrategy.class +com\crawler\crawler\WeatherCrawler.class +com\crawler\cli\CommandRegistry$CommandResult.class +com\crawler\exception\CommandException$UnknownCommandException.class +com\crawler\cli\command\CommandOutput.class +com\crawler\exception\BaseException.class +com\crawler\exception\CrawlerException$ConfigurationException.class +com\crawler\storage\TxtStorage.class +com\crawler\strategy\storage\JsonStorageStrategy.class +com\crawler\cli\command\crawler\ListCommand.class +com\crawler\strategy\crawler\WeatherCrawlStrategy.class +com\crawler\exception\ExceptionHandler.class +com\crawler\Main.class +com\crawler\exception\CommandException$InvalidArgumentException.class +com\crawler\cli\command\Command.class +com\crawler\strategy\crawler\AbstractCrawlStrategy.class +com\crawler\view\ConsoleView.class +com\crawler\strategy\crawler\CrawlStrategyFactory.class +com\crawler\strategy\storage\CsvStorageStrategy.class +com\crawler\cli\CommandRegistry.class +com\crawler\strategy\storage\StorageStrategy.class +com\crawler\storage\DataStorage.class +com\crawler\crawler\BilibiliCrawler.class +com\crawler\crawler\XiaohongshuCrawler.class +com\crawler\strategy\crawler\LibraryBookCrawlStrategy.class +com\crawler\strategy\crawler\CrawlStrategy$StrategyType.class +com\crawler\exception\CrawlerException$CrawlExecutionException.class +com\crawler\exception\CrawlerException$PlatformNotFoundException.class +com\crawler\crawler\GovNewsCrawler.class +com\crawler\command\Command.class +com\crawler\model\DouyinVideoData.class +com\crawler\command\ExitCommand.class +com\crawler\exception\CrawlerException$ParseException.class +com\crawler\model\GovNewsData.class +com\crawler\cli\command\CommandCategory.class +com\crawler\exception\CommandException$MissingArgumentException.class +com\crawler\command\HelpCommand.class +com\crawler\constant\AnsiColor.class +com\crawler\controller\CrawlerController.class +com\crawler\exception\StorageException$StorageWriteException.class +com\crawler\model\LibraryBookData.class +com\crawler\factory\CrawlerFactory.class +com\crawler\exception\StorageException$StorageFormatException.class +com\crawler\cli\command\system\HelpCommand.class +com\crawler\storage\StorageFactory.class +com\crawler\crawler\LibraryBookCrawler.class +com\crawler\strategy\crawler\CrawlStrategy.class +com\crawler\MultiCrawlerMain.class +com\crawler\cli\command\system\ExitCommand.class +com\crawler\strategy\crawler\GovNewsCrawlStrategy.class +com\crawler\model\XiaohongshuData.class +com\crawler\command\CrawlCommand.class +com\crawler\command\ListCommand.class +com\crawler\model\BilibiliVideoData.class diff --git a/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..fb74cdd --- /dev/null +++ b/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,57 @@ +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\CliApplication.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\Command.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\CommandCategory.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\CommandContext.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\CommandOutput.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\crawler\CrawlCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\crawler\ListCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\system\ExitCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\command\system\HelpCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\cli\CommandRegistry.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\command\Command.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\command\CrawlCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\command\ExitCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\command\HelpCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\command\ListCommand.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\constant\AnsiColor.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\controller\CrawlerController.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\AbstractCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\BilibiliCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\DouyinCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\GovNewsCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\LibraryBookCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\WeatherCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\crawler\XiaohongshuCrawler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\BaseException.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\CommandException.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\CrawlerException.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\ExceptionHandler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\GlobalExceptionHandler.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\exception\StorageException.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\factory\CrawlerFactory.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\Main.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\BaseMediaData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\BilibiliVideoData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\DouyinVideoData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\GovNewsData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\LibraryBookData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\VideoData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\WeatherData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\model\XiaohongshuData.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\MultiCrawlerMain.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\storage\DataStorage.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\storage\StorageFactory.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\storage\TxtStorage.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\AbstractCrawlStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\CrawlStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\CrawlStrategyFactory.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\GovNewsCrawlStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\LibraryBookCrawlStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\crawler\WeatherCrawlStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\AbstractStorageStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\CsvStorageStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\JsonStorageStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\StorageStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\StorageStrategyFactory.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\strategy\storage\TxtStorageStrategy.java +C:\Users\ASUS\Desktop\crawl\src\main\java\com\crawler\view\ConsoleView.java diff --git a/target/multi-site-crawler-1.0-SNAPSHOT-shaded.jar b/target/multi-site-crawler-1.0-SNAPSHOT-shaded.jar new file mode 100644 index 0000000..bb9412e Binary files /dev/null and b/target/multi-site-crawler-1.0-SNAPSHOT-shaded.jar differ diff --git a/target/multi-site-crawler-1.0-SNAPSHOT.jar b/target/multi-site-crawler-1.0-SNAPSHOT.jar new file mode 100644 index 0000000..bb9412e Binary files /dev/null and b/target/multi-site-crawler-1.0-SNAPSHOT.jar differ diff --git a/target/original-multi-site-crawler-1.0-SNAPSHOT.jar b/target/original-multi-site-crawler-1.0-SNAPSHOT.jar new file mode 100644 index 0000000..f75c348 Binary files /dev/null and b/target/original-multi-site-crawler-1.0-SNAPSHOT.jar differ