Browse Source

提交课程项目及期末实验报告

main
WangJunyue 3 weeks ago
parent
commit
fa9770bf39
  1. BIN
      project/202506050307-汪君玥-期末实验报告.docx
  2. 4
      project/java-cli-期末课程项目/.gitignore
  3. 273
      project/java-cli-期末课程项目/data/csdnexport.json
  4. 185
      project/java-cli-期末课程项目/data/my_export.json
  5. 29
      project/java-cli-期末课程项目/data/sample_test.json
  6. 54
      project/java-cli-期末课程项目/minimal_test.ps1
  7. 67
      project/java-cli-期末课程项目/pom.xml
  8. 56
      project/java-cli-期末课程项目/simple_test.cmd
  9. 117
      project/java-cli-期末课程项目/simple_test.ps1
  10. 60
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java
  11. 103
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
  12. 8
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java
  13. 114
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java
  14. 42
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java
  15. 66
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java
  16. 33
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java
  17. 71
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java
  18. 26
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java
  19. 71
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java
  20. 10
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java
  21. 56
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java
  22. 63
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java
  23. 56
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java
  24. 10
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java
  25. 10
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java
  26. 30
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java
  27. 72
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java
  28. 99
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java
  29. 172
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java
  30. 182
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java
  31. 11
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
  32. 115
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java
  33. 77
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
  34. 83
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java
  35. 35
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
  36. 112
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java
  37. 261
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java
  38. 386
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java
  39. 81
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java
  40. 39
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java
  41. 52
      project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java
  42. 25
      project/java-cli-期末课程项目/src/main/resources/logback.xml
  43. 25
      project/java-cli-期末课程项目/target/classes/logback.xml
  44. 3
      project/java-cli-期末课程项目/target/maven-archiver/pom.properties
  45. 0
      project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
  46. 32
      project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
  47. 92
      project/java-cli-期末课程项目/test_crawler.ps1
  48. 17
      project/java-cli-期末课程项目/test_export.json
  49. 236
      project/java-cli-期末课程项目/test_import_export.ps1
  50. 2
      project/java-cli-期末课程项目/test_input.txt

BIN
project/202506050307-汪君玥-期末实验报告.docx

Binary file not shown.

4
project/java-cli-期末课程项目/.gitignore

@ -0,0 +1,4 @@
# Maven build output (compiled classes, maven-archiver and maven-status files)
target/

# Packaged and compiled artifacts
*.jar
*.class

# Runtime logs
*.log

273
project/java-cli-期末课程项目/data/csdnexport.json

@ -0,0 +1,273 @@
[ {
"title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
"content" : ""
}, {
"title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
"url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
"content" : ""
}, {
"title" : "外交部谈美伊谈判",
"url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
"content" : ""
}, {
"title" : "重庆发布今年首个地质灾害红色预警",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
"content" : ""
}, {
"title" : "重庆发布今年首个地质灾害红色预警",
"url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html",
"content" : ""
}, {
"title" : "账号管理规范",
"url" : "https://blog.csdn.net/blogdevteam/article/details/126135357",
"content" : ""
}, {
"title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096",
"content" : ""
}, {
"title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode",
"url" : "https://blog.csdn.net/jiangtao/article/details/161373705",
"content" : ""
}, {
"title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变",
"url" : "https://blog.csdn.net/jzagi/article/details/161327725",
"content" : ""
}, {
"title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”",
"url" : "https://blog.csdn.net/csdnnews/article/details/161325101",
"content" : ""
}, {
"title" : "开源项目“离谱的死亡方式”",
"url" : "https://blog.csdn.net/csdnnews/article/details/161325111",
"content" : ""
}, {
"title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条",
"url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638",
"content" : ""
}, {
"title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914",
"content" : ""
}, {
"title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921",
"content" : ""
}, {
"title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条",
"url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996",
"content" : ""
}, {
"title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗",
"url" : "https://blog.csdn.net/csdnnews/article/details/161294926",
"content" : ""
}, {
"title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 服务器 14.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/157735374",
"content" : ""
}, {
"title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149722641",
"content" : ""
}, {
"title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149424765",
"content" : ""
}, {
"title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149356472",
"content" : ""
}, {
"title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149249425",
"content" : ""
}, {
"title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149169547",
"content" : ""
}, {
"title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149066439",
"content" : ""
}, {
"title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148900678",
"content" : ""
}, {
"title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148851428",
"content" : ""
}, {
"title" : "CHATERM AI:开启云资源氛围管理新篇章! 人工智能 70.3K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148769366",
"content" : ""
}, {
"title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作",
"url" : "https://blog.csdn.net/blogdevteam/article/details/160479095",
"content" : ""
}, {
"title" : "深入解析进程:从PCB到僵尸进程",
"url" : "https://blog.csdn.net/2401_86275172/article/details/160566166",
"content" : ""
}, {
"title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断",
"url" : "https://blog.csdn.net/Rthan/article/details/161241670",
"content" : ""
}, {
"title" : "【读书笔记】《幸福关系的七段旅程》",
"url" : "https://blog.csdn.net/Chandler2017/article/details/160967281",
"content" : ""
}, {
"title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析",
"url" : "https://blog.csdn.net/2401_88151415/article/details/161253437",
"content" : ""
}, {
"title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码",
"url" : "https://blog.csdn.net/qq_36478920/article/details/161291953",
"content" : ""
}, {
"title" : "volatile 的底层原理及应用场景",
"url" : "https://blog.csdn.net/tongluowan007/article/details/161230327",
"content" : ""
}, {
"title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04",
"url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923",
"content" : ""
}, {
"title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)",
"url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019",
"content" : ""
}, {
"title" : "系统分析师 备考知识点整理",
"url" : "https://blog.csdn.net/david_232656/article/details/161291901",
"content" : ""
}, {
"title" : "Linux之文件",
"url" : "https://blog.csdn.net/bksczm/article/details/161055964",
"content" : ""
}, {
"title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)",
"url" : "https://blog.csdn.net/m0_67558301/article/details/161324964",
"content" : ""
}, {
"title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉",
"url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741",
"content" : ""
}, {
"title" : "Codex Mac版安装教程(AppStore无法下载解决)",
"url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569",
"content" : ""
}, {
"title" : "应用层中的UDP协议原理",
"url" : "https://blog.csdn.net/2503_90262217/article/details/161200229",
"content" : ""
}, {
"title" : "【AI】Git、Node.js 一站式保姆级安装指南",
"url" : "https://blog.csdn.net/2401_87342824/article/details/161199150",
"content" : ""
}, {
"title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑",
"url" : "https://blog.csdn.net/Z2314246476/article/details/161076726",
"content" : ""
}, {
"title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点",
"url" : "https://blog.csdn.net/yanceyxin/article/details/161130991",
"content" : ""
}, {
"title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越",
"url" : "https://blog.csdn.net/shaobingj126/article/details/161307384",
"content" : ""
}, {
"title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表",
"url" : "https://blog.csdn.net/2603_96051737/article/details/161256831",
"content" : ""
}, {
"title" : "谷歌辞职、创业失败、重读神经科学,她说 AI 时代最危险的事是外包你的思考 | 万有引力",
"url" : "https://blog.csdn.net/tangxiaoyin/article/details/161428871",
"content" : ""
}, {
"title" : "传字节向Seed员工开放「豆包股」认购权;滴滴出行App大规模故障,官方致歉;小米MiMo-V2.5系列API永久降价:最高降99% | 极客头条",
"url" : "https://blog.csdn.net/weixin_39786569/article/details/161446737",
"content" : ""
}, {
"title" : "华为韬定律刷屏,程序员真正该读懂的信号是什么? | 硅基时间",
"url" : "https://blog.csdn.net/csdnnews/article/details/161432746",
"content" : ""
}, {
"title" : "一位10年Android老兵选择「逆行」:“如果未来只剩AI写代码,那就把我落下吧!”",
"url" : "https://blog.csdn.net/csdnnews/article/details/161432759",
"content" : ""
}, {
"title" : "告别繁琐预处理!MindSpeed LLM推出Train_from_HF功能,实现加载即训练",
"url" : "https://blog.csdn.net/csdnnews/article/details/161426770",
"content" : ""
}, {
"title" : "MindSpeed LLM结合Agent-Skills适配Mamba3模型,解锁SSM模型新潜能",
"url" : "https://blog.csdn.net/csdnnews/article/details/161427107",
"content" : ""
}, {
"title" : "高性能计算:鲲鹏软硬协同定义AI4S 计算新范式",
"url" : "https://blog.csdn.net/csdnnews/article/details/161426451",
"content" : ""
}, {
"title" : "AI公司烧不起Token了!国产Agent杀出,逼近Opus 4.6还免费,天工AI发布SkyClaw-v1.0:面向真实工作流的百万上下文 Agent 模型",
"url" : "https://blog.csdn.net/csdnnews/article/details/161422508",
"content" : ""
}, {
"title" : "2026年618大促7000元内演唱会手机推荐:Find X9s Pro领衔,远摄防抖清晰度全解析",
"url" : "https://blog.csdn.net/2601_95822891/article/details/161261185",
"content" : ""
}, {
"title" : "Python运算符:身份运算符(is/is not)与双等号的区别",
"url" : "https://blog.csdn.net/AIRoses/article/details/161410239",
"content" : ""
}, {
"title" : "Codex 与 Claude Code 安装配置教程",
"url" : "https://blog.csdn.net/weixin_45888077/article/details/161401615",
"content" : ""
}, {
"title" : "初识java(十一):继承",
"url" : "https://blog.csdn.net/2502_93282244/article/details/161372118",
"content" : ""
}, {
"title" : "我那台在抽屉里躺了三年的旧手机,被我改造成了全天候私人云盘",
"url" : "https://blog.csdn.net/SDFsoul/article/details/161278737",
"content" : ""
}, {
"title" : "【必看】2026年 {计算题} |专项解析 ~ H:动态规划 & 图论",
"url" : "https://blog.csdn.net/weixin_42115157/article/details/161057408",
"content" : ""
}, {
"title" : "FreeRTOS——按键控制任务的挂起和恢复",
"url" : "https://blog.csdn.net/weixin_64611877/article/details/161456747",
"content" : ""
}, {
"title" : "【c++笔记】类和对象流食般投喂(中)",
"url" : "https://blog.csdn.net/dj_798/article/details/160994229",
"content" : ""
}, {
"title" : "C++的IO流",
"url" : "https://blog.csdn.net/suimingtao/article/details/160892078",
"content" : ""
}, {
"title" : "Java——标准序列化机制",
"url" : "https://blog.csdn.net/cold___play/article/details/161107932",
"content" : ""
}, {
"title" : "1.6T光模块将成AI数据中心主流",
"url" : "https://blog.csdn.net/m0_75253087/article/details/160956039",
"content" : ""
}, {
"title" : "通用程序无缺陷保证的不可能性:停机问题与哥德尔不完备定理的双轨论证 —— 兼论“边界情况不可穷举”的形式化含义",
"url" : "https://blog.csdn.net/qq_43689451/article/details/161271922",
"content" : ""
}, {
"title" : "新书速览|信息与通信工程综合实验:自动目标识别专题",
"url" : "https://blog.csdn.net/quanzhankaifaqua/article/details/161193290",
"content" : ""
}, {
"title" : "深入理解 OSI 七层网络模型:从原理到实践",
"url" : "https://blog.csdn.net/2603_95882547/article/details/161140630",
"content" : ""
} ]

185
project/java-cli-期末课程项目/data/my_export.json

@ -0,0 +1,185 @@
[ {
"title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
"content" : ""
}, {
"title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
"url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
"content" : ""
}, {
"title" : "外交部谈美伊谈判",
"url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
"content" : ""
}, {
"title" : "重庆发布今年首个地质灾害红色预警",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
"content" : ""
}, {
"title" : "重庆发布今年首个地质灾害红色预警",
"url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html",
"content" : ""
}, {
"title" : "账号管理规范",
"url" : "https://blog.csdn.net/blogdevteam/article/details/126135357",
"content" : ""
}, {
"title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096",
"content" : ""
}, {
"title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode",
"url" : "https://blog.csdn.net/jiangtao/article/details/161373705",
"content" : ""
}, {
"title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变",
"url" : "https://blog.csdn.net/jzagi/article/details/161327725",
"content" : ""
}, {
"title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”",
"url" : "https://blog.csdn.net/csdnnews/article/details/161325101",
"content" : ""
}, {
"title" : "开源项目“离谱的死亡方式”",
"url" : "https://blog.csdn.net/csdnnews/article/details/161325111",
"content" : ""
}, {
"title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条",
"url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638",
"content" : ""
}, {
"title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914",
"content" : ""
}, {
"title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环",
"url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921",
"content" : ""
}, {
"title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条",
"url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996",
"content" : ""
}, {
"title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗",
"url" : "https://blog.csdn.net/csdnnews/article/details/161294926",
"content" : ""
}, {
"title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 服务器 14.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/157735374",
"content" : ""
}, {
"title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149722641",
"content" : ""
}, {
"title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149424765",
"content" : ""
}, {
"title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149356472",
"content" : ""
}, {
"title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149249425",
"content" : ""
}, {
"title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149169547",
"content" : ""
}, {
"title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/149066439",
"content" : ""
}, {
"title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148900678",
"content" : ""
}, {
"title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148851428",
"content" : ""
}, {
"title" : "CHATERM AI:开启云资源氛围管理新篇章! 人工智能 70.3K 查看详情",
"url" : "https://blog.csdn.net/coderroad/article/details/148769366",
"content" : ""
}, {
"title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作",
"url" : "https://blog.csdn.net/blogdevteam/article/details/160479095",
"content" : ""
}, {
"title" : "深入解析进程:从PCB到僵尸进程",
"url" : "https://blog.csdn.net/2401_86275172/article/details/160566166",
"content" : ""
}, {
"title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断",
"url" : "https://blog.csdn.net/Rthan/article/details/161241670",
"content" : ""
}, {
"title" : "【读书笔记】《幸福关系的七段旅程》",
"url" : "https://blog.csdn.net/Chandler2017/article/details/160967281",
"content" : ""
}, {
"title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析",
"url" : "https://blog.csdn.net/2401_88151415/article/details/161253437",
"content" : ""
}, {
"title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码",
"url" : "https://blog.csdn.net/qq_36478920/article/details/161291953",
"content" : ""
}, {
"title" : "volatile 的底层原理及应用场景",
"url" : "https://blog.csdn.net/tongluowan007/article/details/161230327",
"content" : ""
}, {
"title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04",
"url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923",
"content" : ""
}, {
"title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)",
"url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019",
"content" : ""
}, {
"title" : "系统分析师 备考知识点整理",
"url" : "https://blog.csdn.net/david_232656/article/details/161291901",
"content" : ""
}, {
"title" : "Linux之文件",
"url" : "https://blog.csdn.net/bksczm/article/details/161055964",
"content" : ""
}, {
"title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)",
"url" : "https://blog.csdn.net/m0_67558301/article/details/161324964",
"content" : ""
}, {
"title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉",
"url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741",
"content" : ""
}, {
"title" : "Codex Mac版安装教程(AppStore无法下载解决)",
"url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569",
"content" : ""
}, {
"title" : "应用层中的UDP协议原理",
"url" : "https://blog.csdn.net/2503_90262217/article/details/161200229",
"content" : ""
}, {
"title" : "【AI】Git、Node.js 一站式保姆级安装指南",
"url" : "https://blog.csdn.net/2401_87342824/article/details/161199150",
"content" : ""
}, {
"title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑",
"url" : "https://blog.csdn.net/Z2314246476/article/details/161076726",
"content" : ""
}, {
"title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点",
"url" : "https://blog.csdn.net/yanceyxin/article/details/161130991",
"content" : ""
}, {
"title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越",
"url" : "https://blog.csdn.net/shaobingj126/article/details/161307384",
"content" : ""
}, {
"title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表",
"url" : "https://blog.csdn.net/2603_96051737/article/details/161256831",
"content" : ""
} ]

29
project/java-cli-期末课程项目/data/sample_test.json

@ -0,0 +1,29 @@
{
"metadata": {
"exportTime": "2026-05-31T12:00:00",
"totalCount": 3,
"source": "CLI Crawler v1.0",
"exportMode": "STANDARD",
"version": "1.0"
},
"articles": [
{
"title": "测试文章1",
"url": "https://example.com/article1",
"content": "这是测试内容1",
"crawledAt": "2026-05-31T10:00:00"
},
{
"title": "测试文章2",
"url": "https://example.com/article2",
"content": "这是测试内容2",
"crawledAt": "2026-05-31T11:00:00"
},
{
"title": "测试文章3",
"url": "https://example.com/article3",
"content": "这是测试内容3",
"crawledAt": "2026-05-31T12:00:00"
}
]
}

54
project/java-cli-期末课程项目/minimal_test.ps1

@ -0,0 +1,54 @@
# Simple Import/Export Test - Minimal Version
#
# Smoke test for the datacollect CLI: import -> list -> export -> verify the
# exported file -> import again -> list. Run it from the project directory after
# `mvn package`, so that the relative jar and data paths below resolve.
#
# NOTE(review): `java` is resolved through PATH; setting JAVA_HOME does not by
# itself select the JVM that gets launched. Adjust to the local JDK if needed.
# NOTE(review): Main.main in this commit never reads its `args` array (it only
# processes lines from ConsoleView.readLine()), so confirm that these
# argument-style invocations really execute the requested command.
$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
$SAMPLE = "data\sample_test.json"
$EXPORT = "data\export_result.json"

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Import/Export Test - Minimal" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""

# Fail early with a clear message instead of six identical "Unable to access jarfile" errors.
if (-not (Test-Path $APP_JAR)) {
    Write-Host "[ERROR] $APP_JAR not found. Build the project first (mvn package)." -ForegroundColor Red
    exit 1
}

# Remove an export left over from an earlier run so TEST 4 only reflects this run.
if (Test-Path $EXPORT) {
    Remove-Item $EXPORT
}

Write-Host "[TEST 1] Import" -ForegroundColor Yellow
$result = & java -jar $APP_JAR import $SAMPLE 2>&1
# Join with newlines: Write-Host would otherwise print the captured lines
# space-separated on a single line.
Write-Host ($result -join "`n")
Write-Host ""

Write-Host "[TEST 2] List" -ForegroundColor Yellow
$result = & java -jar $APP_JAR list 2>&1
Write-Host ($result -join "`n")
Write-Host ""

Write-Host "[TEST 3] Export" -ForegroundColor Yellow
$result = & java -jar $APP_JAR export $EXPORT --format json 2>&1
Write-Host ($result -join "`n")
Write-Host ""

Write-Host "[TEST 4] Check Export File" -ForegroundColor Yellow
if (Test-Path $EXPORT) {
    Write-Host "[SUCCESS] File created!" -ForegroundColor Green
    $content = Get-Content $EXPORT -Raw
    Write-Host "Length: $($content.Length) chars" -ForegroundColor Cyan
    if ($content -match "crawledAt") {
        Write-Host "[SUCCESS] crawledAt field found!" -ForegroundColor Green
    } else {
        Write-Host "[ERROR] crawledAt field NOT found" -ForegroundColor Red
    }
    if ($content -match "metadata") {
        Write-Host "[SUCCESS] metadata field found!" -ForegroundColor Green
    } else {
        Write-Host "[ERROR] metadata field NOT found" -ForegroundColor Red
    }
} else {
    # Previously a missing export file produced no output at all.
    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
}
Write-Host ""

Write-Host "[TEST 5] Import Again (Duplicate)" -ForegroundColor Yellow
$result = & java -jar $APP_JAR import $SAMPLE 2>&1
Write-Host ($result -join "`n")
Write-Host ""

Write-Host "[TEST 6] Final List" -ForegroundColor Yellow
$result = & java -jar $APP_JAR list 2>&1
Write-Host ($result -join "`n")
Write-Host ""

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "TEST COMPLETED" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan

67
project/java-cli-期末课程项目/pom.xml

@ -0,0 +1,67 @@
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>datacollect-cli</artifactId>
  <version>0.1.0</version>

  <properties>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
    <!-- The sources contain non-ASCII comments; pin the encoding so the build
         does not depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
  </properties>

  <dependencies>
    <!-- HTML fetching and parsing -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.17.2</version>
    </dependency>
    <!-- Logging API and its Logback backend -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>2.0.9</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.4.14</version>
    </dependency>
    <!-- JSON serialization -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.16.1</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
      </plugin>
      <!-- Builds datacollect-cli-0.1.0-jar-with-dependencies.jar during `package`,
           with com.example.datacollect.Main as the manifest main class. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.example.datacollect.Main</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

56
project/java-cli-期末课程项目/simple_test.cmd

@ -0,0 +1,56 @@
@echo off
rem Import/export smoke test for the datacollect CLI (cmd.exe version).
rem Run from the project directory after "mvn package" so the relative paths resolve.
rem
rem NOTE(review): java is resolved through PATH; JAVA_HOME alone does not select
rem the JVM that gets launched.
rem NOTE(review): Main.main in this commit never reads its args array, so confirm
rem that these argument-style invocations really execute the requested command.

rem Keep the variables below local to this script run.
setlocal
set "JAVA_HOME=C:\Program Files\Java\latest\jdk-25"
set "APP_JAR=target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
set "SAMPLE=data\sample_test.json"
set "EXPORT=data\export_result.json"

echo ========================================
echo Import/Export Feature Test
echo ========================================
echo.

rem Remove an export left over from an earlier run so TEST 4 reflects this run only.
if exist "%EXPORT%" del "%EXPORT%"

echo [TEST 1] Import sample JSON file
echo Command: import %SAMPLE%
java -jar "%APP_JAR%" import "%SAMPLE%"
echo.
echo.

echo [TEST 2] List articles
echo Command: list
java -jar "%APP_JAR%" list
echo.
echo.

echo [TEST 3] Export to JSON
echo Command: export %EXPORT% --format json
java -jar "%APP_JAR%" export "%EXPORT%" --format json
echo.
echo.

echo [TEST 4] Check exported file
if exist "%EXPORT%" (
    echo [SUCCESS] Export file created
    echo.
    rem The preview is line based, so the label says lines rather than characters.
    echo First 20 lines of exported file:
    powershell -Command "Get-Content '%EXPORT%' | Select-Object -First 20"
) else (
    echo [ERROR] Export file NOT created
)
echo.
echo.

echo [TEST 5] Test duplicate import
echo Command: import %SAMPLE% (again)
java -jar "%APP_JAR%" import "%SAMPLE%"
echo.
echo.

echo [TEST 6] Final list
echo Command: list
java -jar "%APP_JAR%" list
echo.
echo.

echo ========================================
echo Tests completed! Check output above.
echo ========================================

117
project/java-cli-期末课程项目/simple_test.ps1

@ -0,0 +1,117 @@
# Simple Import/Export Test
#
# Imports the sample JSON, lists, exports, verifies the exported file, imports
# the same sample again and checks that the reported article count is unchanged.
# Run from the project directory after `mvn package`.
#
# NOTE(review): `java` is resolved through PATH; setting JAVA_HOME does not by
# itself select the JVM that gets launched.
# NOTE(review): Main.main in this commit never reads its `args` array (it only
# processes lines from ConsoleView.readLine()), so confirm that these
# argument-style invocations really execute the requested command.
$ErrorActionPreference = "Stop"
$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
$TEST_FILE = "data\sample_test.json"
$EXPORT_FILE = "data\export_result.json"

# Runs the CLI jar with the given arguments and returns its combined
# stdout/stderr as an array of strings.
function Invoke-Cli {
    param([string[]]$CliArgs)
    # Function-local override: with "Stop", stderr lines captured through 2>&1
    # from a native command can terminate the script under Windows PowerShell.
    $ErrorActionPreference = "Continue"
    & java -jar $APP_JAR @CliArgs 2>&1 | ForEach-Object { "$_" }
}

# Returns the number from the last "Total: <n>" line in the output, or 0 if none.
function Get-TotalCount {
    param([string[]]$Lines)
    $count = 0
    foreach ($line in ($Lines -split "`n")) {
        if ($line -match "Total: (\d+)") {
            $count = [int]$Matches[1]
        }
    }
    return $count
}

Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Import/Export Feature Test" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""

if (-not (Test-Path $APP_JAR)) {
    Write-Host "[ERROR] $APP_JAR not found. Build the project first (mvn package)." -ForegroundColor Red
    exit 1
}

# Remove an export left over from an earlier run so TEST 4 reflects this run only.
if (Test-Path $EXPORT_FILE) {
    Remove-Item $EXPORT_FILE
}

# Step 1: Import sample data
Write-Host "[TEST 1] Import sample JSON file" -ForegroundColor Yellow
Write-Host "Command: import $TEST_FILE" -ForegroundColor Gray
# Command and path are passed as separate arguments, matching the sibling
# minimal_test.ps1 and simple_test.cmd scripts.
$result = Invoke-Cli -CliArgs @("import", $TEST_FILE)
# Join with newlines: Write-Host would otherwise print the lines space-separated.
Write-Host ($result -join "`n") -ForegroundColor Green
Write-Host ""

# Step 2: List articles
Write-Host "[TEST 2] List articles after import" -ForegroundColor Yellow
Write-Host "Command: list" -ForegroundColor Gray
$result = Invoke-Cli -CliArgs @("list")
Write-Host ($result -join "`n") -ForegroundColor Green
$count1 = Get-TotalCount -Lines $result
Write-Host "Article count: $count1" -ForegroundColor Cyan
Write-Host ""

# Step 3: Export to new file
Write-Host "[TEST 3] Export to new JSON file" -ForegroundColor Yellow
Write-Host "Command: export $EXPORT_FILE --format json" -ForegroundColor Gray
$result = Invoke-Cli -CliArgs @("export", $EXPORT_FILE, "--format", "json")
Write-Host ($result -join "`n") -ForegroundColor Green
Write-Host ""

# Step 4: Check exported file. The outcomes are recorded so the summary only
# reports what was actually verified.
$exportCreated = $false
$hasCrawledAt = $false
$hasMetadata = $false
Write-Host "[TEST 4] Verify exported JSON file" -ForegroundColor Yellow
if (Test-Path $EXPORT_FILE) {
    $exportCreated = $true
    Write-Host "[SUCCESS] Export file created" -ForegroundColor Green
    $content = Get-Content $EXPORT_FILE -Raw
    Write-Host "File size: $($content.Length) characters" -ForegroundColor Cyan
    # Check for crawledAt
    if ($content -match "crawledAt") {
        $hasCrawledAt = $true
        Write-Host "[SUCCESS] crawledAt field found in exported JSON" -ForegroundColor Green
    } else {
        Write-Host "[ERROR] crawledAt field NOT found" -ForegroundColor Red
    }
    # Check for metadata
    if ($content -match "metadata") {
        $hasMetadata = $true
        Write-Host "[SUCCESS] metadata field found" -ForegroundColor Green
    } else {
        Write-Host "[ERROR] metadata field NOT found" -ForegroundColor Red
    }
} else {
    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
}
Write-Host ""

# Step 5: Test duplicate import
Write-Host "[TEST 5] Test duplicate import (should skip duplicates)" -ForegroundColor Yellow
Write-Host "Command: import $TEST_FILE (again)" -ForegroundColor Gray
$result = Invoke-Cli -CliArgs @("import", $TEST_FILE)
Write-Host ($result -join "`n") -ForegroundColor Green
Write-Host ""

# Step 6: List and verify no duplication
Write-Host "[TEST 6] Verify no duplication" -ForegroundColor Yellow
Write-Host "Command: list" -ForegroundColor Gray
$result = Invoke-Cli -CliArgs @("list")
Write-Host ($result -join "`n") -ForegroundColor Green
$count2 = Get-TotalCount -Lines $result
Write-Host "Article count after second import: $count2" -ForegroundColor Cyan
Write-Host ""

# Summary
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "TEST SUMMARY" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
$countsOk = ($count1 -eq 3 -and $count2 -eq 3)
if ($countsOk -and $exportCreated -and $hasCrawledAt -and $hasMetadata) {
    Write-Host "[SUCCESS] All tests passed!" -ForegroundColor Green
    Write-Host "- Import: Successfully imported 3 articles" -ForegroundColor White
    Write-Host "- Export: Successfully exported to JSON" -ForegroundColor White
    Write-Host "- Duplicate: Correctly skipped duplicate articles" -ForegroundColor White
    Write-Host "- crawledAt field: Present in exported JSON" -ForegroundColor White
} else {
    Write-Host "[PARTIAL] Some tests may have issues" -ForegroundColor Yellow
    Write-Host "First import count: $count1" -ForegroundColor White
    Write-Host "Second import count: $count2" -ForegroundColor White
    Write-Host "Export file created: $exportCreated" -ForegroundColor White
    Write-Host "crawledAt field found: $hasCrawledAt" -ForegroundColor White
    Write-Host "metadata field found: $hasMetadata" -ForegroundColor White
}
Write-Host ""

# Show exported file content (first 1000 characters at most)
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "EXPORTED JSON CONTENT (Preview)" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
if (Test-Path $EXPORT_FILE) {
    $exportContent = Get-Content $EXPORT_FILE -Raw
    if ($exportContent.Length -gt 1000) {
        Write-Host ($exportContent.Substring(0, 1000) + "...") -ForegroundColor White
    } else {
        Write-Host $exportContent -ForegroundColor White
    }
}

60
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java

@ -0,0 +1,60 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.repository.PersistenceManager;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.util.JsonExporter;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * Entry point of the CLI crawler.
 *
 * <p>Creates the console view, article repository and persistence manager,
 * asks the persistence manager to load stored articles, and then hands every
 * line read from the console to the {@link CrawlerController}.
 */
public class Main {
    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    /**
     * Wires the application together and runs the input loop.
     *
     * @param args command-line arguments; not read by this method
     */
    public static void main(String[] args) {
        try (ConsoleView view = new ConsoleView();
             ArticleRepository repository = new ArticleRepository();
             PersistenceManager persistenceManager = new PersistenceManager(repository)) {
            logger.info("Starting CLI Crawler application");

            JsonExporter jsonExporter = new JsonExporter(repository);
            StrategyFactory strategyFactory = new StrategyFactory();
            loadSession(persistenceManager, view, repository);

            CrawlerController controller = new CrawlerController(
                    view, repository, strategyFactory, persistenceManager, jsonExporter);
            view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
            logger.info("Application initialized successfully");

            runInputLoop(controller, view);
        } catch (Exception e) {
            logger.error("Fatal error in application: {}", e.getMessage(), e);
            System.err.println("Fatal error: " + e.getMessage());
            System.exit(1);
        }
    }

    /**
     * Reads console lines forever and passes each one to the controller.
     * An exception raised while handling a line is reported and logged, and
     * the loop then continues with the next line.
     */
    private static void runInputLoop(CrawlerController controller, ConsoleView view) {
        for (;;) {
            try {
                controller.handle(view.readLine());
            } catch (Exception e) {
                view.printError("Error: " + e.getMessage());
                logger.error("Error in main loop: {}", e.getMessage(), e);
            }
        }
    }

    /**
     * Loads stored articles through the persistence manager and reports how
     * many are in the repository. An {@link IOException} during loading is
     * shown as a warning and logged; it is not propagated.
     */
    private static void loadSession(PersistenceManager persistenceManager, ConsoleView view, ArticleRepository repository) {
        try {
            // Load the stored session.
            persistenceManager.load();
            // Nothing to report when the repository is empty.
            if (repository.size() <= 0) {
                return;
            }
            // Report the number of loaded articles.
            view.printInfo("Loaded " + repository.size() + " articles from previous session");
        } catch (IOException e) {
            view.printError("Warning: Failed to load previous session: " + e.getMessage());
            logger.warn("Failed to load previous session: {}", e.getMessage(), e);
        }
    }
}

103
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java

@ -0,0 +1,103 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.NetworkException;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.util.RetryUtils;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.concurrent.Callable;
/**
 * The {@code analyze <url>} command: fetches a page, parses it with the strategy
 * selected for the URL and prints length statistics for the parsed articles.
 * The parsed articles are not added to the repository.
 */
public class AnalyzeCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view            view used for all user-facing output
     * @param strategyFactory factory that selects the parsing strategy for a URL
     */
    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    @Override
    public String getName() {
        return "analyze";
    }

    /**
     * Runs the analysis for the URL given in {@code args[1]}.
     *
     * @param args       tokenised command line; {@code args[1]} is the URL
     * @param repository article repository (not used by this command)
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args.length < 2) {
            view.printError("Usage: analyze <url>");
            logger.warn("Invalid command: missing URL argument");
            return;
        }

        final String url = args[1];
        logger.info("Analyze command executed for URL: {}", url);

        try {
            CrawlStrategy strategy = strategyFactory.getStrategy(url);
            if (strategy == null) {
                view.printError("No strategy found for: " + url);
                logger.error("No strategy found for URL: {}", url);
                return;
            }

            Document doc = RetryUtils.executeWithRetry(buildFetchTask(url));
            logger.info("Successfully fetched document from: {}", url);

            List<Article> articles = strategy.parse(url, doc);
            logger.info("Parsed {} articles for analysis", articles.size());

            int total = articles.size();
            printStatistics(articles, total);
            logger.info("Analysis completed: {} articles analyzed", total);
        } catch (NetworkException e) {
            view.printError("Network error: " + e.getMessage());
            logger.error("Network error while analyzing {}: {}", url, e.getMessage(), e);
        } catch (ParseException e) {
            view.printError("Parse error: " + e.getMessage());
            logger.error("Parse error while analyzing {}: {}", url, e.getMessage(), e);
        } catch (Exception e) {
            view.printError("分析失败:" + e.getMessage());
            logger.error("Unexpected error while analyzing {}: {}", url, e.getMessage(), e);
        }
    }

    /** Builds the task that downloads the page; I/O failures become {@link NetworkException}. */
    private Callable<Document> buildFetchTask(String url) {
        return () -> {
            logger.debug("Fetching document from: {}", url);
            try {
                return Jsoup.connect(url)
                        .userAgent("Mozilla/5.0")
                        .timeout(5000)
                        .get();
            } catch (IOException e) {
                throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e);
            }
        };
    }

    /** Prints totals and (integer-division) averages of title and content lengths. */
    private void printStatistics(List<Article> articles, int total) {
        int titleChars = 0;
        int contentChars = 0;
        for (Article article : articles) {
            // Null titles/contents contribute zero characters.
            if (article.getTitle() != null) {
                titleChars += article.getTitle().length();
            }
            if (article.getContent() != null) {
                contentChars += article.getContent().length();
            }
        }

        view.printInfo("===== 分析统计结果 =====");
        view.printInfo("文章总数:" + total + " 篇");
        view.printInfo("标题总长度:" + titleChars);
        view.printInfo("内容总长度:" + contentChars);
        if (total > 0) {
            // Averages are only meaningful (and division only safe) with at least one article.
            view.printInfo("平均标题长度:" + (titleChars / total));
            view.printInfo("平均内容长度:" + (contentChars / total));
        }
        view.printInfo("======================");
        view.printSuccess("分析完成(数据未保存)");
    }
}

8
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java

@ -0,0 +1,8 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
/**
 * A single CLI command that can be registered under a name and executed
 * against the article repository.
 */
public interface Command {

    /**
     * Executes the command.
     *
     * @param args       the tokenised command line; {@code args[0]} is the command name
     * @param repository the repository the command operates on
     */
    void execute(String[] args, ArticleRepository repository);

    /**
     * @return the name under which this command is registered
     */
    String getName();
}

114
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java

@ -0,0 +1,114 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.NetworkException;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.exception.UrlFormatException;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.util.RetryUtils;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.concurrent.Callable;
/**
 * The {@code crawl <url>} command: validates the URL, downloads the page (with
 * retries), parses it with the strategy selected for the URL and stores the
 * parsed articles in the repository.
 */
public class CrawlCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view            view used for all user-facing output
     * @param strategyFactory factory that selects the parsing strategy for a URL
     */
    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
        this.view = view;
        this.strategyFactory = strategyFactory;
    }

    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Crawls the URL given in {@code args[1]} and adds the parsed articles to
     * {@code repository}. All failures are reported through the view and the log;
     * a malformed URL is reported as a URL-format error instead of escaping to the caller.
     *
     * @param args       tokenised command line; {@code args[1]} is the URL
     * @param repository repository that receives the parsed articles
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args == null || args.length < 2) {
            view.printError("用法: crawl <url>");
            logger.warn("无效命令: 缺少URL参数");
            return;
        }
        String url = args[1];
        if (url == null || url.trim().isEmpty()) {
            view.printError("错误: URL不能为空");
            logger.error("无效参数: URL为空");
            return;
        }
        try {
            validateUrl(url);
        } catch (UrlFormatException e) {
            // Previously this exception was thrown outside any try block, so the
            // dedicated URL-format handling below was never reached for it.
            view.printError("URL格式错误: " + e.getMessage());
            logger.error("爬取 {} 时URL格式错误: {}", url, e.getMessage(), e);
            return;
        }
        logger.info("开始爬取: {}", url);
        CrawlStrategy strategy = strategyFactory.getStrategy(url);
        if (strategy == null) {
            view.printError("未找到策略: " + url);
            logger.error("未找到URL对应的策略: {}", url);
            return;
        }
        try {
            view.printInfo("正在爬取: " + url);
            Callable<Document> fetchTask = () -> {
                logger.debug("正在获取文档: {}", url);
                try {
                    return Jsoup.connect(url)
                            .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
                            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
                            .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                            // "br" is deliberately not advertised: a Brotli-encoded response
                            // would reach the parser undecoded.
                            // NOTE(review): assumes jsoup only decodes gzip/deflate - confirm
                            // for the jsoup version in pom.xml.
                            .header("Accept-Encoding", "gzip, deflate")
                            .header("Connection", "keep-alive")
                            .header("Referer", url)
                            .header("Cache-Control", "max-age=0")
                            .timeout(15000)
                            .followRedirects(true)
                            .get();
                } catch (IOException e) {
                    throw new NetworkException("连接失败: " + e.getMessage(), e);
                }
            };
            Document doc = RetryUtils.executeWithRetry(fetchTask);
            logger.info("成功获取文档: {}", url);
            var articles = strategy.parse(url, doc);
            logger.info("解析文章数: {}", articles.size());
            // The repository may skip duplicates or invalid entries, so the number
            // actually stored is measured rather than assumed.
            int sizeBefore = repository.size();
            repository.addAll(articles);
            int added = repository.size() - sizeBefore;
            logger.info("成功添加 {} 篇文章到仓库", added);
            view.printSuccess("爬取完成,共 " + articles.size() + " 篇文章。");
            logger.info("成功从 {} 爬取 {} 篇文章", url, articles.size());
        } catch (NetworkException e) {
            view.printError(e.getMessage());
            logger.error("爬取 {} 时网络错误: {}", url, e.getMessage(), e);
        } catch (ParseException e) {
            view.printError("解析错误: " + e.getMessage());
            logger.error("爬取 {} 时解析错误: {}", url, e.getMessage(), e);
        } catch (UrlFormatException e) {
            view.printError("URL格式错误: " + e.getMessage());
            logger.error("爬取 {} 时URL格式错误: {}", url, e.getMessage(), e);
        } catch (Exception e) {
            view.printError("爬取失败: " + e.getMessage());
            logger.error("爬取 {} 时发生未知错误: {}", url, e.getMessage(), e);
        }
    }

    /**
     * Checks that {@code url} is syntactically a URL.
     *
     * @throws UrlFormatException if {@link URL} rejects the text
     */
    private static void validateUrl(String url) {
        try {
            new URL(url);
        } catch (MalformedURLException e) {
            throw new UrlFormatException("无效的URL格式: " + url, url, e);
        }
    }
}

42
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java

@ -0,0 +1,42 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.repository.PersistenceManager;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
/**
 * The {@code exit} command: saves the repository through the persistence
 * manager, says goodbye and terminates the JVM with status 0.
 */
public class ExitCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);

    private final ConsoleView view;
    private final PersistenceManager persistenceManager;

    /**
     * @param view               view used for user-facing messages
     * @param persistenceManager manager used to save data before shutdown
     */
    public ExitCommand(ConsoleView view, PersistenceManager persistenceManager) {
        this.view = view;
        this.persistenceManager = persistenceManager;
    }

    @Override
    public String getName() {
        return "exit";
    }

    /**
     * Saves, prints the farewell message and exits. A failed save is reported
     * as a warning and does not prevent the exit.
     *
     * @param args       tokenised command line (not used)
     * @param repository repository whose size is reported after saving
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        logger.info("Exit command executed, saving data before shutdown");
        saveBeforeExit(repository);
        view.printSuccess("Bye!");
        System.exit(0);
    }

    /** Saves the data and reports the outcome to the view and the log. */
    private void saveBeforeExit(ArticleRepository repository) {
        try {
            persistenceManager.save();
            view.printInfo("Saved " + repository.size() + " articles");
            logger.info("Successfully saved {} articles before exit", repository.size());
        } catch (IOException ioFailure) {
            view.printError("Warning: Failed to save data: " + ioFailure.getMessage());
            logger.error("Failed to save data on exit: {}", ioFailure.getMessage(), ioFailure);
        }
    }
}

66
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java

@ -0,0 +1,66 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.ExportException;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.repository.PersistenceManager;
import com.example.datacollect.util.JsonExporter;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.nio.file.Paths;
/**
 * The {@code export <file_path> [--format json]} command: writes the repository
 * content to a file through the {@link JsonExporter}. Only the {@code json}
 * format is supported.
 */
public class ExportCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExportCommand.class);

    private static final String USAGE = "Usage: export <file_path> [--format json]";

    private final ConsoleView view;
    private final PersistenceManager persistenceManager;
    private final JsonExporter jsonExporter;

    /**
     * @param view               view used for user-facing messages
     * @param persistenceManager persistence manager (kept for constructor compatibility;
     *                           not used by this command)
     * @param jsonExporter       exporter that performs the actual write
     */
    public ExportCommand(ConsoleView view, PersistenceManager persistenceManager, JsonExporter jsonExporter) {
        this.view = view;
        this.persistenceManager = persistenceManager;
        this.jsonExporter = jsonExporter;
    }

    @Override
    public String getName() {
        return "export";
    }

    /**
     * Exports all articles to the path given in {@code args[1]}.
     *
     * @param args       tokenised command line; {@code args[1]} is the target path,
     *                   optionally followed by {@code --format <name>}
     * @param repository repository whose size is reported after a successful export
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args == null || args.length < 2) {
            view.printError(USAGE);
            logger.warn("Invalid command: missing file path argument");
            return;
        }
        String filePath = args[1];
        String format = "json";
        for (int i = 2; i < args.length; i++) {
            if (!"--format".equals(args[i])) {
                continue;
            }
            if (i + 1 >= args.length) {
                // A dangling "--format" used to be ignored silently; report it instead.
                view.printError(USAGE);
                logger.warn("Invalid command: --format given without a value");
                return;
            }
            // Locale.ROOT keeps the comparison below stable in every default locale
            // (e.g. "JSON" does not lower-case to "json" under a Turkish locale).
            format = args[++i].toLowerCase(java.util.Locale.ROOT);
        }
        logger.info("导出请求: 文件={}, 格式={}", filePath, format);
        try {
            if ("json".equals(format)) {
                jsonExporter.exportToFile(Paths.get(filePath));
                view.printSuccess("Successfully exported " + repository.size() + " articles to " + filePath);
                logger.info("Exported {} articles to {}", repository.size(), filePath);
            } else {
                view.printError("Unsupported format: " + format + ". Only 'json' is supported.");
                logger.warn("Unsupported format: {}", format);
            }
        } catch (ExportException e) {
            view.printError("Export failed: " + e.getMessage());
            logger.error("Export error: {}", e.getMessage(), e);
        }
    }
}

33
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java

@ -0,0 +1,33 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The {@code help} command: prints the list of available commands.
 */
public class HelpCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);

    /** Help text, printed one entry per line in this order. */
    private static final String[] HELP_LINES = {
        "Commands:",
        " crawl <url> - Crawl articles from URL",
        " list - List all articles",
        " export <file> - Export articles to JSON file",
        " import <file> - Import articles from JSON file",
        " analyze <url> - Analyze URL structure",
        " help - Show this help",
        " exit - Exit and save data",
    };

    private final ConsoleView view;

    /**
     * @param view view that receives the help text
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "help";
    }

    /**
     * Prints every help line through the view.
     *
     * @param args       tokenised command line (not used)
     * @param repository article repository (not used)
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        logger.info("Help command executed");
        for (String line : HELP_LINES) {
            view.printInfo(line);
        }
    }
}

71
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java

@ -0,0 +1,71 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.ImportException;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.repository.PersistenceManager;
import com.example.datacollect.util.JsonImporter;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The {@code import <file_path>} command: imports articles through the
 * persistence manager and prints a report of the outcome.
 */
public class ImportCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ImportCommand.class);

    /** Maximum number of individual error messages listed in the report. */
    private static final int MAX_LISTED_ERRORS = 3;

    private final ConsoleView view;
    private final PersistenceManager persistenceManager;

    /**
     * @param view               view used for user-facing messages
     * @param persistenceManager manager that performs the import
     */
    public ImportCommand(ConsoleView view, PersistenceManager persistenceManager) {
        this.view = view;
        this.persistenceManager = persistenceManager;
    }

    @Override
    public String getName() {
        return "import";
    }

    /**
     * Imports the file given in {@code args[1]} and prints the import report.
     * Any failure is reported through the view and the log.
     *
     * @param args       tokenised command line; {@code args[1]} is the file path
     * @param repository repository whose size after the import is reported
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        if (args.length < 2) {
            view.printError("Usage: import <file_path>");
            logger.warn("Invalid command: missing file path argument");
            return;
        }

        String filePath = args[1];
        try {
            JsonImporter.ImportResult result = persistenceManager.importWithReport(filePath);
            int repositoryTotal = repository.size();
            view.printSuccess(buildReport(result, repositoryTotal));
            logger.info("Import result: {}", result.getSummary());
        } catch (ImportException e) {
            reportFailure(e);
        } catch (Exception e) {
            reportFailure(e);
        }
    }

    /** Formats the multi-line report, listing at most {@link #MAX_LISTED_ERRORS} errors. */
    private String buildReport(JsonImporter.ImportResult result, int repositoryTotal) {
        StringBuilder report = new StringBuilder("Import completed:\n");
        report.append(" - Total found: ").append(result.getTotalFound()).append("\n")
              .append(" - Imported: ").append(result.getImported()).append("\n")
              .append(" - Skipped (duplicates): ").append(result.getSkipped()).append("\n")
              .append(" - Invalid: ").append(result.getInvalid()).append("\n")
              .append(" - Overwritten: ").append(result.getOverwritten()).append("\n")
              .append(" - Repository total: ").append(repositoryTotal);

        if (result.getErrors().isEmpty()) {
            return report.toString();
        }

        int errorCount = result.getErrors().size();
        report.append("\n - Errors: ").append(errorCount);
        int listed = Math.min(MAX_LISTED_ERRORS, errorCount);
        for (int index = 0; index < listed; index++) {
            report.append("\n ").append(index + 1).append(". ").append(result.getErrors().get(index));
        }
        if (errorCount > MAX_LISTED_ERRORS) {
            report.append("\n ... and ").append(errorCount - MAX_LISTED_ERRORS).append(" more errors");
        }
        return report.toString();
    }

    /** Reports a failed import to the view and the log. */
    private void reportFailure(Exception failure) {
        view.printError("Import failed: " + failure.getMessage());
        logger.error("Import error: {}", failure.getMessage(), failure);
    }
}

26
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java

@ -0,0 +1,26 @@
package com.example.datacollect.command;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The {@code list} command: shows every article held by the repository.
 */
public class ListCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);

    private final ConsoleView view;

    /**
     * @param view view that displays the articles
     */
    public ListCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "list";
    }

    /**
     * Logs the article count and hands all articles to the view for display.
     *
     * @param args       tokenised command line (not used)
     * @param repository repository whose articles are listed
     */
    @Override
    public void execute(String[] args, ArticleRepository repository) {
        int articleCount = repository.size();
        logger.info("List command executed, showing {} articles", articleCount);
        view.display(repository.getAll());
    }
}

71
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java

@ -0,0 +1,71 @@
package com.example.datacollect.controller;
import com.example.datacollect.command.AnalyzeCommand;
import com.example.datacollect.command.Command;
import com.example.datacollect.command.CrawlCommand;
import com.example.datacollect.command.ExitCommand;
import com.example.datacollect.command.ExportCommand;
import com.example.datacollect.command.HelpCommand;
import com.example.datacollect.command.ImportCommand;
import com.example.datacollect.command.ListCommand;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.repository.PersistenceManager;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.util.JsonExporter;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Map;
/**
 * Dispatches user input lines to the registered {@link Command} implementations.
 */
public class CrawlerController {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);

    /** Registered commands keyed by command name. */
    private final Map<String, Command> commands = new HashMap<>();
    /** View used for error output. */
    private final ConsoleView view;
    /** Repository handed to every executed command. */
    private final ArticleRepository repository;

    /**
     * Creates the controller and registers all built-in commands.
     *
     * @param view               console view shared by all commands
     * @param repository         article repository passed to commands on execution
     * @param strategyFactory    factory used by the crawl and analyze commands
     * @param persistenceManager manager used by the exit, export and import commands
     * @param jsonExporter       exporter used by the export command
     */
    public CrawlerController(ConsoleView view, ArticleRepository repository,
                             StrategyFactory strategyFactory, PersistenceManager persistenceManager, JsonExporter jsonExporter) {
        this.view = view;
        this.repository = repository;
        register(new HelpCommand(view));
        register(new ListCommand(view));
        register(new CrawlCommand(view, strategyFactory));
        register(new ExitCommand(view, persistenceManager));
        register(new AnalyzeCommand(view, strategyFactory));
        register(new ExportCommand(view, persistenceManager, jsonExporter));
        register(new ImportCommand(view, persistenceManager));
        logger.info("CrawlerController initialized with {} commands", commands.size());
    }

    /** Adds a command to the lookup table under its own name. */
    private void register(Command command) {
        commands.put(command.getName(), command);
        logger.debug("Registered command: {}", command.getName());
    }

    /**
     * Parses one input line and executes the matching command.
     * Null or blank input is ignored; unknown commands and command failures are
     * reported through the view.
     *
     * @param input raw line typed by the user; may be {@code null}
     */
    public void handle(String input) {
        String text = input == null ? "" : input.trim();
        if (text.isEmpty()) {
            return;
        }
        // Split on runs of whitespace; args[0] is the command name.
        String[] args = text.split("\\s+");
        // Locale.ROOT makes command matching independent of the default locale
        // (under a Turkish locale "LIST" would otherwise lower-case to "lıst").
        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
        logger.debug("Processing command: {}", cmdName);
        Command command = commands.get(cmdName);
        if (command == null) {
            view.printError("Unknown command: " + cmdName);
            logger.warn("Unknown command attempted: {}", cmdName);
            return;
        }
        try {
            command.execute(args, repository);
        } catch (Exception e) {
            view.printError("Command execution failed: " + e.getMessage());
            logger.error("Error executing command {}: {}", cmdName, e.getMessage(), e);
        }
    }
}

10
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java

@ -0,0 +1,10 @@
package com.example.datacollect.exception;
/**
 * Checked base exception for crawler failures.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }
}

56
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java

@ -0,0 +1,56 @@
package com.example.datacollect.exception;
/**
 * Signals that an article with the same URL already exists.
 * Optionally carries the duplicate URL and the index of the existing article;
 * both are appended to {@link #getMessage()} when present.
 */
public class DuplicateArticleException extends Exception {
    private final String duplicateUrl;
    private final Integer existingIndex;

    /** @param message description of the failure */
    public DuplicateArticleException(String message) {
        this(message, null, (Integer) null);
    }

    /**
     * @param message      description of the failure
     * @param duplicateUrl URL that was found twice, or {@code null}
     */
    public DuplicateArticleException(String message, String duplicateUrl) {
        this(message, duplicateUrl, (Integer) null);
    }

    /**
     * @param message       description of the failure
     * @param duplicateUrl  URL that was found twice, or {@code null}
     * @param existingIndex index of the already stored article, or {@code null}
     */
    public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex) {
        super(message);
        this.duplicateUrl = duplicateUrl;
        this.existingIndex = existingIndex;
    }

    /**
     * @param message      description of the failure
     * @param duplicateUrl URL that was found twice, or {@code null}
     * @param cause        underlying exception
     */
    public DuplicateArticleException(String message, String duplicateUrl, Throwable cause) {
        this(message, duplicateUrl, null, cause);
    }

    /**
     * @param message       description of the failure
     * @param duplicateUrl  URL that was found twice, or {@code null}
     * @param existingIndex index of the already stored article, or {@code null}
     * @param cause         underlying exception
     */
    public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex, Throwable cause) {
        super(message, cause);
        this.duplicateUrl = duplicateUrl;
        this.existingIndex = existingIndex;
    }

    /** @return the duplicate URL, or {@code null} if none was supplied */
    public String getDuplicateUrl() {
        return duplicateUrl;
    }

    /** @return the index of the existing article, or {@code null} if none was supplied */
    public Integer getExistingIndex() {
        return existingIndex;
    }

    /**
     * Returns the base message followed by the available context tags.
     * A {@code null} base message is tolerated (it previously caused a
     * {@link NullPointerException}): the tags alone are returned, or
     * {@code null} when there is no context either.
     */
    @Override
    public String getMessage() {
        String base = super.getMessage();
        StringBuilder sb = new StringBuilder(base == null ? "" : base);
        if (duplicateUrl != null) {
            sb.append(" [重复URL: ").append(duplicateUrl).append("]");
        }
        if (existingIndex != null) {
            sb.append(" [已存在位置: ").append(existingIndex).append("]");
        }
        if (base != null) {
            return sb.toString();
        }
        // No base message: drop the separator space in front of the first tag.
        return sb.length() == 0 ? null : sb.substring(1);
    }
}

63
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java

@ -0,0 +1,63 @@
package com.example.datacollect.exception;
/**
 * Signals a failed export. Optionally carries the target file path and an
 * estimated output size; both are appended to {@link #getMessage()} when present.
 */
public class ExportException extends Exception {
    private final String filePath;
    private final Long estimatedSize;

    /** @param message description of the failure */
    public ExportException(String message) {
        this(message, null, (Long) null);
    }

    /**
     * @param message  description of the failure
     * @param filePath target file, or {@code null}
     */
    public ExportException(String message, String filePath) {
        this(message, filePath, (Long) null);
    }

    /**
     * @param message       description of the failure
     * @param filePath      target file, or {@code null}
     * @param estimatedSize estimated output size in bytes, or {@code null}
     */
    public ExportException(String message, String filePath, Long estimatedSize) {
        super(message);
        this.filePath = filePath;
        this.estimatedSize = estimatedSize;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ExportException(String message, Throwable cause) {
        this(message, (String) null, cause);
    }

    /**
     * @param message  description of the failure
     * @param filePath target file, or {@code null}
     * @param cause    underlying exception
     */
    public ExportException(String message, String filePath, Throwable cause) {
        super(message, cause);
        this.filePath = filePath;
        this.estimatedSize = null;
    }

    /** @return the target file path, or {@code null} if none was supplied */
    public String getFilePath() {
        return filePath;
    }

    /** @return the estimated size in bytes, or {@code null} if none was supplied */
    public Long getEstimatedSize() {
        return estimatedSize;
    }

    /**
     * Returns the base message followed by the available context tags.
     * A {@code null} base message is tolerated (it previously caused a
     * {@link NullPointerException}): the tags alone are returned, or
     * {@code null} when there is no context either.
     */
    @Override
    public String getMessage() {
        String base = super.getMessage();
        StringBuilder sb = new StringBuilder(base == null ? "" : base);
        if (filePath != null) {
            sb.append(" [文件: ").append(filePath).append("]");
        }
        if (estimatedSize != null) {
            sb.append(" [预估大小: ").append(formatSize(estimatedSize)).append("]");
        }
        if (base != null) {
            return sb.toString();
        }
        // No base message: drop the separator space in front of the first tag.
        return sb.length() == 0 ? null : sb.substring(1);
    }

    /** Renders a byte count as B, KB, MB or GB (1024-based, two decimals above bytes). */
    private static String formatSize(long size) {
        if (size < 1024) return size + " B";
        if (size < 1024 * 1024) return String.format("%.2f KB", size / 1024.0);
        if (size < 1024 * 1024 * 1024) return String.format("%.2f MB", size / (1024.0 * 1024));
        return String.format("%.2f GB", size / (1024.0 * 1024 * 1024));
    }
}

56
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java

@ -0,0 +1,56 @@
package com.example.datacollect.exception;
/**
 * Signals a failed import. Optionally carries the source file path and a line
 * number; both are appended to {@link #getMessage()} when present.
 */
public class ImportException extends Exception {
    private final String filePath;
    private final Integer lineNumber;

    /** @param message description of the failure */
    public ImportException(String message) {
        this(message, null, (Integer) null);
    }

    /**
     * @param message  description of the failure
     * @param filePath source file, or {@code null}
     */
    public ImportException(String message, String filePath) {
        this(message, filePath, (Integer) null);
    }

    /**
     * @param message    description of the failure
     * @param filePath   source file, or {@code null}
     * @param lineNumber line the failure refers to, or {@code null}
     */
    public ImportException(String message, String filePath, Integer lineNumber) {
        super(message);
        this.filePath = filePath;
        this.lineNumber = lineNumber;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ImportException(String message, Throwable cause) {
        this(message, (String) null, cause);
    }

    /**
     * @param message  description of the failure
     * @param filePath source file, or {@code null}
     * @param cause    underlying exception
     */
    public ImportException(String message, String filePath, Throwable cause) {
        super(message, cause);
        this.filePath = filePath;
        this.lineNumber = null;
    }

    /** @return the source file path, or {@code null} if none was supplied */
    public String getFilePath() {
        return filePath;
    }

    /** @return the line number, or {@code null} if none was supplied */
    public Integer getLineNumber() {
        return lineNumber;
    }

    /**
     * Returns the base message followed by the available context tags.
     * A {@code null} base message is tolerated (it previously caused a
     * {@link NullPointerException}): the tags alone are returned, or
     * {@code null} when there is no context either.
     */
    @Override
    public String getMessage() {
        String base = super.getMessage();
        StringBuilder sb = new StringBuilder(base == null ? "" : base);
        if (filePath != null) {
            sb.append(" [文件: ").append(filePath).append("]");
        }
        if (lineNumber != null) {
            sb.append(" [行号: ").append(lineNumber).append("]");
        }
        if (base != null) {
            return sb.toString();
        }
        // No base message: drop the separator space in front of the first tag.
        return sb.length() == 0 ? null : sb.substring(1);
    }
}

10
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java

@ -0,0 +1,10 @@
package com.example.datacollect.exception;
/**
 * Crawler exception for network-level failures.
 */
public class NetworkException extends CrawlerException {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public NetworkException(String message) {
        super(message);
    }
}

10
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java

@ -0,0 +1,10 @@
package com.example.datacollect.exception;
/**
 * Crawler exception for failures while parsing a document.
 */
public class ParseException extends CrawlerException {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public ParseException(String message) {
        super(message);
    }
}

30
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java

@ -0,0 +1,30 @@
package com.example.datacollect.exception;
/**
 * Unchecked exception for a URL that is not well formed.
 * Optionally carries the offending URL text.
 */
public class UrlFormatException extends RuntimeException {

    /** The rejected URL text, or {@code null} when it was not supplied. */
    private final String invalidUrl;

    /**
     * @param message description of the failure
     */
    public UrlFormatException(String message) {
        super(message);
        invalidUrl = null;
    }

    /**
     * @param message    description of the failure
     * @param invalidUrl the rejected URL text
     */
    public UrlFormatException(String message, String invalidUrl) {
        super(message);
        this.invalidUrl = invalidUrl;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
        invalidUrl = null;
    }

    /**
     * @param message    description of the failure
     * @param invalidUrl the rejected URL text
     * @param cause      underlying exception
     */
    public UrlFormatException(String message, String invalidUrl, Throwable cause) {
        super(message, cause);
        this.invalidUrl = invalidUrl;
    }

    /**
     * @return the rejected URL text, or {@code null} if none was supplied
     */
    public String getInvalidUrl() {
        return this.invalidUrl;
    }
}

72
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java

@ -0,0 +1,72 @@
package com.example.datacollect.exception;
/**
 * Signals a failed validation. Optionally carries the field name, the rejected
 * value and the violated rule; each is appended to {@link #getMessage()} when present.
 */
public class ValidationException extends Exception {
    /** Longest prefix of the rejected value that is shown in the message. */
    private static final int MAX_DISPLAYED_VALUE_LENGTH = 50;

    private final String fieldName;
    private final String invalidValue;
    private final String validationRule;

    /** @param message description of the failure */
    public ValidationException(String message) {
        this(message, null, null, null);
    }

    /**
     * @param message   description of the failure
     * @param fieldName name of the invalid field, or {@code null}
     */
    public ValidationException(String message, String fieldName) {
        this(message, fieldName, null, null);
    }

    /**
     * @param message      description of the failure
     * @param fieldName    name of the invalid field, or {@code null}
     * @param invalidValue rejected value, or {@code null}
     */
    public ValidationException(String message, String fieldName, String invalidValue) {
        this(message, fieldName, invalidValue, null);
    }

    /**
     * @param message        description of the failure
     * @param fieldName      name of the invalid field, or {@code null}
     * @param invalidValue   rejected value, or {@code null}
     * @param validationRule description of the violated rule, or {@code null}
     */
    public ValidationException(String message, String fieldName, String invalidValue, String validationRule) {
        super(message);
        this.fieldName = fieldName;
        this.invalidValue = invalidValue;
        this.validationRule = validationRule;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ValidationException(String message, Throwable cause) {
        super(message, cause);
        this.fieldName = null;
        this.invalidValue = null;
        this.validationRule = null;
    }

    /** @return the invalid field's name, or {@code null} if none was supplied */
    public String getFieldName() {
        return fieldName;
    }

    /** @return the rejected value, or {@code null} if none was supplied */
    public String getInvalidValue() {
        return invalidValue;
    }

    /** @return the violated rule, or {@code null} if none was supplied */
    public String getValidationRule() {
        return validationRule;
    }

    /**
     * Returns the base message followed by the available context tags; the
     * rejected value is shortened to 50 characters plus "...".
     * A {@code null} base message is tolerated (it previously caused a
     * {@link NullPointerException}): the tags alone are returned, or
     * {@code null} when there is no context either.
     */
    @Override
    public String getMessage() {
        String base = super.getMessage();
        StringBuilder sb = new StringBuilder(base == null ? "" : base);
        if (fieldName != null) {
            sb.append(" [字段: ").append(fieldName).append("]");
        }
        if (invalidValue != null) {
            String displayValue = invalidValue.length() > MAX_DISPLAYED_VALUE_LENGTH
                    ? invalidValue.substring(0, MAX_DISPLAYED_VALUE_LENGTH) + "..."
                    : invalidValue;
            sb.append(" [值: ").append(displayValue).append("]");
        }
        if (validationRule != null) {
            sb.append(" [规则: ").append(validationRule).append("]");
        }
        if (base != null) {
            return sb.toString();
        }
        // No base message: drop the separator space in front of the first tag.
        return sb.length() == 0 ? null : sb.substring(1);
    }
}

99
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java

@ -0,0 +1,99 @@
package com.example.datacollect.model;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.time.LocalDateTime;
/**
 * A crawled article: title, URL, content and the time it was crawled.
 *
 * <p>Title and URL are trimmed and validated by their setters; content is
 * capped at {@value #MAX_CONTENT_LENGTH} characters.
 */
public class Article {
    private static final int MAX_TITLE_LENGTH = 500;
    private static final int MAX_CONTENT_LENGTH = 10000;

    private String title;
    private String url;
    private String content;
    private LocalDateTime crawledAt;

    /** Creates an empty article stamped with the current time. */
    public Article() {
        this.crawledAt = LocalDateTime.now();
    }

    /**
     * Creates an article stamped with the current time.
     *
     * @throws IllegalArgumentException if the title or URL is invalid
     */
    public Article(String title, String url, String content) {
        setTitle(title);
        setUrl(url);
        setContent(content);
        this.crawledAt = LocalDateTime.now();
    }

    /**
     * Creates an article with an explicit crawl time; used for JSON deserialisation.
     *
     * @param crawledAt crawl time; the current time is used when {@code null}
     * @throws IllegalArgumentException if the title or URL is invalid
     */
    @JsonCreator
    public Article(@JsonProperty("title") String title,
                   @JsonProperty("url") String url,
                   @JsonProperty("content") String content,
                   @JsonProperty("crawledAt") LocalDateTime crawledAt) {
        setTitle(title);
        setUrl(url);
        setContent(content);
        this.crawledAt = crawledAt != null ? crawledAt : LocalDateTime.now();
    }

    public String getTitle() {
        return title;
    }

    /**
     * Sets the title after trimming it.
     *
     * @throws IllegalArgumentException if the title is null, blank, or longer than
     *                                  500 characters after trimming
     */
    public void setTitle(String title) {
        if (title == null) {
            throw new IllegalArgumentException("Title cannot be null");
        }
        // Validate the trimmed text, i.e. exactly what gets stored.
        String trimmed = title.trim();
        if (trimmed.isEmpty()) {
            throw new IllegalArgumentException("Title cannot be empty");
        }
        if (trimmed.length() > MAX_TITLE_LENGTH) {
            throw new IllegalArgumentException("Title cannot exceed 500 characters");
        }
        this.title = trimmed;
    }

    public String getUrl() {
        return url;
    }

    /**
     * Sets the URL after trimming it.
     *
     * @throws IllegalArgumentException if the URL is null, blank, or does not start
     *                                  with {@code http://} or {@code https://}
     */
    public void setUrl(String url) {
        if (url == null) {
            throw new IllegalArgumentException("URL cannot be null");
        }
        // Trim before the scheme check so surrounding whitespace, which is removed
        // anyway, does not cause a valid URL to be rejected.
        String trimmed = url.trim();
        if (trimmed.isEmpty()) {
            throw new IllegalArgumentException("URL cannot be empty");
        }
        if (!trimmed.startsWith("http://") && !trimmed.startsWith("https://")) {
            throw new IllegalArgumentException("URL must start with http:// or https://");
        }
        this.url = trimmed;
    }

    public String getContent() {
        return content;
    }

    /**
     * Sets the content; {@code null} becomes the empty string and text longer than
     * 10000 characters is truncated.
     */
    public void setContent(String content) {
        if (content == null) {
            this.content = "";
        } else if (content.length() > MAX_CONTENT_LENGTH) {
            int end = MAX_CONTENT_LENGTH;
            // Do not cut a surrogate pair in half: a dangling high surrogate is
            // invalid text and can break later encoding of the content.
            if (Character.isHighSurrogate(content.charAt(end - 1))) {
                end--;
            }
            this.content = content.substring(0, end);
        } else {
            this.content = content;
        }
    }

    public LocalDateTime getCrawledAt() {
        return crawledAt;
    }

    public void setCrawledAt(LocalDateTime crawledAt) {
        this.crawledAt = crawledAt;
    }

    @Override
    public String toString() {
        return "Article{"
                + "title='" + title + '\''
                + ", url='" + url + '\''
                + ", crawledAt='" + crawledAt + '\''
                + '}';
    }
}

172
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java

@ -0,0 +1,172 @@
package com.example.datacollect.repository;
import com.example.datacollect.model.Article;
import com.example.datacollect.util.JsonSerializer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
public class ArticleRepository implements AutoCloseable {
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
private static final int MAX_TITLE_LENGTH = 500;
private static final int MAX_CONTENT_LENGTH = 10000;
private final List<Article> articles = new ArrayList<>();
private final Set<String> urlSet = new HashSet<>();
/**
 * Validates an article and stores a normalised copy of it.
 * An article whose URL is already stored is skipped with a warning.
 *
 * @param article article to add
 * @throws IllegalArgumentException if the article is null, its title or URL is
 *                                  blank, the title is too long, or the URL does
 *                                  not start with http:// or https://
 */
public void add(Article article) {
    if (article == null) {
        logger.error("Attempted to add null article");
        throw new IllegalArgumentException("Article cannot be null");
    }
    String title = article.getTitle();
    String url = article.getUrl();
    String content = article.getContent();
    if (title == null || title.trim().isEmpty()) {
        logger.warn("Attempted to add article with empty title");
        throw new IllegalArgumentException("Article title cannot be null or empty");
    }
    if (url == null || url.trim().isEmpty()) {
        logger.warn("Attempted to add article with empty URL");
        throw new IllegalArgumentException("Article URL cannot be null or empty");
    }
    if (title.length() > MAX_TITLE_LENGTH) {
        logger.warn("Article title too long: {} characters (max: {})", title.length(), MAX_TITLE_LENGTH);
        throw new IllegalArgumentException("Article title exceeds maximum length of " + MAX_TITLE_LENGTH);
    }
    if (content != null && content.length() > MAX_CONTENT_LENGTH) {
        logger.warn("Article content too long: {} characters (max: {})", content.length(), MAX_CONTENT_LENGTH);
        content = content.substring(0, MAX_CONTENT_LENGTH);
    }
    if (!url.startsWith("http://") && !url.startsWith("https://")) {
        logger.warn("Invalid URL format: {}", url);
        throw new IllegalArgumentException("Article URL must start with http:// or https://");
    }
    if (urlSet.contains(url)) {
        logger.warn("Duplicate article URL detected: {}", url);
        return;
    }
    // Carry the original crawl time over to the stored copy; the three-argument
    // constructor would silently reset it to "now".
    Article validatedArticle = new Article(title.trim(), url.trim(),
            content != null ? content.trim() : "", article.getCrawledAt());
    articles.add(validatedArticle);
    urlSet.add(url);
    logger.debug("Added article: {}", title);
}
public void addAll(List<Article> articleList) {
if (articleList == null) {
logger.error("Attempted to add null article list");
throw new IllegalArgumentException("Article list cannot be null");
}
int successCount = 0;
int skipCount = 0;
for (Article article : articleList) {
if (article != null) {
try {
add(article);
successCount++;
} catch (IllegalArgumentException e) {
logger.warn("Skipped invalid article: {}", e.getMessage());
skipCount++;
}
} else {
logger.warn("Skipped null article in list");
skipCount++;
}
}
logger.info("Added {} articles, skipped {} invalid articles", successCount, skipCount);
}
public List<Article> getAll() {
logger.debug("Retrieving all articles, total: {}", articles.size());
return Collections.unmodifiableList(articles);
}
public int size() {
return articles.size();
}
public void clear() {
int count = articles.size();
articles.clear();
urlSet.clear();
logger.info("Cleared repository, removed {} articles", count);
}
public void remove(Article article) {
if (article == null) {
logger.warn("Attempted to remove null article");
return;
}
String url = article.getUrl();
if (url != null && urlSet.contains(url)) {
articles.remove(article);
urlSet.remove(url);
logger.debug("Removed article: {}", article.getTitle());
} else {
logger.warn("Article not found in repository: {}", url);
}
}
public Article findByUrl(String url) {
if (url == null || url.trim().isEmpty()) {
logger.debug("findByUrl called with null or empty URL");
return null;
}
for (Article article : articles) {
if (article.getUrl().equals(url)) {
logger.debug("Found article by URL: {}", url);
return article;
}
}
logger.debug("No article found with URL: {}", url);
return null;
}
public boolean containsUrl(String url) {
return url != null && urlSet.contains(url);
}
public void saveToJson(String filePath) throws IOException {
JsonSerializer.writeToFile(articles, filePath);
logger.info("Saved {} articles to JSON file: {}", articles.size(), filePath);
}
public void loadFromJson(String filePath) throws IOException {
List<Article> loadedArticles = JsonSerializer.readListFromFile(filePath, Article.class);
addAll(loadedArticles);
logger.info("Loaded {} articles from JSON file: {}", loadedArticles.size(), filePath);
}
public String toJsonString() {
return JsonSerializer.serialize(articles);
}
public String toJsonStringCompact() {
return JsonSerializer.serializeCompact(articles);
}
@Override
public void close() {
logger.debug("ArticleRepository closed");
}
}

182
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java

@ -0,0 +1,182 @@
package com.example.datacollect.repository;
import com.example.datacollect.exception.ExportException;
import com.example.datacollect.exception.ImportException;
import com.example.datacollect.model.Article;
import com.example.datacollect.util.JsonExporter;
import com.example.datacollect.util.JsonImporter;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
/**
 * Persists an {@link ArticleRepository} to a backup directory.
 *
 * <p>Provides a plain JSON backup file ({@link #save()} / {@link #load()}), timestamped
 * snapshots and import/export through {@link JsonExporter} and {@link JsonImporter}.
 * Closing the manager triggers a final {@link #save()}.</p>
 */
public class PersistenceManager implements AutoCloseable {
    private static final Logger logger = LoggerFactory.getLogger(PersistenceManager.class);
    private static final String DEFAULT_BACKUP_DIR = "data";
    private static final String DEFAULT_BACKUP_FILE = "articles.json";
    private static final String BACKUP_FILE_PATTERN = "articles_%s.json";
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");

    private final ObjectMapper objectMapper;
    private final Path backupDir;
    private final Path backupFile;
    private final ArticleRepository repository;
    private final AtomicBoolean autoSaveEnabled;
    private final JsonExporter jsonExporter;
    private final JsonImporter jsonImporter;

    /**
     * Creates a manager that uses the default backup directory {@code "data"}.
     *
     * @param repository the repository to persist
     */
    public PersistenceManager(ArticleRepository repository) {
        this(repository, DEFAULT_BACKUP_DIR);
    }

    /**
     * Creates a manager and makes sure the backup directory exists.
     *
     * @param repository the repository to persist
     * @param backupDir  directory holding the backup file and snapshots
     * @throws RuntimeException if the backup directory cannot be created
     */
    public PersistenceManager(ArticleRepository repository, String backupDir) {
        this.repository = repository;
        this.backupDir = Paths.get(backupDir);
        this.backupFile = this.backupDir.resolve(DEFAULT_BACKUP_FILE);
        this.autoSaveEnabled = new AtomicBoolean(true);
        this.objectMapper = new ObjectMapper();
        this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
        this.jsonExporter = new JsonExporter(repository);
        this.jsonImporter = new JsonImporter(repository);
        ensureBackupDirExists();
        logger.info("PersistenceManager initialized with backup directory: {}", backupDir);
    }

    /** Creates the backup directory (including parents) when it is missing. */
    private void ensureBackupDirExists() {
        try {
            if (!Files.exists(backupDir)) {
                Files.createDirectories(backupDir);
                logger.debug("Created backup directory: {}", backupDir);
            }
        } catch (IOException e) {
            logger.error("Failed to create backup directory: {}", e.getMessage(), e);
            throw new RuntimeException("Failed to create backup directory", e);
        }
    }

    /**
     * Writes all articles to the backup file, unless auto-save is disabled.
     *
     * <p>The data is first written to a temporary sibling file and then moved over the
     * backup file, so a failure while serialising does not leave a truncated backup.</p>
     *
     * @throws IOException if writing or replacing the backup file fails
     */
    public void save() throws IOException {
        if (!autoSaveEnabled.get()) {
            logger.debug("Auto-save is disabled, skipping save");
            return;
        }
        List<Article> articles = repository.getAll();
        Path tempFile = backupDir.resolve(DEFAULT_BACKUP_FILE + ".tmp");
        try {
            try (BufferedWriter writer = Files.newBufferedWriter(tempFile, StandardCharsets.UTF_8)) {
                objectMapper.writeValue(writer, articles);
            }
            Files.move(tempFile, backupFile, java.nio.file.StandardCopyOption.REPLACE_EXISTING);
        } catch (IOException e) {
            // Best-effort cleanup; the original failure is what the caller needs to see.
            try {
                Files.deleteIfExists(tempFile);
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
        logger.info("Successfully saved {} articles to {}", articles.size(), backupFile);
    }

    /**
     * Loads articles from the backup file into the repository; does nothing when the
     * backup file does not exist.
     *
     * @throws IOException if the file cannot be read or parsed
     */
    public void load() throws IOException {
        if (!Files.exists(backupFile)) {
            logger.info("No backup file found at {}, starting fresh", backupFile);
            return;
        }
        try (var reader = Files.newBufferedReader(backupFile, StandardCharsets.UTF_8)) {
            List<Article> articles = objectMapper.readValue(reader,
                    objectMapper.getTypeFactory().constructCollectionType(List.class, Article.class));
            if (articles != null && !articles.isEmpty()) {
                repository.addAll(articles);
                logger.info("Successfully loaded {} articles from {}", articles.size(), backupFile);
            }
        }
    }

    /**
     * Exports the repository to the given file in {@code MINIMAL} mode.
     *
     * @param filePath destination file
     * @throws IOException if the export fails (wraps {@link ExportException})
     */
    public void exportTo(String filePath) throws IOException {
        try {
            JsonExporter.ExportOptions options = new JsonExporter.ExportOptions();
            options.setMode(JsonExporter.ExportMode.MINIMAL);
            // NOTE: JsonExporter omits the metadata block in MINIMAL mode, so this flag
            // currently has no effect for this export.
            options.setIncludeMetadata(true);
            jsonExporter.exportToFile(Paths.get(filePath), options);
        } catch (ExportException e) {
            throw new IOException("Export failed: " + e.getMessage(), e);
        }
    }

    /**
     * Imports articles from the given file, skipping duplicates.
     *
     * @param filePath source file
     * @throws IOException if the import fails (wraps {@link ImportException})
     */
    public void importFrom(String filePath) throws IOException {
        try {
            JsonImporter.ImportOptions options = new JsonImporter.ImportOptions();
            options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP);
            jsonImporter.importFromFile(Paths.get(filePath), options);
        } catch (ImportException e) {
            throw new IOException("Import failed: " + e.getMessage(), e);
        }
    }

    /**
     * Writes a timestamped snapshot ({@code articles_yyyyMMdd_HHmmss.json}) into the
     * backup directory.
     *
     * @throws IOException if the snapshot cannot be written
     */
    public void createSnapshot() throws IOException {
        String timestamp = LocalDateTime.now().format(DATE_FORMATTER);
        Path snapshotFile = backupDir.resolve(String.format(BACKUP_FILE_PATTERN, timestamp));
        try {
            JsonExporter.ExportOptions options = new JsonExporter.ExportOptions();
            options.setMode(JsonExporter.ExportMode.STANDARD);
            options.setIncludeMetadata(true);
            jsonExporter.exportToFile(snapshotFile, options);
            logger.info("Created snapshot: {} ({} articles)", snapshotFile, repository.size());
        } catch (ExportException e) {
            throw new IOException("Failed to create snapshot: " + e.getMessage(), e);
        }
    }

    /**
     * Lists the snapshot files in the backup directory.
     *
     * @return snapshot paths sorted lexicographically (which, given the timestamped file
     *         names, is also chronological); empty when the directory does not exist
     * @throws IOException if the directory cannot be listed
     */
    public List<String> listSnapshots() throws IOException {
        List<String> snapshots = new ArrayList<>();
        if (Files.exists(backupDir)) {
            try (var stream = Files.list(backupDir)) {
                // "articles.json" itself never matches the "articles_" prefix.
                stream.filter(path -> {
                    String fileName = path.getFileName().toString();
                    return fileName.startsWith("articles_") && fileName.endsWith(".json");
                }).map(Path::toString).sorted().forEach(snapshots::add);
            }
        }
        return snapshots;
    }

    /**
     * Enables or disables {@link #save()}.
     *
     * @param enabled true to let {@link #save()} write the backup file
     */
    public void setAutoSaveEnabled(boolean enabled) {
        autoSaveEnabled.set(enabled);
        logger.info("Auto-save {}", enabled ? "enabled" : "disabled");
    }

    /**
     * @return whether {@link #save()} currently writes the backup file
     */
    public boolean isAutoSaveEnabled() {
        return autoSaveEnabled.get();
    }

    /**
     * @return the path of the backup file as a string
     */
    public String getBackupFilePath() {
        return backupFile.toString();
    }

    /**
     * Imports articles from the given file (duplicates skipped) and returns the importer's
     * report instead of discarding it.
     *
     * @param filePath source file
     * @return the import statistics
     * @throws ImportException if the import fails
     */
    public JsonImporter.ImportResult importWithReport(String filePath) throws ImportException {
        JsonImporter.ImportOptions options = new JsonImporter.ImportOptions();
        options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP);
        return jsonImporter.importFromFile(Paths.get(filePath), options);
    }

    /** Saves the repository one last time; a failure is logged, not thrown. */
    @Override
    public void close() {
        try {
            save();
            logger.info("PersistenceManager closed, data saved");
        } catch (IOException e) {
            logger.error("Failed to save data on close: {}", e.getMessage(), e);
        }
    }
}

11
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java

@ -0,0 +1,11 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import java.util.List;
/**
 * A site-specific parser that turns a fetched page into a list of articles.
 */
public interface CrawlStrategy {

    /**
     * Tells whether this strategy is meant to handle the given URL.
     *
     * @param url the page URL to test
     * @return true if this strategy can parse pages from that URL
     */
    boolean supports(String url);

    /**
     * Extracts articles from an already-fetched page.
     *
     * @param url the URL the document was loaded from
     * @param doc the parsed HTML document
     * @return the articles found on the page
     * @throws ParseException if the page cannot be parsed
     */
    List<Article> parse(String url, Document doc) throws ParseException;
}

115
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java

@ -0,0 +1,115 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
/**
 * {@link CrawlStrategy} for csdn.net pages: collects links to article detail pages together
 * with their title and, when present, a short description.
 */
public class CsdnStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(CsdnStrategy.class);
    private static final String SITE_DOMAIN = "csdn.net";
    /** Link texts shorter than this are treated as navigation noise, not article titles. */
    private static final int MIN_TITLE_LENGTH = 5;

    /**
     * @param url the page URL to test; null is not supported
     * @return true if the URL contains {@code csdn.net}
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains(SITE_DOMAIN);
    }

    /**
     * Extracts articles from a CSDN page.
     *
     * <p>Links are de-duplicated by resolved URL. A URL is only marked as seen once a link
     * with a usable title was found for it, so an untitled thumbnail link does not hide a
     * later titled link to the same article.</p>
     *
     * @param url the URL the document was loaded from (used to resolve relative links)
     * @param doc the parsed page
     * @return the parsed articles, possibly empty
     * @throws ParseException if the page as a whole cannot be processed
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        logger.info("Starting to parse CSDN: {}", url);
        List<Article> articles = new ArrayList<>();
        Set<String> seenUrls = new HashSet<>();
        try {
            for (Element link : selectArticleLinks(doc)) {
                try {
                    String articleUrl = resolveUrl(link, url);
                    if (articleUrl.isEmpty() || !articleUrl.contains(SITE_DOMAIN)
                            || seenUrls.contains(articleUrl)) {
                        continue;
                    }
                    String title = extractTitle(link);
                    if (title.length() < MIN_TITLE_LENGTH) {
                        continue;
                    }
                    seenUrls.add(articleUrl);
                    articles.add(new Article(title, articleUrl, extractSummary(link)));
                    logger.debug("Parsed article: {}", title);
                } catch (Exception e) {
                    // One malformed link must not abort the whole page.
                    logger.debug("Skipping link due to error: {}", e.getMessage());
                }
            }
            if (articles.isEmpty()) {
                logger.warn("No articles found. CSDN page structure may have changed.");
            }
            logger.info("Successfully parsed {} articles from CSDN", articles.size());
            return articles;
        } catch (Exception e) {
            logger.error("Failed to parse CSDN page: {}", e.getMessage(), e);
            throw new ParseException("Failed to parse CSDN: " + e.getMessage(), e);
        }
    }

    /** Selects candidate article links, trying progressively broader selectors. */
    private static Elements selectArticleLinks(Document doc) {
        Elements links = doc.select("a[href*='/article/details/']");
        logger.debug("Found {} article links", links.size());
        if (links.isEmpty()) {
            links = doc.select("a[href*='csdn.net/article/']");
            logger.debug("Trying alternative selector, found {} items", links.size());
        }
        if (links.isEmpty()) {
            links = doc.select("a.title, a.article-title, .article-item a, .list-item a");
            logger.debug("Trying fallback selectors, found {} items", links.size());
        }
        return links;
    }

    /**
     * Resolves the link target to an absolute URL.
     *
     * @return the absolute URL, or an empty string when the link has no usable target
     */
    private static String resolveUrl(Element link, String pageUrl) {
        String href = link.attr("href").trim();
        if (href.isEmpty()) {
            return "";
        }
        // jsoup resolves against the document's base URI when one is known.
        String absolute = link.attr("abs:href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        try {
            // No base URI on the document: resolve against the URL the caller crawled.
            return java.net.URI.create(pageUrl).resolve(href).toString();
        } catch (RuntimeException e) {
            return "";
        }
    }

    /** Returns the link text, or the text of a nested title element when the text is too short. */
    private static String extractTitle(Element link) {
        String title = link.text().trim();
        if (title.length() < MIN_TITLE_LENGTH) {
            Element titleEl = link.selectFirst("span, h3, h4, .title");
            if (titleEl != null) {
                title = titleEl.text().trim();
            }
        }
        return title;
    }

    /** Returns the description found under the link's parent element, or an empty string. */
    private static String extractSummary(Element link) {
        Element parent = link.parent();
        if (parent == null) {
            return "";
        }
        Element desc = parent.selectFirst("p.description, .desc, .summary");
        return desc != null ? desc.text().trim() : "";
    }
}

77
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java

@ -0,0 +1,77 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for news.hnu.edu.cn list pages.
 *
 * <p>Each {@code ul.list11 li} entry is expected to contain one link holding the title
 * ({@code h4.l2.h4s2}) and an optional summary ({@code p.l3.ps3}).</p>
 */
public class HnuNewsStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(HnuNewsStrategy.class);
    private static final String SITE_ROOT = "https://news.hnu.edu.cn";

    /**
     * @param url the page URL to test; null is not supported
     * @return true if the URL contains {@code news.hnu.edu.cn}
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains("news.hnu.edu.cn");
    }

    /**
     * Extracts articles from an HNU News list page.
     *
     * @param url the URL the document was loaded from
     * @param doc the parsed page
     * @return the parsed articles, possibly empty
     * @throws ParseException if the page as a whole cannot be processed
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        logger.info("Starting to parse HNU News: {}", url);
        List<Article> articles = new ArrayList<>();
        try {
            Elements listItems = doc.select("ul.list11 li");
            logger.debug("Found {} list items", listItems.size());
            for (Element li : listItems) {
                try {
                    Element link = li.selectFirst("a");
                    if (link == null) {
                        logger.warn("No link found in list item");
                        continue;
                    }
                    String articleUrl = resolveUrl(link);
                    if (!articleUrl.startsWith("http://") && !articleUrl.startsWith("https://")) {
                        // Empty href, javascript: pseudo-links and the like are not articles.
                        logger.debug("Skipping link without usable target: {}", link.attr("href"));
                        continue;
                    }
                    String title = textOf(link.selectFirst("h4.l2.h4s2"));
                    String content = textOf(link.selectFirst("p.l3.ps3"));
                    if (title.isEmpty()) {
                        logger.warn("Empty title found, skipping article");
                        continue;
                    }
                    articles.add(new Article(title, articleUrl, content));
                } catch (Exception e) {
                    // One malformed entry must not abort the whole page.
                    logger.error("Error parsing individual article: {}", e.getMessage());
                }
            }
            logger.info("Successfully parsed {} articles from HNU News", articles.size());
            return articles;
        } catch (Exception e) {
            logger.error("Failed to parse HNU News page: {}", e.getMessage(), e);
            throw new ParseException("Failed to parse HNU News: " + e.getMessage(), e);
        }
    }

    /**
     * Resolves the link target to an absolute URL.
     *
     * @return the absolute URL, or an empty string when the link has no href
     */
    private static String resolveUrl(Element link) {
        String href = link.attr("href").trim();
        if (href.isEmpty()) {
            return "";
        }
        // jsoup resolves against the document's base URI when one is known.
        String absolute = link.attr("abs:href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            return "https:" + href;
        }
        // No base URI: treat the path as relative to the site root. Parent-directory
        // markers are dropped and exactly one slash separates host and path.
        String path = href.replace("..", "");
        int start = 0;
        while (start < path.length() && path.charAt(start) == '/') {
            start++;
        }
        return SITE_ROOT + "/" + path.substring(start);
    }

    /** Returns the trimmed text of the element, or an empty string for null. */
    private static String textOf(Element element) {
        return element != null ? element.text().trim() : "";
    }
}

83
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java

@ -0,0 +1,83 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for people.com.cn (People's Daily Online) pages.
 */
public class PeopleStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(PeopleStrategy.class);
    private static final String SITE_ROOT = "https://www.people.com.cn";

    /**
     * @param url the page URL to test; null is not supported
     * @return true if the URL contains {@code people.com.cn}
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains("people.com.cn");
    }

    /**
     * Extracts articles from a People's Daily page.
     *
     * @param url the URL the document was loaded from
     * @param doc the parsed page
     * @return the parsed articles, possibly empty
     * @throws ParseException if the page as a whole cannot be processed
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        logger.info("Starting to parse People's Daily News: {}", url);
        List<Article> articles = new ArrayList<>();
        try {
            // News containers first; plain article links as a fallback.
            Elements newsItems = doc.select("div.w1000, div.news-item, li.list_item");
            logger.debug("Found {} news containers", newsItems.size());
            if (newsItems.isEmpty()) {
                newsItems = doc.select("a[href*='/n1/']");
                logger.debug("Trying alternative selector, found {} items", newsItems.size());
            }
            for (Element item : newsItems) {
                try {
                    Element link = item.selectFirst("a");
                    if (link == null && item.tagName().equals("a")) {
                        link = item;
                    }
                    if (link == null) {
                        logger.warn("No link found in news item");
                        continue;
                    }
                    String articleUrl = resolveUrl(link);
                    if (!articleUrl.startsWith("http://") && !articleUrl.startsWith("https://")) {
                        // Empty href, javascript: pseudo-links and the like are not articles.
                        logger.debug("Skipping link without usable target: {}", link.attr("href"));
                        continue;
                    }
                    String title = link.text().trim();
                    String content = "";
                    Element contentEl = item.selectFirst("p, div.ed, div.summary");
                    if (contentEl != null) {
                        content = contentEl.text().trim();
                    }
                    if (title.length() > 5) {
                        articles.add(new Article(title, articleUrl, content));
                        logger.debug("Parsed article: {}", title);
                    } else {
                        logger.warn("Invalid title found, skipping article");
                    }
                } catch (Exception e) {
                    // One malformed entry must not abort the whole page.
                    logger.error("Error parsing individual article: {}", e.getMessage());
                }
            }
            logger.info("Successfully parsed {} articles from People's Daily News", articles.size());
            return articles;
        } catch (Exception e) {
            logger.error("Failed to parse People's Daily News page: {}", e.getMessage(), e);
            throw new ParseException("Failed to parse People's Daily News: " + e.getMessage(), e);
        }
    }

    /**
     * Resolves the link target to an absolute URL.
     *
     * @return the absolute URL, or an empty string when the link has no href
     */
    private static String resolveUrl(Element link) {
        String href = link.attr("href").trim();
        if (href.isEmpty()) {
            return "";
        }
        // jsoup resolves against the document's base URI when one is known.
        String absolute = link.attr("abs:href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            // Protocol-relative link: it already names its host.
            return "https:" + href;
        }
        // No base URI: fall back to treating the path as relative to the site root.
        return href.startsWith("/") ? SITE_ROOT + href : SITE_ROOT + "/" + href;
    }
}

35
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java

@ -0,0 +1,35 @@
package com.example.datacollect.strategy;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Registry of {@link CrawlStrategy} implementations; picks the first one that supports a URL.
 */
public class StrategyFactory {
    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);

    /** Registered strategies, consulted in registration order. */
    private final List<CrawlStrategy> strategies = new ArrayList<>();

    /** Registers the built-in strategies (HNU News, Youth, People, CSDN). */
    public StrategyFactory() {
        strategies.add(new HnuNewsStrategy());
        strategies.add(new YouthStrategy());
        strategies.add(new PeopleStrategy());
        strategies.add(new CsdnStrategy());
        logger.info("Initialized StrategyFactory with {} strategies", strategies.size());
    }

    /**
     * Finds the first registered strategy that supports the URL.
     *
     * @param url the URL to crawl
     * @return the matching strategy, or null when the URL is null or no strategy supports it
     */
    public CrawlStrategy getStrategy(String url) {
        if (url != null) {
            for (CrawlStrategy s : strategies) {
                if (s.supports(url)) {
                    logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url);
                    return s;
                }
            }
        }
        logger.warn("No strategy found for URL: {}", url);
        return null;
    }

    /**
     * Appends a strategy; it is consulted after all previously registered ones.
     *
     * @param strategy the strategy to register
     * @throws NullPointerException if {@code strategy} is null (nothing is registered then)
     */
    public void register(CrawlStrategy strategy) {
        // Reject null before touching the list: a null entry would make every later
        // getStrategy() call fail.
        java.util.Objects.requireNonNull(strategy, "strategy cannot be null");
        strategies.add(strategy);
        logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName());
    }
}

112
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java

@ -0,0 +1,112 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for youth.cn (China Youth Net) news pages.
 */
public class YouthStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(YouthStrategy.class);
    private static final String SITE_ROOT = "https://www.youth.cn";

    /**
     * @param url the page URL to test; null is not supported
     * @return true if the URL contains {@code youth.cn}
     */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains("youth.cn");
    }

    /**
     * Extracts articles from a youth.cn page.
     *
     * @param url the URL the document was loaded from
     * @param doc the parsed page
     * @return the parsed articles, possibly empty
     * @throws ParseException if the page as a whole cannot be processed
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        logger.info("Starting to parse Youth News: {}", url);
        List<Article> articles = new ArrayList<>();
        try {
            for (Element item : selectNewsItems(doc)) {
                try {
                    Element link = item.selectFirst("a");
                    if (link == null && item.tagName().equals("a")) {
                        link = item;
                    }
                    if (link == null) {
                        logger.debug("No link found in item, skipping");
                        continue;
                    }
                    String articleUrl = resolveUrl(link);
                    if (!articleUrl.startsWith("http://") && !articleUrl.startsWith("https://")) {
                        // Empty href, javascript: pseudo-links and the like are not articles.
                        logger.debug("Skipping link without usable target: {}", link.attr("href"));
                        continue;
                    }
                    String title = extractTitle(item, link);
                    if (title.isEmpty()) {
                        logger.debug("Empty title found, skipping");
                        continue;
                    }
                    String content = "";
                    Element contentEl = item.selectFirst("p.summary, p.desc, div.brief, .summary, .desc");
                    if (contentEl != null) {
                        content = contentEl.text().trim();
                    }
                    if (title.length() > 5) {
                        articles.add(new Article(title, articleUrl, content));
                        logger.debug("Parsed article: {}", title);
                    } else {
                        logger.debug("Invalid title found (length: {}), skipping article", title.length());
                    }
                } catch (Exception e) {
                    // One malformed entry must not abort the whole page.
                    logger.debug("Error parsing individual article: {}", e.getMessage());
                }
            }
            if (articles.isEmpty()) {
                logger.warn("No articles found. Youth.cn page structure may have changed.");
            }
            logger.info("Successfully parsed {} articles from Youth News", articles.size());
            return articles;
        } catch (Exception e) {
            logger.error("Failed to parse Youth News page: {}", e.getMessage(), e);
            throw new ParseException("Failed to parse Youth News: " + e.getMessage(), e);
        }
    }

    /** Selects candidate news items, trying progressively broader selectors. */
    private static Elements selectNewsItems(Document doc) {
        Elements newsItems = doc.select("div.news-item, div.article-item, li.news-list-item, div.list-item, ul.list li, .news-list li");
        logger.debug("Found {} news items with primary selectors", newsItems.size());
        if (newsItems.isEmpty()) {
            newsItems = doc.select("a[href*='/n1/'], a[href*='/gn/'], a[href*='/qy/'], a[href*='/jj/']");
            logger.debug("Trying alternative selector (news category links), found {} items", newsItems.size());
        }
        if (newsItems.isEmpty()) {
            newsItems = doc.select("a[href$='.html']");
            logger.debug("Trying fallback selector (html links), found {} items", newsItems.size());
        }
        return newsItems;
    }

    /**
     * Returns the first non-empty title candidate: the link text, a title element inside
     * the link, or a title element inside the surrounding item.
     */
    private static String extractTitle(Element item, Element link) {
        String title = link.text().trim();
        if (title.isEmpty()) {
            Element titleEl = link.selectFirst("span, h3, h4, .title");
            if (titleEl != null) {
                title = titleEl.text().trim();
            }
        }
        if (title.isEmpty()) {
            Element parentTitle = item.selectFirst("span, h3, h4, .title, .news-title");
            if (parentTitle != null) {
                title = parentTitle.text().trim();
            }
        }
        return title;
    }

    /**
     * Resolves the link target to an absolute URL.
     *
     * @return the absolute URL, or an empty string when the link has no href
     */
    private static String resolveUrl(Element link) {
        String href = link.attr("href").trim();
        if (href.isEmpty()) {
            return "";
        }
        // jsoup resolves against the document's base URI when one is known.
        String absolute = link.attr("abs:href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        if (href.startsWith("http")) {
            return href;
        }
        if (href.startsWith("//")) {
            // Protocol-relative link: it already names its host.
            return "https:" + href;
        }
        // No base URI: fall back to treating the path as relative to the site root.
        return href.startsWith("/") ? SITE_ROOT + href : SITE_ROOT + "/" + href;
    }
}

261
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java

@ -0,0 +1,261 @@
package com.example.datacollect.util;
import com.example.datacollect.exception.ExportException;
import com.example.datacollect.model.Article;
import com.example.datacollect.repository.ArticleRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.*;
import java.util.stream.Collectors;
/**
 * Exports the articles of an {@link ArticleRepository} as JSON, to a file or a string.
 *
 * <p>The output is an object with an {@code articles} array and, depending on the options,
 * a {@code metadata} object in front of it.</p>
 */
public class JsonExporter {
    private static final Logger logger = LoggerFactory.getLogger(JsonExporter.class);
    private static final String VERSION = "1.0";
    private static final DateTimeFormatter EXPORT_TIME_FORMAT = DateTimeFormatter.ISO_LOCAL_DATE_TIME;

    /** Output flavour; see {@code generateJson} for the exact effect of each mode. */
    public enum ExportMode {
        /** Pretty-printed output. */
        STANDARD,
        /** Single-line output. */
        COMPACT,
        /** Single-line output that never contains the metadata block. */
        MINIMAL
    }

    /** Settings of one export run. */
    public static class ExportOptions {
        private ExportMode mode = ExportMode.STANDARD;
        private String filterKeyword;
        // NOTE(review): startDate/endDate are stored but not evaluated by this exporter.
        private LocalDateTime startDate;
        private LocalDateTime endDate;
        private boolean includeMetadata = true;

        public ExportOptions() {}

        public ExportMode getMode() {
            return mode;
        }

        public void setMode(ExportMode mode) {
            this.mode = mode;
        }

        public String getFilterKeyword() {
            return filterKeyword;
        }

        public void setFilterKeyword(String filterKeyword) {
            this.filterKeyword = filterKeyword;
        }

        public LocalDateTime getStartDate() {
            return startDate;
        }

        public void setStartDate(LocalDateTime startDate) {
            this.startDate = startDate;
        }

        public LocalDateTime getEndDate() {
            return endDate;
        }

        public void setEndDate(LocalDateTime endDate) {
            this.endDate = endDate;
        }

        public boolean isIncludeMetadata() {
            return includeMetadata;
        }

        public void setIncludeMetadata(boolean includeMetadata) {
            this.includeMetadata = includeMetadata;
        }
    }

    /** Metadata block written in front of the articles. */
    public static class ExportMetadata {
        private String exportTime;
        private int totalCount;
        private String source;
        private String exportMode;
        private String version;

        public ExportMetadata() {}

        public String getExportTime() {
            return exportTime;
        }

        public void setExportTime(String exportTime) {
            this.exportTime = exportTime;
        }

        public int getTotalCount() {
            return totalCount;
        }

        public void setTotalCount(int totalCount) {
            this.totalCount = totalCount;
        }

        public String getSource() {
            return source;
        }

        public void setSource(String source) {
            this.source = source;
        }

        public String getExportMode() {
            return exportMode;
        }

        public void setExportMode(String exportMode) {
            this.exportMode = exportMode;
        }

        public String getVersion() {
            return version;
        }

        public void setVersion(String version) {
            this.version = version;
        }
    }

    private final ArticleRepository repository;
    private final ObjectMapper objectMapper;

    /**
     * @param repository the repository whose articles are exported
     */
    public JsonExporter(ArticleRepository repository) {
        this.repository = repository;
        this.objectMapper = new ObjectMapper();
    }

    /**
     * Exports all articles with default options.
     *
     * @param targetPath destination file
     * @throws ExportException if the path is invalid or writing fails
     */
    public void exportToFile(Path targetPath) throws ExportException {
        exportToFile(targetPath, new ExportOptions());
    }

    /**
     * Exports the (optionally filtered) articles to a UTF-8 file, creating missing parent
     * directories.
     *
     * @param targetPath destination file
     * @param options    export settings; null means default options
     * @throws ExportException if the path is null, the directory cannot be created, JSON
     *         generation fails or the file cannot be written
     */
    public void exportToFile(Path targetPath, ExportOptions options) throws ExportException {
        ExportOptions effective = options != null ? options : new ExportOptions();
        logger.info("开始导出到文件: {}, 模式: {}", targetPath, effective.getMode());
        validateTargetPath(targetPath);
        try {
            List<Article> articles = getFilteredArticles(effective);
            logger.debug("过滤后待导出文章数: {}", articles.size());
            String json = generateJson(articles, effective);
            try (BufferedWriter writer = Files.newBufferedWriter(targetPath, StandardCharsets.UTF_8)) {
                writer.write(json);
            }
            logger.info("成功导出 {} 篇文章到: {}", articles.size(), targetPath);
        } catch (IOException e) {
            logger.error("导出文件失败: {}", e.getMessage(), e);
            throw new ExportException("无法写入导出文件: " + e.getMessage(), targetPath.toString(), e);
        }
    }

    /**
     * @return all articles as JSON, produced with default options
     * @throws ExportException if JSON generation fails
     */
    public String exportToString() throws ExportException {
        return exportToString(new ExportOptions());
    }

    /**
     * @param options export settings; null means default options
     * @return the (optionally filtered) articles as JSON
     * @throws ExportException if JSON generation fails
     */
    public String exportToString(ExportOptions options) throws ExportException {
        ExportOptions effective = options != null ? options : new ExportOptions();
        return generateJson(getFilteredArticles(effective), effective);
    }

    /**
     * Returns the repository's articles, narrowed to those whose title or content contains
     * the filter keyword (case-insensitive) when a non-blank keyword is set.
     */
    private List<Article> getFilteredArticles(ExportOptions options) {
        List<Article> articles = repository.getAll();
        String rawKeyword = options.getFilterKeyword();
        if (rawKeyword != null && !rawKeyword.trim().isEmpty()) {
            // Match with the same trimmed value that made the keyword count as non-blank.
            String keyword = rawKeyword.trim().toLowerCase(Locale.ROOT);
            articles = articles.stream()
                    .filter(a -> containsIgnoreCase(a.getTitle(), keyword)
                            || containsIgnoreCase(a.getContent(), keyword))
                    .collect(Collectors.toList());
            logger.debug("关键词过滤后剩余文章数: {}", articles.size());
        }
        return articles;
    }

    /** Null-safe, locale-independent "contains"; {@code lowerCaseNeedle} must be lower case. */
    private static boolean containsIgnoreCase(String text, String lowerCaseNeedle) {
        return text != null && text.toLowerCase(Locale.ROOT).contains(lowerCaseNeedle);
    }

    /**
     * Builds the JSON document. Metadata is written only when requested and the mode is
     * not MINIMAL; only STANDARD mode is pretty-printed.
     */
    private String generateJson(List<Article> articles, ExportOptions options) throws ExportException {
        try {
            Map<String, Object> output = new LinkedHashMap<>();
            if (options.isIncludeMetadata() && options.getMode() != ExportMode.MINIMAL) {
                ExportMetadata metadata = new ExportMetadata();
                metadata.setExportTime(LocalDateTime.now().format(EXPORT_TIME_FORMAT));
                metadata.setTotalCount(articles.size());
                metadata.setSource("CLI Crawler v" + VERSION);
                metadata.setExportMode(options.getMode().name());
                metadata.setVersion(VERSION);
                output.put("metadata", metadata);
            }
            output.put("articles", articles);
            if (options.getMode() == ExportMode.STANDARD) {
                return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(output);
            } else {
                return objectMapper.writeValueAsString(output);
            }
        } catch (Exception e) {
            logger.error("生成JSON失败: {}", e.getMessage(), e);
            throw new ExportException("无法生成JSON: " + e.getMessage(), e);
        }
    }

    /** Rejects a null path and creates the parent directory when it is missing. */
    private void validateTargetPath(Path targetPath) throws ExportException {
        if (targetPath == null) {
            throw new ExportException("导出路径不能为空");
        }
        Path parent = targetPath.getParent();
        if (parent != null && !Files.exists(parent)) {
            try {
                Files.createDirectories(parent);
                logger.info("创建导出目录: {}", parent);
            } catch (IOException e) {
                throw new ExportException("无法创建导出目录: " + parent, e);
            }
        }
    }

    /**
     * Writes one STANDARD-mode snapshot named {@code snapshot_yyyyMMdd_HHmmss.json} into
     * the given directory, creating the directory when needed.
     *
     * @param baseDir target directory
     * @return the files that were written (currently a single snapshot)
     * @throws ExportException if the directory cannot be created or the export fails
     */
    public List<Path> exportWithSnapshots(String baseDir) throws ExportException {
        logger.info("开始批量导出快照到目录: {}", baseDir);
        List<Path> exportedFiles = new ArrayList<>();
        Path basePath = Path.of(baseDir);
        try {
            if (!Files.exists(basePath)) {
                Files.createDirectories(basePath);
            }
            ExportOptions standardOptions = new ExportOptions();
            standardOptions.setMode(ExportMode.STANDARD);
            standardOptions.setIncludeMetadata(true);
            String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
            Path snapshotPath = basePath.resolve("snapshot_" + timestamp + ".json");
            exportToFile(snapshotPath, standardOptions);
            exportedFiles.add(snapshotPath);
            logger.info("批量导出完成,共导出 {} 个文件", exportedFiles.size());
        } catch (Exception e) {
            logger.error("批量导出失败: {}", e.getMessage(), e);
            throw new ExportException("批量导出失败: " + e.getMessage(), e);
        }
        return exportedFiles;
    }
}

386
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java

@ -0,0 +1,386 @@
package com.example.datacollect.util;
import com.example.datacollect.exception.DuplicateArticleException;
import com.example.datacollect.exception.ImportException;
import com.example.datacollect.exception.ValidationException;
import com.example.datacollect.model.Article;
import com.example.datacollect.repository.ArticleRepository;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedReader;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.*;
import java.time.LocalDateTime;
import java.util.regex.Pattern;
public class JsonImporter {
private static final Logger logger = LoggerFactory.getLogger(JsonImporter.class);
private static final Pattern URL_PATTERN = Pattern.compile("^https?://.*");
private static final int MAX_TITLE_LENGTH = 500;
private static final int MAX_CONTENT_LENGTH = 10000;
/**
 * How an incoming article whose URL already exists should be treated.
 * NOTE(review): the handling code is outside this excerpt; the per-constant descriptions
 * below are inferred from the names and should be confirmed.
 */
public enum DuplicateStrategy {
    /** Presumably: keep the stored article and ignore the incoming one. */
    SKIP,
    /** Presumably: replace the stored article with the incoming one. */
    OVERWRITE,
    /** Presumably: report the duplicate as an error. */
    ERROR;
}
/**
 * Settings of one import run. Defaults: duplicates are skipped, URL and title validation
 * are on, invalid articles are skipped, and the content limit is {@code MAX_CONTENT_LENGTH}.
 */
public static class ImportOptions {
    private DuplicateStrategy duplicateStrategy = DuplicateStrategy.SKIP;
    private boolean validateUrl = true;
    private boolean validateTitle = true;
    private boolean skipInvalid = true;
    private int maxContentLength = MAX_CONTENT_LENGTH;

    /** Creates options with the defaults described on the class. */
    public ImportOptions() {
    }

    /** @return the strategy applied to articles whose URL already exists */
    public DuplicateStrategy getDuplicateStrategy() {
        return this.duplicateStrategy;
    }

    /** @param duplicateStrategy the strategy applied to articles whose URL already exists */
    public void setDuplicateStrategy(DuplicateStrategy duplicateStrategy) {
        this.duplicateStrategy = duplicateStrategy;
    }

    /** @return whether URL validation is requested */
    public boolean isValidateUrl() {
        return this.validateUrl;
    }

    /** @param validateUrl whether URL validation is requested */
    public void setValidateUrl(boolean validateUrl) {
        this.validateUrl = validateUrl;
    }

    /** @return whether title validation is requested */
    public boolean isValidateTitle() {
        return this.validateTitle;
    }

    /** @param validateTitle whether title validation is requested */
    public void setValidateTitle(boolean validateTitle) {
        this.validateTitle = validateTitle;
    }

    /** @return true if an invalid article is skipped instead of aborting the import */
    public boolean isSkipInvalid() {
        return this.skipInvalid;
    }

    /** @param skipInvalid true to skip invalid articles instead of aborting the import */
    public void setSkipInvalid(boolean skipInvalid) {
        this.skipInvalid = skipInvalid;
    }

    /** @return the configured content length limit */
    public int getMaxContentLength() {
        return this.maxContentLength;
    }

    /** @param maxContentLength the content length limit to use */
    public void setMaxContentLength(int maxContentLength) {
        this.maxContentLength = maxContentLength;
    }
}
/**
 * Statistics and messages collected during one import run.
 */
public static class ImportResult {
    private int totalFound;
    private int imported;
    private int skipped;
    private int invalid;
    private int overwritten;
    private List<String> errors = new ArrayList<>();
    private List<String> warnings = new ArrayList<>();

    /** Creates an empty result: all counters are zero and both message lists are empty. */
    public ImportResult() {
    }

    /** @return number of articles found in the source */
    public int getTotalFound() {
        return this.totalFound;
    }

    public void setTotalFound(int totalFound) {
        this.totalFound = totalFound;
    }

    /** @return number of articles recorded as imported */
    public int getImported() {
        return this.imported;
    }

    public void setImported(int imported) {
        this.imported = imported;
    }

    /** @return number of articles recorded as skipped */
    public int getSkipped() {
        return this.skipped;
    }

    public void setSkipped(int skipped) {
        this.skipped = skipped;
    }

    /** @return number of articles recorded as invalid */
    public int getInvalid() {
        return this.invalid;
    }

    public void setInvalid(int invalid) {
        this.invalid = invalid;
    }

    /** @return number of articles recorded as overwritten */
    public int getOverwritten() {
        return this.overwritten;
    }

    public void setOverwritten(int overwritten) {
        this.overwritten = overwritten;
    }

    /** @return the live list of error messages */
    public List<String> getErrors() {
        return this.errors;
    }

    /** Appends an error message. */
    public void addError(String error) {
        errors.add(error);
    }

    /** @return the live list of warning messages */
    public List<String> getWarnings() {
        return this.warnings;
    }

    /** Appends a warning message. */
    public void addWarning(String warning) {
        warnings.add(warning);
    }

    /** @return a one-line summary of all counters and the number of errors */
    public String getSummary() {
        int errorCount = errors.size();
        return String.format(
                "导入完成: 总共找到=%d, 成功导入=%d, 跳过=%d, 无效=%d, 覆盖=%d, 错误=%d",
                totalFound, imported, skipped, invalid, overwritten, errorCount);
    }
}
private final ArticleRepository repository;
private final ObjectMapper objectMapper;
/**
 * Creates an importer bound to the given repository, with its own
 * default-constructed {@code ObjectMapper} for JSON parsing.
 *
 * @param repository repository that imported articles are looked up in and added to
 */
public JsonImporter(ArticleRepository repository) {
this.repository = repository;
this.objectMapper = new ObjectMapper();
}
/**
 * Imports articles from a JSON file using a default-constructed {@code ImportOptions}.
 *
 * @param sourcePath path of the JSON file to import
 * @return the tally of the import run
 * @throws ImportException propagated from the two-argument overload
 */
public ImportResult importFromFile(Path sourcePath) throws ImportException {
return importFromFile(sourcePath, new ImportOptions());
}
/**
 * Imports articles from a JSON file: validates the path, reads the file,
 * parses it into articles and processes each article in order.
 *
 * @param sourcePath path of the JSON file to import
 * @param options    validation, duplicate-handling and content-length settings
 * @return the tally of the import run
 * @throws ImportException if path validation fails, the file cannot be read,
 *         an article fails validation while {@code options.isSkipInvalid()} is
 *         false, or any other exception occurs during the import
 */
public ImportResult importFromFile(Path sourcePath, ImportOptions options) throws ImportException {
logger.info("开始从文件导入: {}", sourcePath);
validateSourcePath(sourcePath);
ImportResult result = new ImportResult();
try {
String content = readFileContent(sourcePath);
List<Article> articles = parseArticles(content, result);
// Counts only the entries that parsed into an Article; entries that failed
// to parse were already recorded in result's error list by parseArticles.
result.setTotalFound(articles.size());
logger.debug("解析到 {} 篇文章", articles.size());
for (int i = 0; i < articles.size(); i++) {
Article article = articles.get(i);
try {
processArticle(article, options, result, i);
} catch (ValidationException e) {
// Record the invalid article first, then abort unless invalid entries may be skipped.
logger.warn("文章验证失败 [位置 {}]: {}", i, e.getMessage());
result.addError("无效文章 at index " + i + ": " + e.getMessage());
result.setInvalid(result.getInvalid() + 1);
if (!options.isSkipInvalid()) {
throw new ImportException("文章验证失败: " + e.getMessage(), sourcePath.toString(), i);
}
} catch (DuplicateArticleException e) {
// NOTE(review): processArticle throws DuplicateArticleException for both the
// SKIP and the ERROR duplicate strategy, and both end up here counted as
// "skipped" — confirm whether ERROR is meant to abort the import instead.
logger.warn("重复文章 [位置 {}]: {}", i, e.getMessage());
result.setSkipped(result.getSkipped() + 1);
}
}
logger.info("导入完成: {}", result.getSummary());
} catch (IOException e) {
logger.error("读取文件失败: {}", e.getMessage(), e);
throw new ImportException("无法读取导入文件: " + e.getMessage(), sourcePath.toString(), e);
} catch (ImportException e) {
// Already an ImportException: rethrow unchanged so the generic handler below does not re-wrap it.
throw e;
} catch (Exception e) {
logger.error("导入过程出错: {}", e.getMessage(), e);
throw new ImportException("导入失败: " + e.getMessage(), sourcePath.toString(), e);
}
return result;
}
/**
 * Parses a JSON document into articles.
 *
 * <p>Per-entry parse errors are collected in a throwaway {@code ImportResult}
 * that is not exposed to the caller.
 *
 * @param json JSON text to parse
 * @return the articles that could be parsed
 * @throws ImportException propagated from the two-argument overload
 */
public List<Article> parseArticles(String json) throws ImportException {
    return parseArticles(json, new ImportResult());
}
/**
 * Parses a JSON document into articles, recording per-entry problems in {@code result}.
 *
 * <p>Accepted layouts: a top-level array of article objects, or a top-level
 * object whose {@code "articles"} (preferred) or {@code "data"} field holds
 * such an array.
 *
 * @param json   JSON text to parse
 * @param result receives an error for every entry that fails to convert and a
 *               warning for every entry that is not a JSON object
 * @return the articles that could be converted, in document order
 * @throws ImportException if the text is not valid JSON or no article array is found
 */
private List<Article> parseArticles(String json, ImportResult result) throws ImportException {
    try {
        // Bind to Object rather than Map: binding to Map.class rejects a top-level
        // JSON array outright, which made the array layout impossible to import.
        Object root = objectMapper.readValue(json, Object.class);

        List<?> articlesList = null;
        if (root instanceof List) {
            articlesList = (List<?>) root;
        } else if (root instanceof Map) {
            Map<?, ?> data = (Map<?, ?>) root;
            // "articles" wins over "data" when both are present.
            Object candidate = data.containsKey("articles") ? data.get("articles") : data.get("data");
            if (candidate instanceof List) {
                articlesList = (List<?>) candidate;
            }
        }
        if (articlesList == null) {
            throw new ImportException("JSON格式错误:未找到 'articles' 或 'data' 字段");
        }

        List<Article> articles = new ArrayList<>();
        for (int i = 0; i < articlesList.size(); i++) {
            Object item = articlesList.get(i);
            if (!(item instanceof Map)) {
                // Previously dropped without a trace; keep skipping but leave a record.
                logger.warn("第 {} 项不是JSON对象,已跳过", i);
                result.addWarning("跳过非对象条目 at index " + i);
                continue;
            }
            try {
                articles.add(mapToArticle((Map<?, ?>) item, i));
            } catch (Exception e) {
                // One bad entry must not abort the whole parse.
                logger.warn("解析第 {} 篇文章失败: {}", i, e.getMessage());
                result.addError("解析失败 at index " + i + ": " + e.getMessage());
            }
        }
        return articles;
    } catch (ImportException e) {
        throw e;
    } catch (Exception e) {
        logger.error("JSON解析失败: {}", e.getMessage(), e);
        throw new ImportException("JSON解析失败: " + e.getMessage(), e);
    }
}
/**
 * Converts one parsed JSON object into an {@code Article}.
 *
 * <p>{@code title} and {@code url} are required and trimmed; a missing
 * {@code content} becomes the empty string. {@code crawledAt} is used only
 * when it is a string that {@link LocalDateTime#parse(CharSequence)} accepts;
 * otherwise {@code null} is passed to the {@code Article} constructor.
 *
 * @param map   the JSON object's fields
 * @param index position of the entry in the source array (not used here)
 * @return the converted article
 * @throws ValidationException if the title or the URL is missing or blank
 */
@SuppressWarnings("unchecked")
private Article mapToArticle(Map<?, ?> map, int index) throws ValidationException {
    final String rawTitle = (String) map.get("title");
    final String rawUrl = (String) map.get("url");
    final String rawContent = (String) map.get("content");

    LocalDateTime crawledAt = null;
    final Object timestamp = map.get("crawledAt");
    if (timestamp instanceof String) {
        try {
            crawledAt = LocalDateTime.parse((String) timestamp);
        } catch (Exception e) {
            // An unparseable timestamp is tolerated; the article keeps a null crawl time.
            logger.warn("无法解析 crawledAt 字段: {}, 使用默认值", timestamp);
        }
    }

    final String title = (rawTitle == null) ? "" : rawTitle.trim();
    if (title.isEmpty()) {
        throw new ValidationException("标题不能为空", "title", null, "非空字符串");
    }

    final String url = (rawUrl == null) ? "" : rawUrl.trim();
    if (url.isEmpty()) {
        throw new ValidationException("URL不能为空", "url", null, "非空字符串");
    }

    final String content = (rawContent == null) ? "" : rawContent.trim();
    return new Article(title, url, content, crawledAt);
}
/**
 * Validates one article and stores it in the repository, honouring the
 * configured duplicate strategy and content-length limit.
 *
 * <p>Fix over the previous version: the truncated content used to be computed
 * into a local variable and then discarded, so the untruncated article was
 * stored; the OVERWRITE path never applied the limit at all. Both paths now
 * store the length-limited article.
 *
 * @param article article to import
 * @param options validation switches, duplicate strategy and maximum content length
 * @param result  tally updated with the imported / overwritten counts
 * @param index   position of the article in the import file (not used here)
 * @throws ValidationException       if the title is too long or the URL does not
 *                                   match the expected pattern (when enabled)
 * @throws DuplicateArticleException if the URL already exists and the strategy
 *                                   is SKIP or ERROR
 */
private void processArticle(Article article, ImportOptions options, ImportResult result, int index)
        throws ValidationException, DuplicateArticleException {
    if (options.isValidateTitle() && article.getTitle().length() > MAX_TITLE_LENGTH) {
        throw new ValidationException(
                "标题过长: 最大" + MAX_TITLE_LENGTH + "字符",
                "title",
                article.getTitle(),
                "长度 <= " + MAX_TITLE_LENGTH
        );
    }
    if (options.isValidateUrl() && !URL_PATTERN.matcher(article.getUrl()).matches()) {
        throw new ValidationException(
                "URL格式无效: " + article.getUrl(),
                "url",
                article.getUrl(),
                "必须以 http:// 或 https:// 开头"
        );
    }

    Article existing = repository.findByUrl(article.getUrl());
    if (existing != null) {
        switch (options.getDuplicateStrategy()) {
            case SKIP:
                logger.debug("跳过重复文章: {}", article.getUrl());
                throw new DuplicateArticleException("文章URL已存在: " + article.getUrl(), article.getUrl());
            case OVERWRITE:
                logger.debug("覆盖重复文章: {}", article.getUrl());
                repository.remove(existing);
                repository.add(withLimitedContent(article, options.getMaxContentLength()));
                // An overwrite counts both as an overwrite and as an import.
                result.setOverwritten(result.getOverwritten() + 1);
                result.setImported(result.getImported() + 1);
                return;
            case ERROR:
                throw new DuplicateArticleException(
                        "发现重复URL: " + article.getUrl(),
                        article.getUrl(),
                        repository.getAll().indexOf(existing)
                );
        }
    }

    repository.add(withLimitedContent(article, options.getMaxContentLength()));
    result.setImported(result.getImported() + 1);
    logger.debug("成功导入文章: {}", article.getTitle());
}

/**
 * Returns the article itself when its content fits within {@code maxLength},
 * otherwise a copy whose content is cut to the first {@code maxLength} characters.
 */
private Article withLimitedContent(Article article, int maxLength) {
    String content = article.getContent();
    if (content == null || content.length() <= maxLength) {
        return article;
    }
    logger.debug("文章内容已截断到 {} 字符: {}", maxLength, article.getTitle());
    return new Article(
            article.getTitle(),
            article.getUrl(),
            content.substring(0, maxLength),
            article.getCrawledAt());
}
/**
 * Reads the whole file as UTF-8 text.
 *
 * <p>The file is read line by line and every line is re-terminated with a
 * single {@code '\n'}, so the result always ends with a newline when the file
 * is non-empty.
 *
 * @param sourcePath file to read
 * @return the file content with normalised line terminators
 * @throws IOException if the file cannot be opened or read
 */
private String readFileContent(Path sourcePath) throws IOException {
    final StringBuilder buffer = new StringBuilder();
    try (BufferedReader in = Files.newBufferedReader(sourcePath, StandardCharsets.UTF_8)) {
        for (String current = in.readLine(); current != null; current = in.readLine()) {
            buffer.append(current).append('\n');
        }
    }
    return buffer.toString();
}
/**
 * Checks that the import source can be read before any work starts.
 *
 * <p>A file larger than 100 MB is accepted but logged as a warning; failure to
 * determine the size is logged and otherwise ignored.
 *
 * @param sourcePath path supplied by the caller
 * @throws ImportException if the path is null, does not exist, is a directory
 *                         or is not readable
 */
private void validateSourcePath(Path sourcePath) throws ImportException {
    if (sourcePath == null) {
        throw new ImportException("导入路径不能为空");
    }
    if (!Files.exists(sourcePath)) {
        throw new ImportException("导入文件不存在: " + sourcePath, sourcePath.toString());
    }
    // A directory exists and is readable, so without this check it only failed
    // later, while reading, with a misleading "cannot read file" error.
    if (Files.isDirectory(sourcePath)) {
        throw new ImportException("导入路径是目录而不是文件: " + sourcePath, sourcePath.toString());
    }
    if (!Files.isReadable(sourcePath)) {
        throw new ImportException("文件不可读: " + sourcePath, sourcePath.toString());
    }

    final long bytesPerMb = 1024L * 1024L;
    final long largeFileThreshold = 100L * bytesPerMb;
    try {
        long size = Files.size(sourcePath);
        if (size > largeFileThreshold) {
            logger.warn("导入文件较大 ({} MB),处理可能较慢", size / bytesPerMb);
        }
    } catch (IOException e) {
        // The size is only informational; do not fail the import over it.
        logger.warn("无法获取文件大小: {}", e.getMessage());
    }
}
}

81
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java

@ -0,0 +1,81 @@
package com.example.datacollect.util;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.databind.SerializationFeature;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.List;
/**
 * Static helpers for converting objects to and from JSON with Jackson.
 *
 * <p>The string-based methods wrap every failure in a {@link RuntimeException};
 * the file-based methods let {@link IOException} propagate to the caller.
 */
public class JsonSerializer {
    private static final Logger logger = LoggerFactory.getLogger(JsonSerializer.class);

    /** Mapper that writes indented ("pretty") JSON; also used for all reads. */
    private static final ObjectMapper objectMapper = new ObjectMapper();

    /**
     * Mapper with default settings (no indentation). Shared instead of being
     * re-created on every {@link #serializeCompact(Object)} call, because
     * constructing an ObjectMapper is comparatively expensive.
     */
    private static final ObjectMapper compactMapper = new ObjectMapper();

    static {
        objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
    }

    /** Not instantiable: all members are static. */
    private JsonSerializer() {
    }

    /**
     * Serializes an object to indented JSON.
     *
     * @param obj object to serialize
     * @return the JSON text
     * @throws RuntimeException if serialization fails
     */
    public static <T> String serialize(T obj) {
        try {
            return objectMapper.writeValueAsString(obj);
        } catch (Exception e) {
            logger.error("Failed to serialize object", e);
            throw new RuntimeException("Failed to serialize object", e);
        }
    }

    /**
     * Serializes an object to JSON without indentation.
     *
     * @param obj object to serialize
     * @return the JSON text
     * @throws RuntimeException if serialization fails
     */
    public static <T> String serializeCompact(T obj) {
        try {
            return compactMapper.writeValueAsString(obj);
        } catch (Exception e) {
            logger.error("Failed to serialize object (compact)", e);
            throw new RuntimeException("Failed to serialize object", e);
        }
    }

    /**
     * Deserializes JSON text into an instance of {@code clazz}.
     *
     * @param json  JSON text
     * @param clazz target type
     * @return the deserialized object
     * @throws RuntimeException if deserialization fails
     */
    public static <T> T deserialize(String json, Class<T> clazz) {
        try {
            return objectMapper.readValue(json, clazz);
        } catch (Exception e) {
            logger.error("Failed to deserialize object", e);
            throw new RuntimeException("Failed to deserialize object", e);
        }
    }

    /**
     * Deserializes a JSON array into a list of {@code clazz} elements.
     *
     * @param json  JSON text
     * @param clazz element type
     * @return the deserialized list
     * @throws RuntimeException if deserialization fails
     */
    public static <T> List<T> deserializeList(String json, Class<T> clazz) {
        try {
            return objectMapper.readValue(json,
                    objectMapper.getTypeFactory().constructCollectionType(List.class, clazz));
        } catch (Exception e) {
            logger.error("Failed to deserialize list", e);
            throw new RuntimeException("Failed to deserialize list", e);
        }
    }

    /**
     * Writes an object as indented JSON to the given file.
     *
     * @param obj      object to write
     * @param filePath destination file path
     * @throws IOException if writing or serialization fails
     */
    public static <T> void writeToFile(T obj, String filePath) throws IOException {
        File file = new File(filePath);
        objectMapper.writeValue(file, obj);
        logger.debug("Successfully wrote object to file: {}", filePath);
    }

    /**
     * Reads an object of type {@code clazz} from a JSON file.
     *
     * @param filePath source file path
     * @param clazz    target type
     * @return the deserialized object
     * @throws IOException if reading or deserialization fails
     */
    public static <T> T readFromFile(String filePath, Class<T> clazz) throws IOException {
        File file = new File(filePath);
        T obj = objectMapper.readValue(file, clazz);
        logger.debug("Successfully read object from file: {}", filePath);
        return obj;
    }

    /**
     * Reads a list of {@code clazz} elements from a JSON file.
     *
     * @param filePath source file path
     * @param clazz    element type
     * @return the deserialized list
     * @throws IOException if reading or deserialization fails
     */
    public static <T> List<T> readListFromFile(String filePath, Class<T> clazz) throws IOException {
        File file = new File(filePath);
        List<T> list = objectMapper.readValue(file,
                objectMapper.getTypeFactory().constructCollectionType(List.class, clazz));
        logger.debug("Successfully read list from file: {}", filePath);
        return list;
    }
}

39
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java

@ -0,0 +1,39 @@
package com.example.datacollect.util;
import com.example.datacollect.exception.NetworkException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.Callable;
/**
 * Runs a task with a bounded number of retries and exponential back-off.
 */
public class RetryUtils {
    private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class);

    /** Number of retries after the first attempt (so up to MAX_RETRIES + 1 calls). */
    private static final int MAX_RETRIES = 3;

    /** Delay before the first retry; doubled for every further retry. */
    private static final long BASE_DELAY_MS = 500;

    /**
     * Calls {@code task} until it succeeds or the retry budget is used up.
     *
     * <p>Waits 500 ms, 1000 ms and 2000 ms before the first, second and third
     * retry respectively.
     *
     * @param task the work to run
     * @return the value returned by the first successful call
     * @throws NetworkException if every attempt fails (the cause is the last
     *         failure), or if the calling thread is interrupted; in the latter
     *         case the thread's interrupt flag is restored before throwing
     */
    public static <T> T executeWithRetry(Callable<T> task) throws NetworkException {
        Exception lastException = null;
        for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
            try {
                if (attempt > 0) {
                    // Exact integer doubling: BASE_DELAY_MS * 2^(attempt - 1).
                    long waitTime = BASE_DELAY_MS << (attempt - 1);
                    logger.info("重试 {}/{} 次,等待 {} ms", attempt, MAX_RETRIES, waitTime);
                    Thread.sleep(waitTime);
                }
                return task.call();
            } catch (InterruptedException e) {
                // Interruption is a request to stop, not a transient failure: previously it
                // was swallowed by the generic handler and the loop kept retrying.
                Thread.currentThread().interrupt();
                throw new NetworkException("网络请求被中断", e);
            } catch (Exception e) {
                lastException = e;
                logger.warn("第 {} 次尝试失败: {}", attempt + 1, e.getMessage());
            }
        }
        logger.error("所有 {} 次重试均失败", MAX_RETRIES + 1);
        throw new NetworkException("网络错误,已重试三次", lastException);
    }
}

52
project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java

@ -0,0 +1,52 @@
package com.example.datacollect.view;
import com.example.datacollect.model.Article;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end: reads command lines from standard input and prints
 * ANSI-coloured messages and article listings to standard output.
 */
public class ConsoleView implements AutoCloseable {
    private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);

    private static final String ANSI_RESET = "\u001B[0m";
    private static final String ANSI_GREEN = "\u001B[32m";
    private static final String ANSI_RED = "\u001B[31m";
    private static final String ANSI_BLUE = "\u001B[34m";

    private final Scanner scanner = new Scanner(System.in);

    /** Closes the scanner (and with it the underlying standard input stream). */
    @Override
    public void close() {
        scanner.close();
        logger.debug("ConsoleView closed");
    }

    /**
     * Prints the {@code "> "} prompt and reads one line of user input.
     *
     * @return the line entered by the user
     */
    public String readLine() {
        System.out.print("> ");
        return scanner.nextLine();
    }

    /** Prints a message in green. */
    public void printSuccess(String msg) {
        printColored(ANSI_GREEN, msg);
    }

    /** Prints a message in red. */
    public void printError(String msg) {
        printColored(ANSI_RED, msg);
    }

    /** Prints a message in blue. */
    public void printInfo(String msg) {
        printColored(ANSI_BLUE, msg);
    }

    /**
     * Prints the articles as a 1-based numbered list of {@code title | url};
     * prints an informational hint instead when the list is empty.
     *
     * @param articles articles to show
     */
    public void display(List<Article> articles) {
        if (articles.isEmpty()) {
            printInfo("暂无文章,请先执行 crawl。");
            return;
        }
        int position = 0;
        for (Article article : articles) {
            position++;
            System.out.println(position + ". " + article.getTitle() + " | " + article.getUrl());
        }
    }

    /** Writes {@code msg} wrapped in the given colour code and the reset code. */
    private static void printColored(String color, String msg) {
        System.out.println(color + msg + ANSI_RESET);
    }
}

25
project/java-cli-期末课程项目/src/main/resources/logback.xml

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: every log event goes to the console and to a date-rolled file under logs/. -->
<configuration>
<!-- Console output: timestamp, thread, level (padded to 5), logger name (shortened to 36 chars), message. -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<!-- File output: active file is logs/crawler.log; rolled files are named by date and at most 30 are kept. -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/crawler.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<!-- Application package logs at DEBUG; everything else uses the root level (INFO). -->
<logger name="com.example.datacollect" level="DEBUG" />
<root level="INFO">
<appender-ref ref="CONSOLE" />
<appender-ref ref="FILE" />
</root>
</configuration>

25
project/java-cli-期末课程项目/target/classes/logback.xml

@ -0,0 +1,25 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: every log event goes to the console and to a date-rolled file under logs/. -->
<!-- NOTE(review): this file sits under target/classes and matches src/main/resources/logback.xml; presumably build output that should not be committed. Confirm. -->
<configuration>
<!-- Console output: timestamp, thread, level (padded to 5), logger name (shortened to 36 chars), message. -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<!-- File output: active file is logs/crawler.log; rolled files are named by date and at most 30 are kept. -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>logs/crawler.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
</encoder>
</appender>
<!-- Application package logs at DEBUG; everything else uses the root level (INFO). -->
<logger name="com.example.datacollect" level="DEBUG" />
<root level="INFO">
<appender-ref ref="CONSOLE" />
<appender-ref ref="FILE" />
</root>
</configuration>

3
project/java-cli-期末课程项目/target/maven-archiver/pom.properties

@ -0,0 +1,3 @@
artifactId=datacollect-cli
groupId=com.example
version=0.1.0

0
project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst

32
project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

@ -0,0 +1,32 @@
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\PersistenceManager.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExitCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\CrawlCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ExportException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExportCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ImportCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ImportException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\DuplicateArticleException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\CrawlerException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\Command.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\model\Article.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\PeopleStrategy.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\NetworkException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\controller\CrawlerController.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\StrategyFactory.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonImporter.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\RetryUtils.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ListCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\Main.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\UrlFormatException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonSerializer.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ParseException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\YouthStrategy.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CsdnStrategy.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\HelpCommand.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\ArticleRepository.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ValidationException.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\view\ConsoleView.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonExporter.java
C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\AnalyzeCommand.java

92
project/java-cli-期末课程项目/test_crawler.ps1

@ -0,0 +1,92 @@
# Smoke test for the CLI crawler: feeds commands to the packaged jar over
# standard input and reports pass/fail for each scenario.
#
# Fixes over the previous version:
#   * "Write-Host $x | Select-Object -First/-Last N" never limited the output,
#     because Write-Host sends nothing down the pipeline. The lines are now
#     selected first and then printed.
#   * import_test.json is removed whether or not the import test passes.
$ErrorActionPreference = "Continue"
$Jar = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"

Write-Host "=== 测试 CLI 爬虫程序 ===" -ForegroundColor Cyan

# Test 1: start the program and show the help text
Write-Host "`n1. 测试帮助命令..." -ForegroundColor Yellow
$helpOutput = echo "help" | java -jar $Jar 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "帮助命令执行失败" -ForegroundColor Red
    Write-Host $helpOutput
} else {
    Write-Host "帮助命令执行成功" -ForegroundColor Green
    $helpOutput | Select-Object -First 15 | ForEach-Object { Write-Host $_ }
}

# Test 2: list command on an empty list
Write-Host "`n2. 测试 list 命令(空列表)..." -ForegroundColor Yellow
$listOutput = echo "list" | java -jar $Jar 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "list 命令执行失败" -ForegroundColor Red
    Write-Host $listOutput
} else {
    Write-Host "list 命令执行成功" -ForegroundColor Green
}

# Test 3: Juejin strategy
Write-Host "`n3. 测试 Juejin 策略..." -ForegroundColor Yellow
$juejinOutput = @("crawl https://juejin.cn/", "list", "exit") | java -jar $Jar 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "Juejin 策略测试失败" -ForegroundColor Red
    $juejinOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
} else {
    $articleCount = ($juejinOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' })
    Write-Host "Juejin 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green
}

# Test 4: HnuNews strategy
Write-Host "`n4. 测试 HnuNews 策略..." -ForegroundColor Yellow
$hnuOutput = @("crawl https://news.hnu.edu.cn/", "list", "exit") | java -jar $Jar 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "HnuNews 策略测试失败" -ForegroundColor Red
    $hnuOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
} else {
    $articleCount = ($hnuOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' })
    Write-Host "HnuNews 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green
}

# Test 5: export
Write-Host "`n5. 测试导出功能..." -ForegroundColor Yellow
$exportOutput = @("crawl https://juejin.cn/", "export test_export.json", "exit") | java -jar $Jar 2>&1
if (-not (Test-Path "test_export.json")) {
    Write-Host "导出功能测试失败" -ForegroundColor Red
    $exportOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
} else {
    $fileSize = (Get-Item "test_export.json").Length
    Write-Host "导出功能测试成功 - 文件大小: $fileSize 字节" -ForegroundColor Green
    Remove-Item "test_export.json" -Force
}

# Test 6: import (export a file first, then import it in a fresh process)
Write-Host "`n6. 测试导入功能..." -ForegroundColor Yellow
@("crawl https://juejin.cn/", "export import_test.json", "exit") | java -jar $Jar 2>&1 | Out-Null
$importOutput = @("import import_test.json", "list", "exit") | java -jar $Jar 2>&1
if ($LASTEXITCODE -ne 0) {
    Write-Host "导入功能测试失败" -ForegroundColor Red
    $importOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
} else {
    Write-Host "导入功能测试成功" -ForegroundColor Green
}
# Clean up on both outcomes; the file may be missing if the export step failed.
if (Test-Path "import_test.json") {
    Remove-Item "import_test.json" -Force
}

# Test 7: unknown command handling
Write-Host "`n7. 测试未知命令处理..." -ForegroundColor Yellow
$unknownOutput = echo "unknown_command" | java -jar $Jar 2>&1
if ($unknownOutput -match "Unknown command") {
    Write-Host "未知命令处理测试成功" -ForegroundColor Green
} else {
    Write-Host "未知命令处理测试失败" -ForegroundColor Red
}

# Test 8: session persistence (exit, then start again)
Write-Host "`n8. 测试会话持久化..." -ForegroundColor Yellow
@("crawl https://juejin.cn/", "exit") | java -jar $Jar 2>&1 | Out-Null
$restoreOutput = echo "list" | java -jar $Jar 2>&1
if ($restoreOutput -match "Loaded") {
    Write-Host "会话持久化测试成功" -ForegroundColor Green
} else {
    Write-Host "会话持久化测试失败" -ForegroundColor Red
    $restoreOutput | Select-Object -Last 5 | ForEach-Object { Write-Host $_ }
}

Write-Host "`n=== 测试完成 ===" -ForegroundColor Cyan

17
project/java-cli-期末课程项目/test_export.json

@ -0,0 +1,17 @@
[ {
"title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
"content" : ""
}, {
"title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
"url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
"content" : ""
}, {
"title" : "外交部谈美伊谈判",
"url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
"content" : ""
}, {
"title" : "重庆发布今年首个地质灾害红色预警",
"url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
"content" : ""
} ]

236
project/java-cli-期末课程项目/test_import_export.ps1

@ -0,0 +1,236 @@
# Test Script for CLI Crawler - Data Import/Export Features
# This script automates the test sequence
$ErrorActionPreference = "Stop"
$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
$TEST_EXPORT_FILE = "data\test_export.json"
$USERPROFILE_PATH = "$env:USERPROFILE\.datacollect"
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "CLI Crawler - Import/Export Test Suite" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host ""
# Clean up function
# Removes leftovers from earlier runs: everything inside $USERPROFILE_PATH and
# the exported test file. Removal errors are suppressed.
function Clean-Up {
    Write-Host -ForegroundColor Yellow "[CLEANUP] Removing old data files..."

    $profileDataExists = Test-Path -Path $USERPROFILE_PATH
    if ($profileDataExists) {
        Remove-Item -Path "$USERPROFILE_PATH\*" -Recurse -Force -ErrorAction SilentlyContinue
    }

    $exportFileExists = Test-Path -Path $TEST_EXPORT_FILE
    if ($exportFileExists) {
        Remove-Item -Path $TEST_EXPORT_FILE -Force -ErrorAction SilentlyContinue
    }
}
# Run CLI command function
# Runs each non-blank line of $Commands as a separate invocation of the jar,
# passing the trimmed line as a single argument and echoing the captured output.
function Run-CLI {
    param([string]$Commands)

    $Commands -split "`n" |
        ForEach-Object { $_.Trim() } |
        Where-Object { $_ -ne "" } |
        ForEach-Object {
            Write-Host "[CLI] $_" -ForegroundColor Gray
            $output = & java -jar $APP_JAR $_ 2>&1
            Write-Host $output -ForegroundColor Green
            Write-Host ""
        }
}
# Step 1: Initial Cleanup
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 1: Initial Cleanup" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Clean-Up
Write-Host ""
# Step 2: Crawl some data
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 2: Crawl Data (CSDN)" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: crawl https://www.csdn.net/" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "crawl https://www.csdn.net/" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 2
# Step 3: List articles
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 3: List Articles" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: list" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "list" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 4: Export to JSON
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 4: Export to JSON" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: export data\test_export.json --format json" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "export data\test_export.json --format json" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 5: Check JSON file
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 5: Check Exported JSON File" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
if (Test-Path $TEST_EXPORT_FILE) {
Write-Host "[SUCCESS] JSON file created: $TEST_EXPORT_FILE" -ForegroundColor Green
Write-Host ""
Write-Host "JSON File Content Preview (first 1500 chars):" -ForegroundColor Cyan
$content = Get-Content $TEST_EXPORT_FILE -Raw
if ($content.Length -gt 1500) {
Write-Host ($content.Substring(0, 1500) + "...") -ForegroundColor White
} else {
Write-Host $content -ForegroundColor White
}
# Check for crawledAt field
if ($content -match "crawledAt") {
Write-Host ""
Write-Host "[SUCCESS] crawledAt field found in JSON!" -ForegroundColor Green
} else {
Write-Host ""
Write-Host "[ERROR] crawledAt field NOT found in JSON!" -ForegroundColor Red
}
# Check for metadata
if ($content -match "metadata") {
Write-Host "[SUCCESS] metadata field found in JSON!" -ForegroundColor Green
} else {
Write-Host "[WARNING] metadata field NOT found in JSON!" -ForegroundColor Yellow
}
} else {
Write-Host "[ERROR] JSON file NOT created!" -ForegroundColor Red
}
Write-Host ""
# Step 6: Get article count before clear
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 6: Get Article Count Before Clear" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: list" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "list" 2>&1
Write-Host $result -ForegroundColor Green
# Count articles. $articleCount stays 0 when no "Total: N articles" line is
# present, so a separate flag tells "zero articles" apart from "count not found".
$articleCount = 0
$articleCountParsed = $false
$lines = $result -split "`n"
foreach ($line in $lines) {
    if ($line -match "Total: (\d+) articles") {
        $articleCount = [int]$matches[1]
        $articleCountParsed = $true
        break
    }
}
Write-Host ""
if (-not $articleCountParsed) {
    Write-Host "[WARNING] No 'Total: N articles' line found in the list output; the count below is a placeholder." -ForegroundColor Yellow
}
Write-Host "Current article count: $articleCount" -ForegroundColor Cyan
Write-Host ""
Start-Sleep -Seconds 1
# Step 7: Clear all data
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 7: Clear All Data" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: clear" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "clear" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 8: Verify data is cleared
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 8: Verify Data Cleared" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: list" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "list" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 9: Import data from JSON
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 9: Import Data from JSON" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: import data\test_export.json" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "import data\test_export.json" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 10: Verify data restored
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 10: Verify Data Restored" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: list" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "list" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
# Count articles after import
$articleCountAfterImport = 0
$lines = $result -split "`n"
foreach ($line in $lines) {
if ($line -match "Total: (\d+) articles") {
$articleCountAfterImport = [int]$matches[1]
break
}
}
if ($articleCountAfterImport -eq $articleCount) {
Write-Host "[SUCCESS] Data restored successfully! Article count matches: $articleCountAfterImport" -ForegroundColor Green
} else {
Write-Host "[WARNING] Article count mismatch. Before: $articleCount, After: $articleCountAfterImport" -ForegroundColor Yellow
}
Write-Host ""
# Step 11: Test duplicate import (should not duplicate)
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 11: Test Duplicate Import (No Duplication)" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: import data\test_export.json (second time)" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "import data\test_export.json" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
Start-Sleep -Seconds 1
# Step 12: Final article count
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "STEP 12: Final Article Count" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Command: list" -ForegroundColor Yellow
$result = & java -jar $APP_JAR "list" 2>&1
Write-Host $result -ForegroundColor Green
Write-Host ""
# Final count. The flag records whether a "Total: N articles" line was actually
# seen; without it a failed parse left the count at 0 and could be reported as
# "no duplication" (0 equals 0).
$finalCount = 0
$finalCountParsed = $false
$lines = $result -split "`n"
foreach ($line in $lines) {
    if ($line -match "Total: (\d+) articles") {
        $finalCount = [int]$matches[1]
        $finalCountParsed = $true
        break
    }
}
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "TEST SUMMARY" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "Articles after first import: $articleCountAfterImport" -ForegroundColor White
Write-Host "Articles after second import: $finalCount" -ForegroundColor White
Write-Host ""
if (-not $finalCountParsed) {
    Write-Host "[WARNING] No 'Total: N articles' line found in the list output; duplicate-import check is inconclusive." -ForegroundColor Yellow
} elseif ($finalCount -eq $articleCountAfterImport) {
    Write-Host "[SUCCESS] Duplicate import correctly skipped! No duplication occurred." -ForegroundColor Green
} else {
    Write-Host "[ERROR] Duplicate import created duplicates! Count increased from $articleCountAfterImport to $finalCount" -ForegroundColor Red
}
Write-Host ""
Write-Host "========================================" -ForegroundColor Cyan
Write-Host "ALL TESTS COMPLETED" -ForegroundColor Cyan
Write-Host "========================================" -ForegroundColor Cyan

2
project/java-cli-期末课程项目/test_input.txt

@ -0,0 +1,2 @@
export data/test_standard_export.json
exit
Loading…
Cancel
Save