diff --git a/project/202506050307-汪君玥-期末实验报告.docx b/project/202506050307-汪君玥-期末实验报告.docx
new file mode 100644
index 0000000..0e50195
Binary files /dev/null and b/project/202506050307-汪君玥-期末实验报告.docx differ
diff --git a/project/java-cli-期末课程项目/.gitignore b/project/java-cli-期末课程项目/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/project/java-cli-期末课程项目/.gitignore
@@ -0,0 +1,6 @@
+# Maven build output directory (classes, assembled jars, plugin scratch files)
+target/
+# Build artefacts and log files, wherever they appear in the tree
+*.jar
+*.class
+*.log
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/data/csdnexport.json b/project/java-cli-期末课程项目/data/csdnexport.json
new file mode 100644
index 0000000..d842d31
--- /dev/null
+++ b/project/java-cli-期末课程项目/data/csdnexport.json
@@ -0,0 +1,273 @@
+[ {
+ "title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
+ "content" : ""
+}, {
+ "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
+ "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
+ "content" : ""
+}, {
+ "title" : "外交部谈美伊谈判",
+ "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
+ "content" : ""
+}, {
+ "title" : "重庆发布今年首个地质灾害红色预警",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
+ "content" : ""
+}, {
+ "title" : "重庆发布今年首个地质灾害红色预警",
+ "url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html",
+ "content" : ""
+}, {
+ "title" : "账号管理规范",
+ "url" : "https://blog.csdn.net/blogdevteam/article/details/126135357",
+ "content" : ""
+}, {
+ "title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096",
+ "content" : ""
+}, {
+ "title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode",
+ "url" : "https://blog.csdn.net/jiangtao/article/details/161373705",
+ "content" : ""
+}, {
+ "title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变",
+ "url" : "https://blog.csdn.net/jzagi/article/details/161327725",
+ "content" : ""
+}, {
+ "title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161325101",
+ "content" : ""
+}, {
+ "title" : "开源项目“离谱的死亡方式”",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161325111",
+ "content" : ""
+}, {
+ "title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条",
+ "url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638",
+ "content" : ""
+}, {
+ "title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914",
+ "content" : ""
+}, {
+ "title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921",
+ "content" : ""
+}, {
+ "title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条",
+ "url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996",
+ "content" : ""
+}, {
+ "title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161294926",
+ "content" : ""
+}, {
+ "title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 服务器 14.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/157735374",
+ "content" : ""
+}, {
+ "title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149722641",
+ "content" : ""
+}, {
+ "title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149424765",
+ "content" : ""
+}, {
+ "title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149356472",
+ "content" : ""
+}, {
+ "title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149249425",
+ "content" : ""
+}, {
+ "title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149169547",
+ "content" : ""
+}, {
+ "title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149066439",
+ "content" : ""
+}, {
+ "title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148900678",
+ "content" : ""
+}, {
+ "title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148851428",
+ "content" : ""
+}, {
+ "title" : "CHATERM AI:开启云资源氛围管理新篇章! 人工智能 70.3K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148769366",
+ "content" : ""
+}, {
+ "title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作",
+ "url" : "https://blog.csdn.net/blogdevteam/article/details/160479095",
+ "content" : ""
+}, {
+ "title" : "深入解析进程:从PCB到僵尸进程",
+ "url" : "https://blog.csdn.net/2401_86275172/article/details/160566166",
+ "content" : ""
+}, {
+ "title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断",
+ "url" : "https://blog.csdn.net/Rthan/article/details/161241670",
+ "content" : ""
+}, {
+ "title" : "【读书笔记】《幸福关系的七段旅程》",
+ "url" : "https://blog.csdn.net/Chandler2017/article/details/160967281",
+ "content" : ""
+}, {
+ "title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析",
+ "url" : "https://blog.csdn.net/2401_88151415/article/details/161253437",
+ "content" : ""
+}, {
+ "title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码",
+ "url" : "https://blog.csdn.net/qq_36478920/article/details/161291953",
+ "content" : ""
+}, {
+ "title" : "volatile 的底层原理及应用场景",
+ "url" : "https://blog.csdn.net/tongluowan007/article/details/161230327",
+ "content" : ""
+}, {
+ "title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04",
+ "url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923",
+ "content" : ""
+}, {
+ "title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)",
+ "url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019",
+ "content" : ""
+}, {
+ "title" : "系统分析师 备考知识点整理",
+ "url" : "https://blog.csdn.net/david_232656/article/details/161291901",
+ "content" : ""
+}, {
+ "title" : "Linux之文件",
+ "url" : "https://blog.csdn.net/bksczm/article/details/161055964",
+ "content" : ""
+}, {
+ "title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)",
+ "url" : "https://blog.csdn.net/m0_67558301/article/details/161324964",
+ "content" : ""
+}, {
+ "title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉",
+ "url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741",
+ "content" : ""
+}, {
+ "title" : "Codex Mac版安装教程(AppStore无法下载解决)",
+ "url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569",
+ "content" : ""
+}, {
+ "title" : "应用层中的UDP协议原理",
+ "url" : "https://blog.csdn.net/2503_90262217/article/details/161200229",
+ "content" : ""
+}, {
+ "title" : "【AI】Git、Node.js 一站式保姆级安装指南",
+ "url" : "https://blog.csdn.net/2401_87342824/article/details/161199150",
+ "content" : ""
+}, {
+ "title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑",
+ "url" : "https://blog.csdn.net/Z2314246476/article/details/161076726",
+ "content" : ""
+}, {
+ "title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点",
+ "url" : "https://blog.csdn.net/yanceyxin/article/details/161130991",
+ "content" : ""
+}, {
+ "title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越",
+ "url" : "https://blog.csdn.net/shaobingj126/article/details/161307384",
+ "content" : ""
+}, {
+ "title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表",
+ "url" : "https://blog.csdn.net/2603_96051737/article/details/161256831",
+ "content" : ""
+}, {
+ "title" : "谷歌辞职、创业失败、重读神经科学,她说 AI 时代最危险的事是外包你的思考 | 万有引力",
+ "url" : "https://blog.csdn.net/tangxiaoyin/article/details/161428871",
+ "content" : ""
+}, {
+ "title" : "传字节向Seed员工开放「豆包股」认购权;滴滴出行App大规模故障,官方致歉;小米MiMo-V2.5系列API永久降价:最高降99% | 极客头条",
+ "url" : "https://blog.csdn.net/weixin_39786569/article/details/161446737",
+ "content" : ""
+}, {
+ "title" : "华为韬定律刷屏,程序员真正该读懂的信号是什么? | 硅基时间",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161432746",
+ "content" : ""
+}, {
+ "title" : "一位10年Android老兵选择「逆行」:“如果未来只剩AI写代码,那就把我落下吧!”",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161432759",
+ "content" : ""
+}, {
+ "title" : "告别繁琐预处理!MindSpeed LLM推出Train_from_HF功能,实现加载即训练",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161426770",
+ "content" : ""
+}, {
+ "title" : "MindSpeed LLM结合Agent-Skills适配Mamba3模型,解锁SSM模型新潜能",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161427107",
+ "content" : ""
+}, {
+ "title" : "高性能计算:鲲鹏软硬协同定义AI4S 计算新范式",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161426451",
+ "content" : ""
+}, {
+ "title" : "AI公司烧不起Token了!国产Agent杀出,逼近Opus 4.6还免费,天工AI发布SkyClaw-v1.0:面向真实工作流的百万上下文 Agent 模型",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161422508",
+ "content" : ""
+}, {
+ "title" : "2026年618大促7000元内演唱会手机推荐:Find X9s Pro领衔,远摄防抖清晰度全解析",
+ "url" : "https://blog.csdn.net/2601_95822891/article/details/161261185",
+ "content" : ""
+}, {
+ "title" : "Python运算符:身份运算符(is/is not)与双等号的区别",
+ "url" : "https://blog.csdn.net/AIRoses/article/details/161410239",
+ "content" : ""
+}, {
+ "title" : "Codex 与 Claude Code 安装配置教程",
+ "url" : "https://blog.csdn.net/weixin_45888077/article/details/161401615",
+ "content" : ""
+}, {
+ "title" : "初识java(十一):继承",
+ "url" : "https://blog.csdn.net/2502_93282244/article/details/161372118",
+ "content" : ""
+}, {
+ "title" : "我那台在抽屉里躺了三年的旧手机,被我改造成了全天候私人云盘",
+ "url" : "https://blog.csdn.net/SDFsoul/article/details/161278737",
+ "content" : ""
+}, {
+ "title" : "【必看】2026年 {计算题} |专项解析 ~ H:动态规划 & 图论",
+ "url" : "https://blog.csdn.net/weixin_42115157/article/details/161057408",
+ "content" : ""
+}, {
+ "title" : "FreeRTOS——按键控制任务的挂起和恢复",
+ "url" : "https://blog.csdn.net/weixin_64611877/article/details/161456747",
+ "content" : ""
+}, {
+ "title" : "【c++笔记】类和对象流食般投喂(中)",
+ "url" : "https://blog.csdn.net/dj_798/article/details/160994229",
+ "content" : ""
+}, {
+ "title" : "C++的IO流",
+ "url" : "https://blog.csdn.net/suimingtao/article/details/160892078",
+ "content" : ""
+}, {
+ "title" : "Java——标准序列化机制",
+ "url" : "https://blog.csdn.net/cold___play/article/details/161107932",
+ "content" : ""
+}, {
+ "title" : "1.6T光模块将成AI数据中心主流",
+ "url" : "https://blog.csdn.net/m0_75253087/article/details/160956039",
+ "content" : ""
+}, {
+ "title" : "通用程序无缺陷保证的不可能性:停机问题与哥德尔不完备定理的双轨论证 —— 兼论“边界情况不可穷举”的形式化含义",
+ "url" : "https://blog.csdn.net/qq_43689451/article/details/161271922",
+ "content" : ""
+}, {
+ "title" : "新书速览|信息与通信工程综合实验:自动目标识别专题",
+ "url" : "https://blog.csdn.net/quanzhankaifaqua/article/details/161193290",
+ "content" : ""
+}, {
+ "title" : "深入理解 OSI 七层网络模型:从原理到实践",
+ "url" : "https://blog.csdn.net/2603_95882547/article/details/161140630",
+ "content" : ""
+} ]
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/data/my_export.json b/project/java-cli-期末课程项目/data/my_export.json
new file mode 100644
index 0000000..f323df4
--- /dev/null
+++ b/project/java-cli-期末课程项目/data/my_export.json
@@ -0,0 +1,185 @@
+[ {
+ "title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
+ "content" : ""
+}, {
+ "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
+ "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
+ "content" : ""
+}, {
+ "title" : "外交部谈美伊谈判",
+ "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
+ "content" : ""
+}, {
+ "title" : "重庆发布今年首个地质灾害红色预警",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
+ "content" : ""
+}, {
+ "title" : "重庆发布今年首个地质灾害红色预警",
+ "url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html",
+ "content" : ""
+}, {
+ "title" : "账号管理规范",
+ "url" : "https://blog.csdn.net/blogdevteam/article/details/126135357",
+ "content" : ""
+}, {
+ "title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096",
+ "content" : ""
+}, {
+ "title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode",
+ "url" : "https://blog.csdn.net/jiangtao/article/details/161373705",
+ "content" : ""
+}, {
+ "title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变",
+ "url" : "https://blog.csdn.net/jzagi/article/details/161327725",
+ "content" : ""
+}, {
+ "title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161325101",
+ "content" : ""
+}, {
+ "title" : "开源项目“离谱的死亡方式”",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161325111",
+ "content" : ""
+}, {
+ "title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条",
+ "url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638",
+ "content" : ""
+}, {
+ "title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914",
+ "content" : ""
+}, {
+ "title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环",
+ "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921",
+ "content" : ""
+}, {
+ "title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条",
+ "url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996",
+ "content" : ""
+}, {
+ "title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗",
+ "url" : "https://blog.csdn.net/csdnnews/article/details/161294926",
+ "content" : ""
+}, {
+ "title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 服务器 14.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/157735374",
+ "content" : ""
+}, {
+ "title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149722641",
+ "content" : ""
+}, {
+ "title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149424765",
+ "content" : ""
+}, {
+ "title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149356472",
+ "content" : ""
+}, {
+ "title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149249425",
+ "content" : ""
+}, {
+ "title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149169547",
+ "content" : ""
+}, {
+ "title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/149066439",
+ "content" : ""
+}, {
+ "title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148900678",
+ "content" : ""
+}, {
+ "title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148851428",
+ "content" : ""
+}, {
+ "title" : "CHATERM AI:开启云资源氛围管理新篇章! 人工智能 70.3K 查看详情",
+ "url" : "https://blog.csdn.net/coderroad/article/details/148769366",
+ "content" : ""
+}, {
+ "title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作",
+ "url" : "https://blog.csdn.net/blogdevteam/article/details/160479095",
+ "content" : ""
+}, {
+ "title" : "深入解析进程:从PCB到僵尸进程",
+ "url" : "https://blog.csdn.net/2401_86275172/article/details/160566166",
+ "content" : ""
+}, {
+ "title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断",
+ "url" : "https://blog.csdn.net/Rthan/article/details/161241670",
+ "content" : ""
+}, {
+ "title" : "【读书笔记】《幸福关系的七段旅程》",
+ "url" : "https://blog.csdn.net/Chandler2017/article/details/160967281",
+ "content" : ""
+}, {
+ "title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析",
+ "url" : "https://blog.csdn.net/2401_88151415/article/details/161253437",
+ "content" : ""
+}, {
+ "title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码",
+ "url" : "https://blog.csdn.net/qq_36478920/article/details/161291953",
+ "content" : ""
+}, {
+ "title" : "volatile 的底层原理及应用场景",
+ "url" : "https://blog.csdn.net/tongluowan007/article/details/161230327",
+ "content" : ""
+}, {
+ "title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04",
+ "url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923",
+ "content" : ""
+}, {
+ "title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)",
+ "url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019",
+ "content" : ""
+}, {
+ "title" : "系统分析师 备考知识点整理",
+ "url" : "https://blog.csdn.net/david_232656/article/details/161291901",
+ "content" : ""
+}, {
+ "title" : "Linux之文件",
+ "url" : "https://blog.csdn.net/bksczm/article/details/161055964",
+ "content" : ""
+}, {
+ "title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)",
+ "url" : "https://blog.csdn.net/m0_67558301/article/details/161324964",
+ "content" : ""
+}, {
+ "title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉",
+ "url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741",
+ "content" : ""
+}, {
+ "title" : "Codex Mac版安装教程(AppStore无法下载解决)",
+ "url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569",
+ "content" : ""
+}, {
+ "title" : "应用层中的UDP协议原理",
+ "url" : "https://blog.csdn.net/2503_90262217/article/details/161200229",
+ "content" : ""
+}, {
+ "title" : "【AI】Git、Node.js 一站式保姆级安装指南",
+ "url" : "https://blog.csdn.net/2401_87342824/article/details/161199150",
+ "content" : ""
+}, {
+ "title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑",
+ "url" : "https://blog.csdn.net/Z2314246476/article/details/161076726",
+ "content" : ""
+}, {
+ "title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点",
+ "url" : "https://blog.csdn.net/yanceyxin/article/details/161130991",
+ "content" : ""
+}, {
+ "title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越",
+ "url" : "https://blog.csdn.net/shaobingj126/article/details/161307384",
+ "content" : ""
+}, {
+ "title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表",
+ "url" : "https://blog.csdn.net/2603_96051737/article/details/161256831",
+ "content" : ""
+} ]
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/data/sample_test.json b/project/java-cli-期末课程项目/data/sample_test.json
new file mode 100644
index 0000000..5462cd5
--- /dev/null
+++ b/project/java-cli-期末课程项目/data/sample_test.json
@@ -0,0 +1,29 @@
+{
+ "metadata": {
+ "exportTime": "2026-05-31T12:00:00",
+ "totalCount": 3,
+ "source": "CLI Crawler v1.0",
+ "exportMode": "STANDARD",
+ "version": "1.0"
+ },
+ "articles": [
+ {
+ "title": "测试文章1",
+ "url": "https://example.com/article1",
+ "content": "这是测试内容1",
+ "crawledAt": "2026-05-31T10:00:00"
+ },
+ {
+ "title": "测试文章2",
+ "url": "https://example.com/article2",
+ "content": "这是测试内容2",
+ "crawledAt": "2026-05-31T11:00:00"
+ },
+ {
+ "title": "测试文章3",
+ "url": "https://example.com/article3",
+ "content": "这是测试内容3",
+ "crawledAt": "2026-05-31T12:00:00"
+ }
+ ]
+}
diff --git a/project/java-cli-期末课程项目/minimal_test.ps1 b/project/java-cli-期末课程项目/minimal_test.ps1
new file mode 100644
index 0000000..fa53d41
--- /dev/null
+++ b/project/java-cli-期末课程项目/minimal_test.ps1
@@ -0,0 +1,77 @@
+# Simple Import/Export Test - Minimal Version
+#
+# Smoke test for the datacollect CLI: import the sample JSON, list, export,
+# inspect the export file, import the same sample again and list once more.
+# Output is printed for manual inspection; run from the project root so the
+# relative paths below resolve.
+#
+# NOTE(review): Main.main in this commit never reads its args and loops on
+# ConsoleView.readLine() instead - confirm the CLI accepts commands passed as
+# process arguments the way this script does.
+# NOTE(review): JAVA_HOME is set for child processes, but "java" below is still
+# resolved through PATH - confirm which JDK is meant to run the jar.
+$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
+$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+$SAMPLE = "data\sample_test.json"
+$EXPORT = "data\export_result.json"
+
+# Delete any export left by an earlier run so TEST 4 only sees this run's file.
+if (Test-Path $EXPORT) {
+    Remove-Item $EXPORT
+}
+
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "Import/Export Test - Minimal" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host ""
+
+Write-Host "[TEST 1] Import" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR import $SAMPLE 2>&1
+Write-Host $result
+Write-Host ""
+
+Write-Host "[TEST 2] List" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR list 2>&1
+Write-Host $result
+Write-Host ""
+
+Write-Host "[TEST 3] Export" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR export $EXPORT --format json 2>&1
+Write-Host $result
+Write-Host ""
+
+# Report every check explicitly so a missing file or field is visible in the output.
+Write-Host "[TEST 4] Check Export File" -ForegroundColor Yellow
+if (Test-Path $EXPORT) {
+    Write-Host "[SUCCESS] File created!" -ForegroundColor Green
+    $content = Get-Content $EXPORT -Raw
+    Write-Host "Length: $($content.Length) chars" -ForegroundColor Cyan
+
+    if ($content -match "crawledAt") {
+        Write-Host "[SUCCESS] crawledAt field found!" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] crawledAt field NOT found" -ForegroundColor Red
+    }
+    if ($content -match "metadata") {
+        Write-Host "[SUCCESS] metadata field found!" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] metadata field NOT found" -ForegroundColor Red
+    }
+} else {
+    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
+}
+Write-Host ""
+
+Write-Host "[TEST 5] Import Again (Duplicate)" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR import $SAMPLE 2>&1
+Write-Host $result
+Write-Host ""
+
+Write-Host "[TEST 6] Final List" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR list 2>&1
+Write-Host $result
+
+Write-Host ""
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "TEST COMPLETED" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
diff --git a/project/java-cli-期末课程项目/pom.xml b/project/java-cli-期末课程项目/pom.xml
new file mode 100644
index 0000000..7864c8a
--- /dev/null
+++ b/project/java-cli-期末课程项目/pom.xml
@@ -0,0 +1,67 @@
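+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>datacollect-cli</artifactId>
+    <version>0.1.0</version>
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>2.0.9</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.4.14</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.16.1</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.datacollect.Main</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>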
diff --git a/project/java-cli-期末课程项目/simple_test.cmd b/project/java-cli-期末课程项目/simple_test.cmd
new file mode 100644
index 0000000..a89d53e
--- /dev/null
+++ b/project/java-cli-期末课程项目/simple_test.cmd
@@ -0,0 +1,71 @@
+@echo off
+REM Import/export smoke test for the datacollect CLI (cmd.exe version).
+REM Runs import, list, export, an export-file check, a second import and a
+REM final list; output is printed for manual inspection. Run it from the
+REM project root so the relative paths below resolve.
+REM
+REM NOTE(review): Main.main in this commit never reads its args and loops on
+REM ConsoleView.readLine() instead - confirm the CLI accepts commands passed
+REM as process arguments the way this script does.
+
+REM setlocal keeps the variables below (JAVA_HOME in particular) from leaking
+REM into the cmd session that launched this script.
+setlocal
+set "JAVA_HOME=C:\Program Files\Java\latest\jdk-25"
+set "APP_JAR=target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+set "SAMPLE=data\sample_test.json"
+set "EXPORT=data\export_result.json"
+
+REM Delete any export left by an earlier run so TEST 4 only sees the file from this run.
+if exist "%EXPORT%" del "%EXPORT%"
+
+echo ========================================
+echo Import/Export Feature Test
+echo ========================================
+echo.
+
+echo [TEST 1] Import sample JSON file
+echo Command: import %SAMPLE%
+java -jar "%APP_JAR%" import "%SAMPLE%"
+echo.
+echo.
+
+echo [TEST 2] List articles
+echo Command: list
+java -jar "%APP_JAR%" list
+echo.
+echo.
+
+echo [TEST 3] Export to JSON
+echo Command: export %EXPORT% --format json
+java -jar "%APP_JAR%" export "%EXPORT%" --format json
+echo.
+echo.
+
+echo [TEST 4] Check exported file
+if exist "%EXPORT%" (
+    echo [SUCCESS] Export file created
+    echo.
+    echo First 20 lines of exported file:
+    powershell -Command "Get-Content '%EXPORT%' | Select-Object -First 20"
+) else (
+    echo [ERROR] Export file NOT created
+)
+echo.
+echo.
+
+echo [TEST 5] Test duplicate import
+echo Command: import %SAMPLE% (again)
+java -jar "%APP_JAR%" import "%SAMPLE%"
+echo.
+echo.
+
+echo [TEST 6] Final list
+echo Command: list
+java -jar "%APP_JAR%" list
+echo.
+echo.
+
+echo ========================================
+echo Tests completed! Check output above.
+echo ========================================
diff --git a/project/java-cli-期末课程项目/simple_test.ps1 b/project/java-cli-期末课程项目/simple_test.ps1
new file mode 100644
index 0000000..53b1e14
--- /dev/null
+++ b/project/java-cli-期末课程项目/simple_test.ps1
@@ -0,0 +1,149 @@
+# Simple Import/Export Test
+#
+# Drives the datacollect CLI through import, list, export, a second import and
+# a final list, then prints a summary and a preview of the exported JSON.
+# Run from the project root so the relative paths below resolve.
+#
+# NOTE(review): Main.main in this commit never reads its args and loops on
+# ConsoleView.readLine() instead - confirm the CLI accepts commands passed as
+# process arguments the way this script does.
+$ErrorActionPreference = "Stop"
+$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
+$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+$TEST_FILE = "data\sample_test.json"
+$EXPORT_FILE = "data\export_result.json"
+
+# Runs the CLI jar with the given arguments and returns its stdout and stderr
+# lines as plain strings.
+function Invoke-Cli {
+    param([string[]]$CliArgs)
+    # Function-local override: with "Stop" in effect, Windows PowerShell can turn
+    # the first redirected stderr line of a native command into a terminating error.
+    $ErrorActionPreference = "Continue"
+    & java -jar $APP_JAR @CliArgs 2>&1 | ForEach-Object { "$_" }
+}
+
+# Returns the number from the last "Total: <n>" line in the CLI output, or 0 if none.
+function Get-ArticleCount {
+    param([string[]]$Lines)
+    $count = 0
+    foreach ($line in ($Lines -split "`n")) {
+        if ($line -match "Total: (\d+)") {
+            $count = [int]$Matches[1]
+        }
+    }
+    return $count
+}
+
+# Delete any export left by an earlier run so TEST 4 only sees this run's file.
+if (Test-Path $EXPORT_FILE) {
+    Remove-Item $EXPORT_FILE
+}
+
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "Import/Export Feature Test" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host ""
+
+# Step 1: Import sample data
+Write-Host "[TEST 1] Import sample JSON file" -ForegroundColor Yellow
+Write-Host "Command: import $TEST_FILE" -ForegroundColor Gray
+# Each CLI argument is passed as its own element; a single "import <file>" string
+# would reach Java as one argument containing a space.
+$result = Invoke-Cli -CliArgs "import", $TEST_FILE
+Write-Host $result -ForegroundColor Green
+Write-Host ""
+
+# Step 2: List articles
+Write-Host "[TEST 2] List articles after import" -ForegroundColor Yellow
+Write-Host "Command: list" -ForegroundColor Gray
+$result = Invoke-Cli -CliArgs "list"
+Write-Host $result -ForegroundColor Green
+$count1 = Get-ArticleCount -Lines $result
+Write-Host "Article count: $count1" -ForegroundColor Cyan
+Write-Host ""
+
+# Step 3: Export to new file
+Write-Host "[TEST 3] Export to new JSON file" -ForegroundColor Yellow
+Write-Host "Command: export $EXPORT_FILE --format json" -ForegroundColor Gray
+$result = Invoke-Cli -CliArgs "export", $EXPORT_FILE, "--format", "json"
+Write-Host $result -ForegroundColor Green
+Write-Host ""
+
+# Step 4: Check exported file; the outcomes are recorded for the summary below.
+Write-Host "[TEST 4] Verify exported JSON file" -ForegroundColor Yellow
+$exportCreated = $false
+$hasCrawledAt = $false
+$hasMetadata = $false
+if (Test-Path $EXPORT_FILE) {
+    $exportCreated = $true
+    Write-Host "[SUCCESS] Export file created" -ForegroundColor Green
+    $content = Get-Content $EXPORT_FILE -Raw
+    Write-Host "File size: $($content.Length) characters" -ForegroundColor Cyan
+
+    $hasCrawledAt = $content -match "crawledAt"
+    if ($hasCrawledAt) {
+        Write-Host "[SUCCESS] crawledAt field found in exported JSON" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] crawledAt field NOT found" -ForegroundColor Red
+    }
+
+    $hasMetadata = $content -match "metadata"
+    if ($hasMetadata) {
+        Write-Host "[SUCCESS] metadata field found" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] metadata field NOT found" -ForegroundColor Red
+    }
+} else {
+    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
+}
+Write-Host ""
+
+# Step 5: Test duplicate import
+Write-Host "[TEST 5] Test duplicate import (should skip duplicates)" -ForegroundColor Yellow
+Write-Host "Command: import $TEST_FILE (again)" -ForegroundColor Gray
+$result = Invoke-Cli -CliArgs "import", $TEST_FILE
+Write-Host $result -ForegroundColor Green
+
+# Step 6: List and verify no duplication
+Write-Host "[TEST 6] Verify no duplication" -ForegroundColor Yellow
+Write-Host "Command: list" -ForegroundColor Gray
+$result = Invoke-Cli -CliArgs "list"
+Write-Host $result -ForegroundColor Green
+$count2 = Get-ArticleCount -Lines $result
+Write-Host "Article count after second import: $count2" -ForegroundColor Cyan
+Write-Host ""
+
+# Summary: success is claimed only when the counts AND the export checks all passed.
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "TEST SUMMARY" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+$allPassed = ($count1 -eq 3) -and ($count2 -eq 3) -and $exportCreated -and $hasCrawledAt -and $hasMetadata
+if ($allPassed) {
+    Write-Host "[SUCCESS] All tests passed!" -ForegroundColor Green
+    Write-Host "- Import: Successfully imported 3 articles" -ForegroundColor White
+    Write-Host "- Export: Successfully exported to JSON" -ForegroundColor White
+    Write-Host "- Duplicate: Correctly skipped duplicate articles" -ForegroundColor White
+    Write-Host "- crawledAt field: Present in exported JSON" -ForegroundColor White
+} else {
+    Write-Host "[PARTIAL] Some tests may have issues" -ForegroundColor Yellow
+    Write-Host "First import count: $count1" -ForegroundColor White
+    Write-Host "Second import count: $count2" -ForegroundColor White
+    Write-Host "Export file created: $exportCreated" -ForegroundColor White
+    Write-Host "crawledAt field present: $hasCrawledAt" -ForegroundColor White
+    Write-Host "metadata field present: $hasMetadata" -ForegroundColor White
+}
+Write-Host ""
+
+# Show exported file content (at most the first 1000 characters)
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "EXPORTED JSON CONTENT (Preview)" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+if (Test-Path $EXPORT_FILE) {
+    $exportContent = Get-Content $EXPORT_FILE -Raw
+    if ($exportContent.Length -gt 1000) {
+        Write-Host ($exportContent.Substring(0, 1000) + "...") -ForegroundColor White
+    } else {
+        Write-Host $exportContent -ForegroundColor White
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..3171e0e
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,60 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.repository.PersistenceManager;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.JsonExporter;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+public class Main {
+ private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+ public static void main(String[] args) {
+ try (ConsoleView view = new ConsoleView();
+ ArticleRepository repository = new ArticleRepository();
+ PersistenceManager persistenceManager = new PersistenceManager(repository)) {
+
+ logger.info("Starting CLI Crawler application");
+
+ JsonExporter jsonExporter = new JsonExporter(repository);
+ StrategyFactory strategyFactory = new StrategyFactory();
+
+ loadSession(persistenceManager, view, repository);
+
+ CrawlerController controller = new CrawlerController(view, repository, strategyFactory, persistenceManager, jsonExporter);
+
+ view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
+ logger.info("Application initialized successfully");
+
+ while (true) {
+ try {
+ controller.handle(view.readLine());
+ } catch (Exception e) {
+ view.printError("Error: " + e.getMessage());
+ logger.error("Error in main loop: {}", e.getMessage(), e);
+ }
+ }
+ } catch (Exception e) {
+ logger.error("Fatal error in application: {}", e.getMessage(), e);
+ System.err.println("Fatal error: " + e.getMessage());
+ System.exit(1);
+ }
+ }
+
+ private static void loadSession(PersistenceManager persistenceManager, ConsoleView view, ArticleRepository repository) {
+ try {
+ persistenceManager.load();/* 加载会话 */
+ if (repository.size() > 0) {/* 如果有文章 */
+ view.printInfo("Loaded " + repository.size() + " articles from previous session");/* 打印加载的文章数量 */
+ }
+ } catch (IOException e) {
+ view.printError("Warning: Failed to load previous session: " + e.getMessage());
+ logger.warn("Failed to load previous session: {}", e.getMessage(), e);
+ }
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
new file mode 100644
index 0000000..ec9bcc3
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
@@ -0,0 +1,103 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.RetryUtils;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.Callable;
+
+/** CLI command "analyze": fetches a page, parses it and prints title/content length statistics without storing anything. */
+public class AnalyzeCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /** Expects args[1] to be the URL; exceptions raised while fetching or parsing are reported via view and log, not rethrown. */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args == null || args.length < 2) {
+            view.printError("Usage: analyze <url>");
+            logger.warn("Invalid command: missing URL argument");
+            return;
+        }
+        String url = args[1];
+        logger.info("Analyze command executed for URL: {}", url);
+
+        try {
+            CrawlStrategy strategy = strategyFactory.getStrategy(url);
+            if (strategy == null) {
+                view.printError("No strategy found for: " + url);
+                logger.error("No strategy found for URL: {}", url);
+                return;
+            }
+
+            // Typed as Callable<Document> so the retry helper's result can be assigned to Document below.
+            Callable<Document> fetchTask = () -> {
+                logger.debug("Fetching document from: {}", url);
+                try {
+                    return Jsoup.connect(url)
+                            .userAgent("Mozilla/5.0")
+                            .timeout(5000)
+                            .get();
+                } catch (IOException e) {
+                    throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e);
+                }
+            };
+            Document doc = RetryUtils.executeWithRetry(fetchTask);
+            logger.info("Successfully fetched document from: {}", url);
+
+            List<Article> articles = strategy.parse(url, doc);
+            logger.info("Parsed {} articles for analysis", articles.size());
+            int total = articles.size();
+            int totalTitleLen = 0;
+            int totalContentLen = 0;
+            for (Article a : articles) {
+                totalTitleLen += a.getTitle() == null ? 0 : a.getTitle().length();
+                totalContentLen += a.getContent() == null ? 0 : a.getContent().length();
+            }
+
+            view.printInfo("===== 分析统计结果 =====");
+            view.printInfo("文章总数:" + total + " 篇");
+            view.printInfo("标题总长度:" + totalTitleLen);
+            view.printInfo("内容总长度:" + totalContentLen);
+            if (total > 0) {
+                // Integer division truncates the averages; the guard avoids dividing by zero for an empty result.
+                view.printInfo("平均标题长度:" + (totalTitleLen / total));
+                view.printInfo("平均内容长度:" + (totalContentLen / total));
+            }
+            view.printInfo("======================");
+            view.printSuccess("分析完成(数据未保存)");
+            logger.info("Analysis completed: {} articles analyzed", total);
+        } catch (NetworkException e) {
+            view.printError("Network error: " + e.getMessage());
+            logger.error("Network error while analyzing {}: {}", url, e.getMessage(), e);
+        } catch (ParseException e) {
+            view.printError("Parse error: " + e.getMessage());
+            logger.error("Parse error while analyzing {}: {}", url, e.getMessage(), e);
+        } catch (Exception e) {
+            view.printError("分析失败:" + e.getMessage());
+            logger.error("Unexpected error while analyzing {}: {}", url, e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..029cadc
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,8 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+
+public interface Command { // Contract for one CLI command dispatched by CrawlerController.
+ String getName(); // Key under which CrawlerController registers and looks up the command.
+ void execute(String[] args, ArticleRepository repository); // args is the whitespace-split input line; args[0] is the command word.
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..1c32175
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,114 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.exception.UrlFormatException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.RetryUtils;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.concurrent.Callable;
+
+/** CLI command "crawl": downloads a page, parses it with the matching strategy and adds the articles to the repository. */
+public class CrawlCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    /** Expects args[1] to be the URL; throws UrlFormatException for a malformed URL, fetch/parse/store failures are reported via view and log. */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args == null || args.length < 2) {
+            view.printError("用法: crawl <url>");
+            logger.warn("无效命令: 缺少URL参数");
+            return;
+        }
+
+        String url = args[1];
+        if (url == null || url.trim().isEmpty()) {
+            view.printError("错误: URL不能为空");
+            logger.error("无效参数: URL为空");
+            return;
+        }
+
+        // Thrown before the try block further down, so this exception propagates to the caller.
+        try {
+            new URL(url);
+        } catch (MalformedURLException e) {
+            logger.error("无效URL格式: {}", url, e);
+            throw new UrlFormatException("无效的URL格式: " + url, url, e);
+        }
+        logger.info("开始爬取: {}", url);
+
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        if (strategy == null) {
+            view.printError("未找到策略: " + url);
+            logger.error("未找到URL对应的策略: {}", url);
+            return;
+        }
+
+        try {
+            view.printInfo("正在爬取: " + url);
+            Callable<Document> fetchTask = () -> {
+                logger.debug("正在获取文档: {}", url);
+                try {
+                    return Jsoup.connect(url)
+                            .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+                            .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8")
+                            .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
+                            // "br" is not advertised: jsoup decodes gzip and deflate bodies only, so a brotli response would parse as garbage.
+                            .header("Accept-Encoding", "gzip, deflate")
+                            .header("Connection", "keep-alive")
+                            .header("Referer", url)
+                            .header("Cache-Control", "max-age=0")
+                            .timeout(15000)
+                            .followRedirects(true)
+                            .get();
+                } catch (IOException e) {
+                    throw new NetworkException("连接失败: " + e.getMessage(), e);
+                }
+            };
+            Document doc = RetryUtils.executeWithRetry(fetchTask);
+            logger.info("成功获取文档: {}", url);
+
+            var articles = strategy.parse(url, doc);
+            logger.info("解析文章数: {}", articles.size());
+            repository.addAll(articles);
+            logger.info("成功添加 {} 篇文章到仓库", articles.size());
+
+            view.printSuccess("爬取完成,共 " + articles.size() + " 篇文章。");
+            logger.info("成功从 {} 爬取 {} 篇文章", url, articles.size());
+        } catch (NetworkException e) {
+            view.printError(e.getMessage());
+            logger.error("爬取 {} 时网络错误: {}", url, e.getMessage(), e);
+        } catch (ParseException e) {
+            view.printError("解析错误: " + e.getMessage());
+            logger.error("爬取 {} 时解析错误: {}", url, e.getMessage(), e);
+        } catch (UrlFormatException e) {
+            view.printError("URL格式错误: " + e.getMessage());
+            logger.error("爬取 {} 时URL格式错误: {}", url, e.getMessage(), e);
+        } catch (Exception e) {
+            view.printError("爬取失败: " + e.getMessage());
+            logger.error("爬取 {} 时发生未知错误: {}", url, e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..69230cb
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,42 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.repository.PersistenceManager;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+
+/** CLI command "exit": saves via PersistenceManager (a failed save is only reported), then terminates the JVM with status 0. */
+public class ExitCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+    private final ConsoleView view;
+    private final PersistenceManager persistenceManager;
+
+    public ExitCommand(ConsoleView view, PersistenceManager persistenceManager) {
+        this.view = view;
+        this.persistenceManager = persistenceManager;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("Exit command executed, saving data before shutdown");
+        try {
+            persistenceManager.save();
+            final int savedCount = repository.size();
+            view.printInfo("Saved " + savedCount + " articles");
+            logger.info("Successfully saved {} articles before exit", savedCount);
+        } catch (IOException saveFailure) {
+            view.printError("Warning: Failed to save data: " + saveFailure.getMessage());
+            logger.error("Failed to save data on exit: {}", saveFailure.getMessage(), saveFailure);
+        }
+        view.printSuccess("Bye!");
+        System.exit(0);
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java
new file mode 100644
index 0000000..94eb154
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java
@@ -0,0 +1,66 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.ExportException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.repository.PersistenceManager;
+import com.example.datacollect.util.JsonExporter;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Paths;
+
+/** CLI command "export": writes the repository to a file through JsonExporter; only the json format is supported. */
+public class ExportCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExportCommand.class);
+    private final ConsoleView view;
+    private final PersistenceManager persistenceManager;
+    private final JsonExporter jsonExporter;
+
+    public ExportCommand(ConsoleView view, PersistenceManager persistenceManager, JsonExporter jsonExporter) {
+        this.view = view;
+        this.persistenceManager = persistenceManager;
+        this.jsonExporter = jsonExporter;
+    }
+
+    @Override
+    public String getName() {
+        return "export";
+    }
+
+    /** Expects args[1] to be the output file, optionally followed by "--format" and a format name. */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args == null || args.length < 2) {
+            view.printError("Usage: export <file> [--format json]");
+            logger.warn("Invalid command: missing file path argument");
+            return;
+        }
+
+        String filePath = args[1];
+        String format = "json";
+
+        // The last "--format" that is followed by a value wins; a trailing "--format" without a value is ignored.
+        for (int i = 2; i + 1 < args.length; i++) {
+            if (args[i].equals("--format")) {
+                format = args[i + 1].toLowerCase();
+            }
+        }
+        logger.info("导出请求: 文件={}, 格式={}", filePath, format);
+
+        try {
+            if ("json".equals(format)) {
+                jsonExporter.exportToFile(Paths.get(filePath));
+                view.printSuccess("Successfully exported " + repository.size() + " articles to " + filePath);
+                logger.info("Exported {} articles to {}", repository.size(), filePath);
+            } else {
+                view.printError("Unsupported format: " + format + ". Only 'json' is supported.");
+                logger.warn("Unsupported format: {}", format);
+            }
+        } catch (ExportException e) {
+            view.printError("Export failed: " + e.getMessage());
+            logger.error("Export error: {}", e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..42e443a
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,33 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** CLI command "help": prints one line per available command, including the argument each one expects. */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+    private final ConsoleView view;
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("Help command executed");
+        view.printInfo("Commands:");
+        view.printInfo(" crawl <url> - Crawl articles from URL");
+        view.printInfo(" list - List all articles");
+        view.printInfo(" export <file> - Export articles to JSON file");
+        view.printInfo(" import <file> - Import articles from JSON file");
+        view.printInfo(" analyze <url> - Analyze URL structure");
+        view.printInfo(" help - Show this help");
+        view.printInfo(" exit - Exit and save data");
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java
new file mode 100644
index 0000000..ba89248
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java
@@ -0,0 +1,71 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.ImportException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.repository.PersistenceManager;
+import com.example.datacollect.util.JsonImporter;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** CLI command "import": loads articles from a file through PersistenceManager and prints the import report. */
+public class ImportCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ImportCommand.class);
+    /** At most this many individual error messages are echoed to the user. */
+    private static final int MAX_SHOWN_ERRORS = 3;
+    private final ConsoleView view;
+    private final PersistenceManager persistenceManager;
+
+    public ImportCommand(ConsoleView view, PersistenceManager persistenceManager) {
+        this.view = view;
+        this.persistenceManager = persistenceManager;
+    }
+
+    @Override
+    public String getName() {
+        return "import";
+    }
+
+    /** Expects args[1] to be the file to import; failures are reported through the view and the log, not rethrown. */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args == null || args.length < 2) {
+            view.printError("Usage: import <file>");
+            logger.warn("Invalid command: missing file path argument");
+            return;
+        }
+        String filePath = args[1];
+
+        try {
+            JsonImporter.ImportResult result = persistenceManager.importWithReport(filePath);
+            var errors = result.getErrors();
+
+            StringBuilder message = new StringBuilder("Import completed:\n")
+                    .append(" - Total found: ").append(result.getTotalFound()).append("\n")
+                    .append(" - Imported: ").append(result.getImported()).append("\n")
+                    .append(" - Skipped (duplicates): ").append(result.getSkipped()).append("\n")
+                    .append(" - Invalid: ").append(result.getInvalid()).append("\n")
+                    .append(" - Overwritten: ").append(result.getOverwritten()).append("\n")
+                    // Read after importWithReport has returned, so the total is the post-import size.
+                    .append(" - Repository total: ").append(repository.size());
+
+            if (!errors.isEmpty()) {
+                message.append("\n - Errors: ").append(errors.size());
+                int shown = Math.min(MAX_SHOWN_ERRORS, errors.size());
+                for (int i = 0; i < shown; i++) {
+                    message.append("\n ").append(i + 1).append(". ").append(errors.get(i));
+                }
+                if (errors.size() > shown) {
+                    message.append("\n ... and ").append(errors.size() - shown).append(" more errors");
+                }
+            }
+
+            view.printSuccess(message.toString());
+            logger.info("Import result: {}", result.getSummary());
+        } catch (Exception e) {
+            // Single handler: ImportException and any other Exception are reported the same way.
+            view.printError("Import failed: " + e.getMessage());
+            logger.error("Import error: {}", e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..9261a3d
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,26 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** CLI command "list": logs the repository size and hands every article to the view for display. */
+public class ListCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+    private final ConsoleView view;
+
+    public ListCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() { return "list"; }
+
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        final int articleCount = repository.size();
+        logger.info("List command executed, showing {} articles", articleCount);
+        view.display(repository.getAll());
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..4caad8a
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,71 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.AnalyzeCommand;
+import com.example.datacollect.command.Command;
+import com.example.datacollect.command.CrawlCommand;
+import com.example.datacollect.command.ExitCommand;
+import com.example.datacollect.command.ExportCommand;
+import com.example.datacollect.command.HelpCommand;
+import com.example.datacollect.command.ImportCommand;
+import com.example.datacollect.command.ListCommand;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.repository.PersistenceManager;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.util.JsonExporter;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.HashMap;
+import java.util.Map;
+
+/** Registers the CLI commands and dispatches each input line to the command named by its first word. */
+public class CrawlerController {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);
+    private final Map<String, Command> commands = new HashMap<>(); // lookup table keyed by Command.getName()
+    private final ConsoleView view;
+    private final ArticleRepository repository;
+
+    public CrawlerController(ConsoleView view, ArticleRepository repository,
+            StrategyFactory strategyFactory, PersistenceManager persistenceManager, JsonExporter jsonExporter) {
+        this.view = view;
+        this.repository = repository;
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view, strategyFactory));
+        register(new ExitCommand(view, persistenceManager));
+        register(new AnalyzeCommand(view, strategyFactory));
+        register(new ExportCommand(view, persistenceManager, jsonExporter));
+        register(new ImportCommand(view, persistenceManager));
+        logger.info("CrawlerController initialized with {} commands", commands.size());
+    }
+
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+        logger.debug("Registered command: {}", command.getName());
+    }
+
+    /** Parses one input line and runs the matching command; null or blank input is ignored, command exceptions are reported. */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+        // args[0] is the command word, lower-cased for the lookup; the whole array is passed on to the command.
+        String[] args = text.split("\\s+");
+        String cmdName = args[0].toLowerCase();
+        logger.debug("Processing command: {}", cmdName);
+
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            view.printError("Unknown command: " + cmdName);
+            logger.warn("Unknown command attempted: {}", cmdName);
+            return;
+        }
+        try {
+            command.execute(args, repository);
+        } catch (Exception e) {
+            view.printError("Command execution failed: " + e.getMessage());
+            logger.error("Error executing command {}: {}", cmdName, e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java
new file mode 100644
index 0000000..230adb3
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class CrawlerException extends Exception { // Checked base exception; NetworkException and ParseException extend it.
+ public CrawlerException(String message) { // Message only, no cause recorded.
+ super(message);
+ }
+ public CrawlerException(String message, Throwable cause) { // Also records the underlying cause.
+ super(message, cause);
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java
new file mode 100644
index 0000000..5d6c3e5
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java
@@ -0,0 +1,56 @@
+package com.example.datacollect.exception;
+
+/** Checked exception for a duplicate article; can carry the duplicate URL and an existing-index value. */
+public class DuplicateArticleException extends Exception {
+    private final String duplicateUrl;
+    private final Integer existingIndex;
+
+    public DuplicateArticleException(String message) {
+        this(message, null, (Integer) null);
+    }
+
+    public DuplicateArticleException(String message, String duplicateUrl) {
+        this(message, duplicateUrl, (Integer) null);
+    }
+
+    public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex) {
+        super(message);
+        this.duplicateUrl = duplicateUrl;
+        this.existingIndex = existingIndex;
+    }
+
+    public DuplicateArticleException(String message, String duplicateUrl, Throwable cause) {
+        this(message, duplicateUrl, null, cause);
+    }
+
+    public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex, Throwable cause) {
+        super(message, cause);
+        this.duplicateUrl = duplicateUrl;
+        this.existingIndex = existingIndex;
+    }
+
+    /** Returns the duplicate URL given at construction, or null when none was supplied. */
+    public String getDuplicateUrl() {
+        return duplicateUrl;
+    }
+
+    /** Returns the existing index given at construction, or null when none was supplied. */
+    public Integer getExistingIndex() {
+        return existingIndex;
+    }
+
+    /** Base message plus a bracketed suffix for each non-null field; a null base message is treated as empty. */
+    @Override
+    public String getMessage() {
+        // new StringBuilder((String) null) would throw NullPointerException, hence the explicit fallback.
+        String base = super.getMessage();
+        StringBuilder sb = new StringBuilder(base == null ? "" : base);
+        if (duplicateUrl != null) {
+            sb.append(" [重复URL: ").append(duplicateUrl).append("]");
+        }
+        if (existingIndex != null) {
+            sb.append(" [已存在位置: ").append(existingIndex).append("]");
+        }
+        return sb.toString();
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java
new file mode 100644
index 0000000..ae46dae
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java
@@ -0,0 +1,63 @@
+package com.example.datacollect.exception;
+
+/** Checked exception for a failed export; can carry the file path and an estimated size in bytes. */
+public class ExportException extends Exception {
+    private final String filePath;
+    private final Long estimatedSize;
+
+    public ExportException(String message) {
+        this(message, (String) null, (Long) null);
+    }
+
+    public ExportException(String message, String filePath) {
+        this(message, filePath, (Long) null);
+    }
+
+    public ExportException(String message, String filePath, Long estimatedSize) {
+        super(message);
+        this.filePath = filePath;
+        this.estimatedSize = estimatedSize;
+    }
+
+    public ExportException(String message, Throwable cause) {
+        this(message, null, cause);
+    }
+
+    public ExportException(String message, String filePath, Throwable cause) {
+        super(message, cause);
+        this.filePath = filePath;
+        this.estimatedSize = null;
+    }
+
+    /** Returns the file path given at construction, or null when none was supplied. */
+    public String getFilePath() {
+        return filePath;
+    }
+
+    /** Returns the estimated size given at construction, or null when none was supplied. */
+    public Long getEstimatedSize() {
+        return estimatedSize;
+    }
+
+    /** Base message plus a bracketed suffix per non-null field; a null base message is treated as empty (StringBuilder rejects a null String). */
+    @Override
+    public String getMessage() {
+        String base = super.getMessage();
+        StringBuilder sb = new StringBuilder(base == null ? "" : base);
+        if (filePath != null) {
+            sb.append(" [文件: ").append(filePath).append("]");
+        }
+        if (estimatedSize != null) {
+            sb.append(" [预估大小: ").append(formatSize(estimatedSize)).append("]");
+        }
+        return sb.toString();
+    }
+
+    /** Formats a byte count as B, KB, MB or GB using 1024 steps; everything above B is shown with two decimals. */
+    private static String formatSize(long size) {
+        if (size < 1024) return size + " B";
+        if (size < 1024 * 1024) return String.format("%.2f KB", size / 1024.0);
+        if (size < 1024 * 1024 * 1024) return String.format("%.2f MB", size / (1024.0 * 1024));
+        return String.format("%.2f GB", size / (1024.0 * 1024 * 1024));
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java
new file mode 100644
index 0000000..ba893bb
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java
@@ -0,0 +1,56 @@
+package com.example.datacollect.exception;
+
+/** Checked exception for a failed import; can carry the file path and a line number. */
+public class ImportException extends Exception {
+    private final String filePath;
+    private final Integer lineNumber;
+
+    public ImportException(String message) {
+        this(message, (String) null, (Integer) null);
+    }
+
+    public ImportException(String message, String filePath) {
+        this(message, filePath, (Integer) null);
+    }
+
+    public ImportException(String message, String filePath, Integer lineNumber) {
+        super(message);
+        this.filePath = filePath;
+        this.lineNumber = lineNumber;
+    }
+
+    public ImportException(String message, Throwable cause) {
+        this(message, null, cause);
+    }
+
+    public ImportException(String message, String filePath, Throwable cause) {
+        super(message, cause);
+        this.filePath = filePath;
+        this.lineNumber = null;
+    }
+
+    /** Returns the file path given at construction, or null when none was supplied. */
+    public String getFilePath() {
+        return filePath;
+    }
+
+    /** Returns the line number given at construction, or null when none was supplied. */
+    public Integer getLineNumber() {
+        return lineNumber;
+    }
+
+    /** Base message plus a bracketed suffix for each non-null field; a null base message is treated as empty. */
+    @Override
+    public String getMessage() {
+        // new StringBuilder((String) null) would throw NullPointerException, hence the explicit fallback.
+        String base = super.getMessage();
+        StringBuilder sb = new StringBuilder(base == null ? "" : base);
+        if (filePath != null) {
+            sb.append(" [文件: ").append(filePath).append("]");
+        }
+        if (lineNumber != null) {
+            sb.append(" [行号: ").append(lineNumber).append("]");
+        }
+        return sb.toString();
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..3a24c92
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class NetworkException extends CrawlerException { // Checked exception; the crawl/analyze commands wrap a jsoup IOException in it.
+ public NetworkException(String message) { // Message only, no cause recorded.
+ super(message);
+ }
+ public NetworkException(String message, Throwable cause) { // Also records the underlying cause.
+ super(message, cause);
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..09f9f20
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,10 @@
+package com.example.datacollect.exception;
+
+public class ParseException extends CrawlerException { // Checked exception; the crawl/analyze commands catch it separately from NetworkException.
+ public ParseException(String message) { // Message only, no cause recorded.
+ super(message);
+ }
+ public ParseException(String message, Throwable cause) { // Also records the underlying cause.
+ super(message, cause);
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java
new file mode 100644
index 0000000..f94380e
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java
@@ -0,0 +1,30 @@
+package com.example.datacollect.exception;
+
+/** Unchecked exception for a URL that fails format validation; optionally records the offending URL string. */
+public class UrlFormatException extends RuntimeException {
+
+    private final String invalidUrl;
+
+    public UrlFormatException(String message) {
+        this(message, (String) null);
+    }
+
+    public UrlFormatException(String message, String invalidUrl) {
+        super(message);
+        this.invalidUrl = invalidUrl;
+    }
+
+    public UrlFormatException(String message, Throwable cause) {
+        this(message, null, cause);
+    }
+
+    public UrlFormatException(String message, String invalidUrl, Throwable cause) {
+        super(message, cause);
+        this.invalidUrl = invalidUrl;
+    }
+
+    /** Returns the URL given at construction, or null when the constructor used did not take one. */
+    public String getInvalidUrl() {
+        return invalidUrl;
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java
new file mode 100644
index 0000000..274ba18
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java
@@ -0,0 +1,72 @@
+package com.example.datacollect.exception;
+
+/** Checked exception for a failed validation; can carry the field name, the rejected value and the rule text. */
+public class ValidationException extends Exception {
+    /** Invalid values longer than this are cut to this many characters (plus "...") in getMessage(). */
+    private static final int MAX_DISPLAY_LENGTH = 50;
+    private final String fieldName;
+    private final String invalidValue;
+    private final String validationRule;
+
+    public ValidationException(String message) {
+        this(message, null, null, null);
+    }
+
+    public ValidationException(String message, String fieldName) {
+        this(message, fieldName, null, null);
+    }
+
+    public ValidationException(String message, String fieldName, String invalidValue) {
+        this(message, fieldName, invalidValue, null);
+    }
+
+    public ValidationException(String message, String fieldName, String invalidValue, String validationRule) {
+        super(message);
+        this.fieldName = fieldName;
+        this.invalidValue = invalidValue;
+        this.validationRule = validationRule;
+    }
+
+    public ValidationException(String message, Throwable cause) {
+        super(message, cause);
+        this.fieldName = null;
+        this.invalidValue = null;
+        this.validationRule = null;
+    }
+
+    /** Returns the field name given at construction, or null when none was supplied. */
+    public String getFieldName() {
+        return fieldName;
+    }
+
+    /** Returns the full, untruncated invalid value, or null when none was supplied. */
+    public String getInvalidValue() {
+        return invalidValue;
+    }
+
+    /** Returns the validation rule given at construction, or null when none was supplied. */
+    public String getValidationRule() {
+        return validationRule;
+    }
+
+    /** Base message plus a bracketed suffix for each non-null field; a null base message is treated as empty. */
+    @Override
+    public String getMessage() {
+        // new StringBuilder((String) null) would throw NullPointerException, hence the explicit fallback.
+        String base = super.getMessage();
+        StringBuilder sb = new StringBuilder(base == null ? "" : base);
+        if (fieldName != null) {
+            sb.append(" [字段: ").append(fieldName).append("]");
+        }
+        if (invalidValue != null) {
+            String displayValue = invalidValue.length() > MAX_DISPLAY_LENGTH
+                    ? invalidValue.substring(0, MAX_DISPLAY_LENGTH) + "..."
+                    : invalidValue;
+            sb.append(" [值: ").append(displayValue).append("]");
+        }
+        if (validationRule != null) {
+            sb.append(" [规则: ").append(validationRule).append("]");
+        }
+        return sb.toString();
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java
new file mode 100644
index 0000000..c593e11
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java
@@ -0,0 +1,99 @@
+package com.example.datacollect.model;
+
+import com.fasterxml.jackson.annotation.JsonCreator;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import java.time.LocalDateTime;
+
+/** A crawled article: validated title and URL, size-capped content, and the time it was crawled. */
+public class Article {
+    private String title;
+    private String url;
+    private String content;
+    private LocalDateTime crawledAt;
+
+    /** Creates an empty, unvalidated article stamped with the current time. */
+    public Article() {
+        this.crawledAt = LocalDateTime.now();
+    }
+
+    /** Creates a validated article stamped with the current time. */
+    public Article(String title, String url, String content) {
+        this(title, url, content, null);
+    }
+
+    /** Jackson entry point; validates like the setters and falls back to the current time when crawledAt is null. */
+    @JsonCreator
+    public Article(@JsonProperty("title") String title,
+                   @JsonProperty("url") String url,
+                   @JsonProperty("content") String content,
+                   @JsonProperty("crawledAt") LocalDateTime crawledAt) {
+        setTitle(title);
+        setUrl(url);
+        setContent(content);
+        this.crawledAt = crawledAt != null ? crawledAt : LocalDateTime.now();
+    }
+
+    public String getTitle() { return title; }
+
+    /** Stores the trimmed title; throws IllegalArgumentException if it is null, blank, or over 500 characters. */
+    public void setTitle(String title) {
+        if (title == null) {
+            throw new IllegalArgumentException("Title cannot be null");
+        }
+        String trimmed = title.trim(); // validate the value that is actually stored
+        if (trimmed.isEmpty()) {
+            throw new IllegalArgumentException("Title cannot be empty");
+        }
+        if (trimmed.length() > 500) {
+            throw new IllegalArgumentException("Title cannot exceed 500 characters");
+        }
+        this.title = trimmed;
+    }
+
+    public String getUrl() { return url; }
+
+    /** Stores the trimmed URL; throws IllegalArgumentException if it is null, blank, or not http(s). */
+    public void setUrl(String url) {
+        if (url == null) {
+            throw new IllegalArgumentException("URL cannot be null");
+        }
+        String trimmed = url.trim(); // trim first so surrounding whitespace cannot fail the scheme check
+        if (trimmed.isEmpty()) {
+            throw new IllegalArgumentException("URL cannot be empty");
+        }
+        if (!trimmed.startsWith("http://") && !trimmed.startsWith("https://")) {
+            throw new IllegalArgumentException("URL must start with http:// or https://");
+        }
+        this.url = trimmed;
+    }
+
+    public String getContent() { return content; }
+
+    /** Stores the content; null becomes "" and text beyond 10000 characters is truncated. */
+    public void setContent(String content) {
+        if (content == null) {
+            this.content = "";
+        } else if (content.length() > 10000) {
+            this.content = content.substring(0, 10000);
+        } else {
+            this.content = content;
+        }
+    }
+
+    public LocalDateTime getCrawledAt() { return crawledAt; }
+
+    /** Replaces the crawl timestamp; the value is stored without validation. */
+    public void setCrawledAt(LocalDateTime crawledAt) {
+        this.crawledAt = crawledAt;
+    }
+
+    /** Debug representation with title, URL and crawl time; the content is not included. */
+    @Override
+    public String toString() {
+        return "Article{"
+                + "title='" + title + '\''
+                + ", url='" + url + '\''
+                + ", crawledAt='" + crawledAt + '\''
+                + '}';
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..4b6b981
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,172 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.util.JsonSerializer;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+public class ArticleRepository implements AutoCloseable {
+ private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+ private static final int MAX_TITLE_LENGTH = 500;
+ private static final int MAX_CONTENT_LENGTH = 10000;
+
+ private final List articles = new ArrayList<>();
+ private final Set urlSet = new HashSet<>();
+
+ public void add(Article article) {
+ if (article == null) {
+ logger.error("Attempted to add null article");
+ throw new IllegalArgumentException("Article cannot be null");
+ }
+
+ String title = article.getTitle();
+ String url = article.getUrl();
+ String content = article.getContent();
+
+ if (title == null || title.trim().isEmpty()) {
+ logger.warn("Attempted to add article with empty title");
+ throw new IllegalArgumentException("Article title cannot be null or empty");
+ }
+
+ if (url == null || url.trim().isEmpty()) {
+ logger.warn("Attempted to add article with empty URL");
+ throw new IllegalArgumentException("Article URL cannot be null or empty");
+ }
+
+ if (title.length() > MAX_TITLE_LENGTH) {
+ logger.warn("Article title too long: {} characters (max: {})", title.length(), MAX_TITLE_LENGTH);
+ throw new IllegalArgumentException("Article title exceeds maximum length of " + MAX_TITLE_LENGTH);
+ }
+
+ if (content != null && content.length() > MAX_CONTENT_LENGTH) {
+ logger.warn("Article content too long: {} characters (max: {})", content.length(), MAX_CONTENT_LENGTH);
+ content = content.substring(0, MAX_CONTENT_LENGTH);
+ }
+
+ if (!url.startsWith("http://") && !url.startsWith("https://")) {
+ logger.warn("Invalid URL format: {}", url);
+ throw new IllegalArgumentException("Article URL must start with http:// or https://");
+ }
+
+ if (urlSet.contains(url)) {
+ logger.warn("Duplicate article URL detected: {}", url);
+ return;
+ }
+
+ Article validatedArticle = new Article(title.trim(), url.trim(), content != null ? content.trim() : "");
+ articles.add(validatedArticle);
+ urlSet.add(url);
+ logger.debug("Added article: {}", title);
+ }
+
+ public void addAll(List articleList) {
+ if (articleList == null) {
+ logger.error("Attempted to add null article list");
+ throw new IllegalArgumentException("Article list cannot be null");
+ }
+
+ int successCount = 0;
+ int skipCount = 0;
+
+ for (Article article : articleList) {
+ if (article != null) {
+ try {
+ add(article);
+ successCount++;
+ } catch (IllegalArgumentException e) {
+ logger.warn("Skipped invalid article: {}", e.getMessage());
+ skipCount++;
+ }
+ } else {
+ logger.warn("Skipped null article in list");
+ skipCount++;
+ }
+ }
+
+ logger.info("Added {} articles, skipped {} invalid articles", successCount, skipCount);
+ }
+
+ public List getAll() {
+ logger.debug("Retrieving all articles, total: {}", articles.size());
+ return Collections.unmodifiableList(articles);
+ }
+
+ public int size() {
+ return articles.size();
+ }
+
+ public void clear() {
+ int count = articles.size();
+ articles.clear();
+ urlSet.clear();
+ logger.info("Cleared repository, removed {} articles", count);
+ }
+
+ public void remove(Article article) {
+ if (article == null) {
+ logger.warn("Attempted to remove null article");
+ return;
+ }
+
+ String url = article.getUrl();
+ if (url != null && urlSet.contains(url)) {
+ articles.remove(article);
+ urlSet.remove(url);
+ logger.debug("Removed article: {}", article.getTitle());
+ } else {
+ logger.warn("Article not found in repository: {}", url);
+ }
+ }
+
+ public Article findByUrl(String url) {
+ if (url == null || url.trim().isEmpty()) {
+ logger.debug("findByUrl called with null or empty URL");
+ return null;
+ }
+
+ for (Article article : articles) {
+ if (article.getUrl().equals(url)) {
+ logger.debug("Found article by URL: {}", url);
+ return article;
+ }
+ }
+
+ logger.debug("No article found with URL: {}", url);
+ return null;
+ }
+
+ public boolean containsUrl(String url) {
+ return url != null && urlSet.contains(url);
+ }
+
+ public void saveToJson(String filePath) throws IOException {
+ JsonSerializer.writeToFile(articles, filePath);
+ logger.info("Saved {} articles to JSON file: {}", articles.size(), filePath);
+ }
+
+ public void loadFromJson(String filePath) throws IOException {
+ List loadedArticles = JsonSerializer.readListFromFile(filePath, Article.class);
+ addAll(loadedArticles);
+ logger.info("Loaded {} articles from JSON file: {}", loadedArticles.size(), filePath);
+ }
+
+ public String toJsonString() {
+ return JsonSerializer.serialize(articles);
+ }
+
+ public String toJsonStringCompact() {
+ return JsonSerializer.serializeCompact(articles);
+ }
+
+ @Override
+ public void close() {
+ logger.debug("ArticleRepository closed");
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java
new file mode 100644
index 0000000..d85755d
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java
@@ -0,0 +1,182 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.exception.ExportException;
+import com.example.datacollect.exception.ImportException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.util.JsonExporter;
+import com.example.datacollect.util.JsonImporter;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/** Persists an ArticleRepository as JSON under a backup directory; import, export and snapshots go through JsonImporter/JsonExporter. */
+public class PersistenceManager implements AutoCloseable {
+    private static final Logger logger = LoggerFactory.getLogger(PersistenceManager.class);
+    private static final String DEFAULT_BACKUP_DIR = "data";
+    private static final String DEFAULT_BACKUP_FILE = "articles.json";
+    private static final String BACKUP_FILE_PATTERN = "articles_%s.json";
+    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss");
+
+    private final ObjectMapper objectMapper;
+    private final Path backupDir;
+    private final Path backupFile;
+    private final ArticleRepository repository;
+    private final AtomicBoolean autoSaveEnabled;
+    private final JsonExporter jsonExporter;
+    private final JsonImporter jsonImporter;
+
+    /** Uses the default backup directory ("data"). */
+    public PersistenceManager(ArticleRepository repository) {
+        this(repository, DEFAULT_BACKUP_DIR);
+    }
+
+    /** Wires up the mapper, exporter and importer and creates the backup directory if it is missing. */
+    public PersistenceManager(ArticleRepository repository, String backupDir) {
+        this.repository = repository;
+        this.backupDir = Paths.get(backupDir);
+        this.backupFile = this.backupDir.resolve(DEFAULT_BACKUP_FILE);
+        this.autoSaveEnabled = new AtomicBoolean(true);
+        this.objectMapper = new ObjectMapper();
+        this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
+        this.jsonExporter = new JsonExporter(repository);
+        this.jsonImporter = new JsonImporter(repository);
+        ensureBackupDirExists();
+        logger.info("PersistenceManager initialized with backup directory: {}", backupDir);
+    }
+
+    /** Creates the backup directory when it does not exist; an I/O failure is rethrown as RuntimeException. */
+    private void ensureBackupDirExists() {
+        try {
+            if (!Files.exists(backupDir)) {
+                Files.createDirectories(backupDir);
+                logger.debug("Created backup directory: {}", backupDir);
+            }
+        } catch (IOException e) {
+            logger.error("Failed to create backup directory: {}", e.getMessage(), e);
+            throw new RuntimeException("Failed to create backup directory", e);
+        }
+    }
+
+    /** Writes all articles to the backup file; does nothing while auto-save is disabled. */
+    public void save() throws IOException {
+        if (!autoSaveEnabled.get()) {
+            logger.debug("Auto-save is disabled, skipping save");
+            return;
+        }
+        List<Article> articles = repository.getAll();
+        // NOTE(review): Article carries a LocalDateTime and no java.time module is registered on this mapper — confirm the output format.
+        try (BufferedWriter writer = Files.newBufferedWriter(backupFile, StandardCharsets.UTF_8)) {
+            objectMapper.writeValue(writer, articles);
+            logger.info("Successfully saved {} articles to {}", articles.size(), backupFile);
+        }
+    }
+
+    /** Loads the backup file into the repository when it exists; entries rejected by addAll are left out. */
+    public void load() throws IOException {
+        if (!Files.exists(backupFile)) {
+            logger.info("No backup file found at {}, starting fresh", backupFile);
+            return;
+        }
+        try (var reader = Files.newBufferedReader(backupFile, StandardCharsets.UTF_8)) {
+            List<Article> articles = objectMapper.readValue(reader,
+                    objectMapper.getTypeFactory().constructCollectionType(List.class, Article.class));
+            if (articles != null && !articles.isEmpty()) {
+                repository.addAll(articles);
+                logger.info("Successfully loaded {} articles from {}", articles.size(), backupFile);
+            }
+        }
+    }
+
+    /** Exports in MINIMAL mode with metadata; an ExportException is rethrown as IOException. */
+    public void exportTo(String filePath) throws IOException {
+        try {
+            JsonExporter.ExportOptions options = new JsonExporter.ExportOptions();
+            options.setMode(JsonExporter.ExportMode.MINIMAL);
+            options.setIncludeMetadata(true);
+            jsonExporter.exportToFile(Paths.get(filePath), options);
+        } catch (ExportException e) {
+            throw new IOException("Export failed: " + e.getMessage(), e);
+        }
+    }
+
+    /** Imports with the SKIP duplicate strategy; an ImportException is rethrown as IOException. */
+    public void importFrom(String filePath) throws IOException {
+        try {
+            JsonImporter.ImportOptions options = new JsonImporter.ImportOptions();
+            options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP);
+            jsonImporter.importFromFile(Paths.get(filePath), options);
+        } catch (ImportException e) {
+            throw new IOException("Import failed: " + e.getMessage(), e);
+        }
+    }
+
+    /** Exports a timestamped articles_yyyyMMdd_HHmmss.json snapshot (STANDARD mode) into the backup directory. */
+    public void createSnapshot() throws IOException {
+        String timestamp = LocalDateTime.now().format(DATE_FORMATTER);
+        Path snapshotFile = backupDir.resolve(String.format(BACKUP_FILE_PATTERN, timestamp));
+        try {
+            JsonExporter.ExportOptions options = new JsonExporter.ExportOptions();
+            options.setMode(JsonExporter.ExportMode.STANDARD);
+            options.setIncludeMetadata(true);
+            jsonExporter.exportToFile(snapshotFile, options);
+            logger.info("Created snapshot: {} ({} articles)", snapshotFile, repository.size());
+        } catch (ExportException e) {
+            throw new IOException("Failed to create snapshot: " + e.getMessage(), e);
+        }
+    }
+
+    /** Returns the paths of all articles_*.json files in the backup directory, in no particular order. */
+    public List<String> listSnapshots() throws IOException {
+        List<String> snapshots = new ArrayList<>();
+        if (Files.exists(backupDir)) {
+            try (var stream = Files.list(backupDir)) {
+                stream.filter(path -> {
+                    String fileName = path.getFileName().toString();
+                    return fileName.startsWith("articles_") && fileName.endsWith(".json") && !fileName.equals(DEFAULT_BACKUP_FILE);
+                }).forEach(path -> snapshots.add(path.toString()));
+            }
+        }
+        return snapshots;
+    }
+
+    /** Turns save() on or off; while disabled, save() and therefore close() write nothing. */
+    public void setAutoSaveEnabled(boolean enabled) {
+        autoSaveEnabled.set(enabled);
+        logger.info("Auto-save {}", enabled ? "enabled" : "disabled");
+    }
+
+    public boolean isAutoSaveEnabled() { return autoSaveEnabled.get(); }
+
+    public String getBackupFilePath() { return backupFile.toString(); }
+
+    /** Same import as importFrom, but returns the importer's result and lets ImportException propagate. */
+    public JsonImporter.ImportResult importWithReport(String filePath) throws ImportException {
+        JsonImporter.ImportOptions options = new JsonImporter.ImportOptions();
+        options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP);
+        return jsonImporter.importFromFile(Paths.get(filePath), options);
+    }
+
+    /** Saves on close (a no-op while auto-save is disabled, which the log line reflects); a failed save is logged, not rethrown. */
+    @Override
+    public void close() {
+        try {
+            save();
+            logger.info("PersistenceManager closed, data {}", autoSaveEnabled.get() ? "saved" : "not saved (auto-save disabled)");
+        } catch (IOException e) {
+            logger.error("Failed to save data on close: {}", e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..ed69e19
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+public interface CrawlStrategy { // site-specific page parser; implementations are selected by URL
+    List<Article> parse(String url, Document doc) throws ParseException; // articles extracted from the given page
+    boolean supports(String url); // whether this strategy handles the given URL
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java
new file mode 100644
index 0000000..0635236
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java
@@ -0,0 +1,115 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Crawl strategy for CSDN pages: collects article links together with their titles and optional summaries.
+ */
+public class CsdnStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(CsdnStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("csdn.net"); // a null URL is simply unsupported
+    }
+
+    /**
+     * Builds one Article per distinct CSDN link in the document whose title has at least 5 characters.
+     *
+     * @param url address of the page; used as the base when a link has no absolute form
+     * @param doc parsed page
+     * @return the parsed articles, possibly empty
+     * @throws ParseException if anything outside the per-link handling fails
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse CSDN: {}", url);
+        List<Article> articles = new ArrayList<>();
+        Set<String> seenUrls = new HashSet<>();
+
+        try {
+            // Selectors go from most to least specific; each fallback runs only when the previous one matched nothing.
+            Elements links = doc.select("a[href*='/article/details/']");
+            logger.debug("Found {} article links", links.size());
+
+            if (links.isEmpty()) {
+                links = doc.select("a[href*='csdn.net/article/']");
+                logger.debug("Trying alternative selector, found {} items", links.size());
+            }
+
+            if (links.isEmpty()) {
+                links = doc.select("a.title, a.article-title, .article-item a, .list-item a");
+                logger.debug("Trying fallback selectors, found {} items", links.size());
+            }
+
+            for (Element link : links) {
+                try {
+                    String href = link.attr("href");
+                    if (href == null || href.isEmpty()) {
+                        continue;
+                    }
+
+                    String articleUrl = link.attr("abs:href");
+                    if (articleUrl == null || articleUrl.isEmpty()) {
+                        // jsoup produced no absolute URL: keep an absolute href as-is, otherwise resolve the
+                        // relative or protocol-relative ("//host/...") href against the page address.
+                        articleUrl = href.startsWith("http") ? href : new java.net.URL(new java.net.URL(url), href).toString();
+                    }
+
+                    // Set.add returns false for a URL that was already collected.
+                    if (!articleUrl.contains("csdn.net") || !seenUrls.add(articleUrl)) {
+                        continue;
+                    }
+
+                    String title = link.text().trim();
+                    if (title.length() < 5) {
+                        // Link text missing or too short: look for a title element inside the link.
+                        Element titleEl = link.selectFirst("span, h3, h4, .title");
+                        if (titleEl != null) {
+                            title = titleEl.text().trim();
+                        }
+                    }
+                    if (title.length() < 5) {
+                        continue;
+                    }
+
+                    String content = "";
+                    Element parent = link.parent();
+                    if (parent != null) {
+                        Element desc = parent.selectFirst("p.description, .desc, .summary");
+                        if (desc != null) {
+                            content = desc.text().trim();
+                        }
+                    }
+
+                    articles.add(new Article(title, articleUrl, content));
+                    logger.debug("Parsed article: {}", title);
+                } catch (Exception e) {
+                    // A single bad link (including an unresolvable URL) is skipped, not fatal.
+                    logger.debug("Skipping link due to error: {}", e.getMessage());
+                }
+            }
+
+            if (articles.isEmpty()) {
+                logger.warn("No articles found. CSDN page structure may have changed.");
+            }
+
+            logger.info("Successfully parsed {} articles from CSDN", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse CSDN page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse CSDN: " + e.getMessage(), e);
+        }
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
new file mode 100644
index 0000000..6892510
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
@@ -0,0 +1,77 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Crawl strategy for the HNU news site (news.hnu.edu.cn).
+ * Reads every "ul.list11 li" entry; a failure in one entry is logged and the remaining entries are still parsed.
+ */
+public class HnuNewsStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(HnuNewsStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("news.hnu.edu.cn"); // a null URL is simply unsupported
+    }
+
+    /**
+     * Builds one Article per list entry that has a link and a non-empty title.
+     *
+     * @throws ParseException if anything outside the per-entry handling fails
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse HNU News: {}", url);
+        List<Article> articles = new ArrayList<>();
+
+        try {
+            Elements listItems = doc.select("ul.list11 li");
+            logger.debug("Found {} list items", listItems.size());
+
+            for (Element li : listItems) {
+                try {
+                    Element link = li.selectFirst("a");
+                    if (link == null) {
+                        logger.warn("No link found in list item");
+                        continue;
+                    }
+
+                    String articleUrl = link.attr("href");
+                    if (articleUrl.startsWith("//")) {
+                        articleUrl = "https:" + articleUrl; // protocol-relative link: keep its own host
+                    } else if (!articleUrl.startsWith("http")) {
+                        // Drop ".." segments and join with exactly one slash, so "info/1.htm" and "../info/1.htm" both resolve.
+                        articleUrl = "https://news.hnu.edu.cn/" + articleUrl.replace("..", "").replaceFirst("^/+", "");
+                    }
+
+                    Element titleEl = link.selectFirst("h4.l2.h4s2");
+                    String title = titleEl != null ? titleEl.text().trim() : "";
+                    Element contentEl = link.selectFirst("p.l3.ps3");
+                    String content = contentEl != null ? contentEl.text().trim() : "";
+
+                    if (!title.isEmpty()) {
+                        articles.add(new Article(title, articleUrl, content));
+                    } else {
+                        logger.warn("Empty title found, skipping article");
+                    }
+                } catch (Exception e) {
+                    logger.error("Error parsing individual article: {}", e.getMessage());
+                }
+            }
+
+            logger.info("Successfully parsed {} articles from HNU News", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse HNU News page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse HNU News: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java
new file mode 100644
index 0000000..eb25935
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java
@@ -0,0 +1,83 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+/** Crawl strategy for the People's Daily site (people.com.cn). */
+public class PeopleStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(PeopleStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("people.com.cn"); // a null URL is simply unsupported
+    }
+
+    /**
+     * Builds one Article per matched news item whose link text is longer than 5 characters.
+     *
+     * @throws ParseException if anything outside the per-item handling fails
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse People's Daily News: {}", url);
+        List<Article> articles = new ArrayList<>();
+
+        try {
+            Elements newsItems = doc.select("div.w1000, div.news-item, li.list_item");
+            logger.debug("Found {} news containers", newsItems.size());
+
+            if (newsItems.isEmpty()) {
+                // No known container matched: fall back to links whose href contains /n1/.
+                newsItems = doc.select("a[href*='/n1/']");
+                logger.debug("Trying alternative selector, found {} items", newsItems.size());
+            }
+
+            for (Element item : newsItems) {
+                try {
+                    Element link = item.selectFirst("a");
+                    if (link == null && item.tagName().equals("a")) {
+                        link = item;
+                    }
+
+                    if (link == null) {
+                        logger.warn("No link found in news item");
+                        continue;
+                    }
+
+                    String articleUrl = link.attr("href");
+                    if (articleUrl.startsWith("//")) {
+                        articleUrl = "https:" + articleUrl; // protocol-relative link: keep its own host
+                    } else if (!articleUrl.startsWith("http")) {
+                        articleUrl = "https://www.people.com.cn" + (articleUrl.startsWith("/") ? "" : "/") + articleUrl;
+                    }
+
+                    String title = link.text().trim();
+                    Element contentEl = item.selectFirst("p, div.ed, div.summary");
+                    String content = contentEl != null ? contentEl.text().trim() : "";
+
+                    if (title.length() > 5) {
+                        articles.add(new Article(title, articleUrl, content));
+                        logger.debug("Parsed article: {}", title);
+                    } else {
+                        logger.warn("Invalid title found, skipping article");
+                    }
+                } catch (Exception e) {
+                    // One bad item is logged and skipped so the rest of the page is still parsed.
+                    logger.error("Error parsing individual article: {}", e.getMessage());
+                }
+            }
+
+            logger.info("Successfully parsed {} articles from People's Daily News", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse People's Daily News page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse People's Daily News: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
new file mode 100644
index 0000000..31554d4
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
@@ -0,0 +1,35 @@
+package com.example.datacollect.strategy;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Keeps the crawl strategies (built-ins are registered by the constructor) and picks the first one supporting a URL. */
+public class StrategyFactory {
+    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);
+    private final List<CrawlStrategy> strategies = new ArrayList<>();
+
+    public StrategyFactory() {
+        strategies.addAll(List.of(new HnuNewsStrategy(), new YouthStrategy(), new PeopleStrategy(), new CsdnStrategy()));
+        logger.info("Initialized StrategyFactory with {} strategies", strategies.size());
+    }
+
+    /** Returns the first registered strategy whose supports(url) is true, or null when none matches or url is null. */
+    public CrawlStrategy getStrategy(String url) {
+        for (CrawlStrategy s : strategies) {
+            if (url != null && s.supports(url)) { // a null URL matches nothing instead of throwing inside supports()
+                logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url);
+                return s;
+            }
+        }
+        logger.warn("No strategy found for URL: {}", url);
+        return null;
+    }
+
+    /** Appends a strategy; it is consulted after every strategy registered before it. */
+    public void register(CrawlStrategy strategy) {
+        strategies.add(strategy);
+        logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName());
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java
new file mode 100644
index 0000000..946cdc3
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java
@@ -0,0 +1,112 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+/** Crawl strategy for the youth.cn news site. */
+public class YouthStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(YouthStrategy.class);
+
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("youth.cn"); // a null URL is simply unsupported
+    }
+
+    /**
+     * Builds one Article per matched news item whose title is longer than 5 characters.
+     *
+     * @throws ParseException if anything outside the per-item handling fails
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.info("Starting to parse Youth News: {}", url);
+        List<Article> articles = new ArrayList<>();
+
+        try {
+            // Selectors go from most to least specific; each fallback runs only when the previous one matched nothing.
+            Elements newsItems = doc.select("div.news-item, div.article-item, li.news-list-item, div.list-item, ul.list li, .news-list li");
+            logger.debug("Found {} news items with primary selectors", newsItems.size());
+
+            if (newsItems.isEmpty()) {
+                newsItems = doc.select("a[href*='/n1/'], a[href*='/gn/'], a[href*='/qy/'], a[href*='/jj/']");
+                logger.debug("Trying alternative selector (news category links), found {} items", newsItems.size());
+            }
+
+            if (newsItems.isEmpty()) {
+                newsItems = doc.select("a[href$='.html']");
+                logger.debug("Trying fallback selector (html links), found {} items", newsItems.size());
+            }
+
+            for (Element item : newsItems) {
+                try {
+                    Element link = item.selectFirst("a");
+                    if (link == null && item.tagName().equals("a")) {
+                        link = item;
+                    }
+
+                    if (link == null) {
+                        logger.debug("No link found in item, skipping");
+                        continue;
+                    }
+
+                    String articleUrl = link.attr("href");
+                    if (articleUrl.startsWith("//")) {
+                        articleUrl = "https:" + articleUrl; // protocol-relative link: keep its own host
+                    } else if (!articleUrl.startsWith("http")) {
+                        articleUrl = "https://www.youth.cn" + (articleUrl.startsWith("/") ? "" : "/") + articleUrl;
+                    }
+
+                    // Title lookup: the link text first, then an element inside the link, then one anywhere in the item.
+                    String title = link.text().trim();
+                    if (title.isEmpty()) {
+                        Element titleEl = link.selectFirst("span, h3, h4, .title");
+                        if (titleEl != null) {
+                            title = titleEl.text().trim();
+                        }
+                    }
+
+                    if (title.isEmpty()) {
+                        Element parentTitle = item.selectFirst("span, h3, h4, .title, .news-title");
+                        if (parentTitle != null) {
+                            title = parentTitle.text().trim();
+                        }
+                    }
+
+                    if (title.isEmpty()) {
+                        logger.debug("Empty title found, skipping");
+                        continue;
+                    }
+
+                    Element contentEl = item.selectFirst("p.summary, p.desc, div.brief, .summary, .desc");
+                    String content = contentEl != null ? contentEl.text().trim() : "";
+
+                    if (title.length() > 5) {
+                        articles.add(new Article(title, articleUrl, content));
+                        logger.debug("Parsed article: {}", title);
+                    } else {
+                        logger.debug("Invalid title found (length: {}), skipping article", title.length());
+                    }
+                } catch (Exception e) {
+                    // One bad item is logged and skipped so the rest of the page is still parsed.
+                    logger.debug("Error parsing individual article: {}", e.getMessage());
+                }
+            }
+
+            if (articles.isEmpty()) {
+                logger.warn("No articles found. Youth.cn page structure may have changed.");
+            }
+
+            logger.info("Successfully parsed {} articles from Youth News", articles.size());
+            return articles;
+        } catch (Exception e) {
+            logger.error("Failed to parse Youth News page: {}", e.getMessage(), e);
+            throw new ParseException("Failed to parse Youth News: " + e.getMessage(), e);
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java
new file mode 100644
index 0000000..705fcaf
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java
@@ -0,0 +1,261 @@
+package com.example.datacollect.util;
+
+import com.example.datacollect.exception.ExportException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.time.LocalDateTime;
+import java.time.format.DateTimeFormatter;
+import java.util.*;
+import java.util.stream.Collectors;
+
+public class JsonExporter {
+ private static final Logger logger = LoggerFactory.getLogger(JsonExporter.class);
+ private static final String VERSION = "1.0";
+ private static final DateTimeFormatter EXPORT_TIME_FORMAT = DateTimeFormatter.ISO_LOCAL_DATE_TIME;
+
+ public enum ExportMode {
+ STANDARD,
+ COMPACT,
+ MINIMAL
+ }
+
+ public static class ExportOptions {
+ private ExportMode mode = ExportMode.STANDARD;
+ private String filterKeyword;
+ private LocalDateTime startDate;
+ private LocalDateTime endDate;
+ private boolean includeMetadata = true;
+
+ public ExportOptions() {}
+
+ public ExportMode getMode() {
+ return mode;
+ }
+
+ public void setMode(ExportMode mode) {
+ this.mode = mode;
+ }
+
+ public String getFilterKeyword() {
+ return filterKeyword;
+ }
+
+ public void setFilterKeyword(String filterKeyword) {
+ this.filterKeyword = filterKeyword;
+ }
+
+ public LocalDateTime getStartDate() {
+ return startDate;
+ }
+
+ public void setStartDate(LocalDateTime startDate) {
+ this.startDate = startDate;
+ }
+
+ public LocalDateTime getEndDate() {
+ return endDate;
+ }
+
+ public void setEndDate(LocalDateTime endDate) {
+ this.endDate = endDate;
+ }
+
+ public boolean isIncludeMetadata() {
+ return includeMetadata;
+ }
+
+ public void setIncludeMetadata(boolean includeMetadata) {
+ this.includeMetadata = includeMetadata;
+ }
+ }
+
+ public static class ExportMetadata {
+ private String exportTime;
+ private int totalCount;
+ private String source;
+ private String exportMode;
+ private String version;
+
+ public ExportMetadata() {}
+
+ public String getExportTime() {
+ return exportTime;
+ }
+
+ public void setExportTime(String exportTime) {
+ this.exportTime = exportTime;
+ }
+
+ public int getTotalCount() {
+ return totalCount;
+ }
+
+ public void setTotalCount(int totalCount) {
+ this.totalCount = totalCount;
+ }
+
+ public String getSource() {
+ return source;
+ }
+
+ public void setSource(String source) {
+ this.source = source;
+ }
+
+ public String getExportMode() {
+ return exportMode;
+ }
+
+ public void setExportMode(String exportMode) {
+ this.exportMode = exportMode;
+ }
+
+ public String getVersion() {
+ return version;
+ }
+
+ public void setVersion(String version) {
+ this.version = version;
+ }
+ }
+
+ private final ArticleRepository repository;
+ private final ObjectMapper objectMapper;
+
+ public JsonExporter(ArticleRepository repository) {
+ this.repository = repository;
+ this.objectMapper = new ObjectMapper();
+ }
+
+ public void exportToFile(Path targetPath) throws ExportException {
+ exportToFile(targetPath, new ExportOptions());
+ }
+
+ public void exportToFile(Path targetPath, ExportOptions options) throws ExportException {
+ logger.info("开始导出到文件: {}, 模式: {}", targetPath, options.getMode());
+
+ validateTargetPath(targetPath);
+
+ try {
+ List articles = getFilteredArticles(options);
+ logger.debug("过滤后待导出文章数: {}", articles.size());
+
+ String json = generateJson(articles, options);
+
+ try (BufferedWriter writer = Files.newBufferedWriter(targetPath, StandardCharsets.UTF_8)) {
+ writer.write(json);
+ }
+
+ logger.info("成功导出 {} 篇文章到: {}", articles.size(), targetPath);
+ } catch (IOException e) {
+ logger.error("导出文件失败: {}", e.getMessage(), e);
+ throw new ExportException("无法写入导出文件: " + e.getMessage(), targetPath.toString(), e);
+ }
+ }
+
+ public String exportToString() throws ExportException {
+ return exportToString(new ExportOptions());
+ }
+
+ public String exportToString(ExportOptions options) throws ExportException {
+ List articles = getFilteredArticles(options);
+ return generateJson(articles, options);
+ }
+
+ private List getFilteredArticles(ExportOptions options) {
+ List articles = repository.getAll();
+
+ if (options.getFilterKeyword() != null && !options.getFilterKeyword().trim().isEmpty()) {
+ String keyword = options.getFilterKeyword().toLowerCase();
+ articles = articles.stream()
+ .filter(a -> a.getTitle().toLowerCase().contains(keyword)
+ || a.getContent().toLowerCase().contains(keyword))
+ .collect(Collectors.toList());
+ logger.debug("关键词过滤后剩余文章数: {}", articles.size());
+ }
+
+ return articles;
+ }
+
+ private String generateJson(List articles, ExportOptions options) throws ExportException {
+ try {
+ Map output = new LinkedHashMap<>();
+
+ if (options.isIncludeMetadata() && options.getMode() != ExportMode.MINIMAL) {
+ ExportMetadata metadata = new ExportMetadata();
+ metadata.setExportTime(LocalDateTime.now().format(EXPORT_TIME_FORMAT));
+ metadata.setTotalCount(articles.size());
+ metadata.setSource("CLI Crawler v" + VERSION);
+ metadata.setExportMode(options.getMode().name());
+ metadata.setVersion(VERSION);
+ output.put("metadata", metadata);
+ }
+
+ output.put("articles", articles);
+
+ if (options.getMode() == ExportMode.STANDARD) {
+ return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(output);
+ } else {
+ return objectMapper.writeValueAsString(output);
+ }
+ } catch (Exception e) {
+ logger.error("生成JSON失败: {}", e.getMessage(), e);
+ throw new ExportException("无法生成JSON: " + e.getMessage(), e);
+ }
+ }
+
+ private void validateTargetPath(Path targetPath) throws ExportException {
+ if (targetPath == null) {
+ throw new ExportException("导出路径不能为空");
+ }
+
+ Path parent = targetPath.getParent();
+ if (parent != null && !Files.exists(parent)) {
+ try {
+ Files.createDirectories(parent);
+ logger.info("创建导出目录: {}", parent);
+ } catch (IOException e) {
+ throw new ExportException("无法创建导出目录: " + parent, e);
+ }
+ }
+ }
+
+ public List exportWithSnapshots(String baseDir) throws ExportException {
+ logger.info("开始批量导出快照到目录: {}", baseDir);
+
+ List exportedFiles = new ArrayList<>();
+ Path basePath = Path.of(baseDir);
+
+ try {
+ if (!Files.exists(basePath)) {
+ Files.createDirectories(basePath);
+ }
+
+ ExportOptions standardOptions = new ExportOptions();
+ standardOptions.setMode(ExportMode.STANDARD);
+ standardOptions.setIncludeMetadata(true);
+
+ String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"));
+ Path snapshotPath = basePath.resolve("snapshot_" + timestamp + ".json");
+ exportToFile(snapshotPath, standardOptions);
+ exportedFiles.add(snapshotPath);
+
+ logger.info("批量导出完成,共导出 {} 个文件", exportedFiles.size());
+ } catch (Exception e) {
+ logger.error("批量导出失败: {}", e.getMessage(), e);
+ throw new ExportException("批量导出失败: " + e.getMessage(), e);
+ }
+
+ return exportedFiles;
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java
new file mode 100644
index 0000000..740083a
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java
@@ -0,0 +1,386 @@
+package com.example.datacollect.util;
+
+import com.example.datacollect.exception.DuplicateArticleException;
+import com.example.datacollect.exception.ImportException;
+import com.example.datacollect.exception.ValidationException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.*;
+import java.time.LocalDateTime;
+import java.util.regex.Pattern;
+
+public class JsonImporter {
+ private static final Logger logger = LoggerFactory.getLogger(JsonImporter.class);
+
+ private static final Pattern URL_PATTERN = Pattern.compile("^https?://.*");
+ private static final int MAX_TITLE_LENGTH = 500;
+ private static final int MAX_CONTENT_LENGTH = 10000;
+
+ public enum DuplicateStrategy {
+ SKIP,
+ OVERWRITE,
+ ERROR
+ }
+
+ public static class ImportOptions {
+ private DuplicateStrategy duplicateStrategy = DuplicateStrategy.SKIP;
+ private boolean validateUrl = true;
+ private boolean validateTitle = true;
+ private boolean skipInvalid = true;
+ private int maxContentLength = MAX_CONTENT_LENGTH;
+
+ public ImportOptions() {}
+
+ public DuplicateStrategy getDuplicateStrategy() {
+ return duplicateStrategy;
+ }
+
+ public void setDuplicateStrategy(DuplicateStrategy duplicateStrategy) {
+ this.duplicateStrategy = duplicateStrategy;
+ }
+
+ public boolean isValidateUrl() {
+ return validateUrl;
+ }
+
+ public void setValidateUrl(boolean validateUrl) {
+ this.validateUrl = validateUrl;
+ }
+
+ public boolean isValidateTitle() {
+ return validateTitle;
+ }
+
+ public void setValidateTitle(boolean validateTitle) {
+ this.validateTitle = validateTitle;
+ }
+
+ public boolean isSkipInvalid() {
+ return skipInvalid;
+ }
+
+ public void setSkipInvalid(boolean skipInvalid) {
+ this.skipInvalid = skipInvalid;
+ }
+
+ public int getMaxContentLength() {
+ return maxContentLength;
+ }
+
+ public void setMaxContentLength(int maxContentLength) {
+ this.maxContentLength = maxContentLength;
+ }
+ }
+
+ public static class ImportResult {
+ private int totalFound;
+ private int imported;
+ private int skipped;
+ private int invalid;
+ private int overwritten;
+ private List errors;
+ private List warnings;
+
+ public ImportResult() {
+ this.errors = new ArrayList<>();
+ this.warnings = new ArrayList<>();
+ }
+
+ public int getTotalFound() {
+ return totalFound;
+ }
+
+ public void setTotalFound(int totalFound) {
+ this.totalFound = totalFound;
+ }
+
+ public int getImported() {
+ return imported;
+ }
+
+ public void setImported(int imported) {
+ this.imported = imported;
+ }
+
+ public int getSkipped() {
+ return skipped;
+ }
+
+ public void setSkipped(int skipped) {
+ this.skipped = skipped;
+ }
+
+ public int getInvalid() {
+ return invalid;
+ }
+
+ public void setInvalid(int invalid) {
+ this.invalid = invalid;
+ }
+
+ public int getOverwritten() {
+ return overwritten;
+ }
+
+ public void setOverwritten(int overwritten) {
+ this.overwritten = overwritten;
+ }
+
+ public List getErrors() {
+ return errors;
+ }
+
+ public void addError(String error) {
+ this.errors.add(error);
+ }
+
+ public List getWarnings() {
+ return warnings;
+ }
+
+ public void addWarning(String warning) {
+ this.warnings.add(warning);
+ }
+
+ public String getSummary() {
+ return String.format(
+ "导入完成: 总共找到=%d, 成功导入=%d, 跳过=%d, 无效=%d, 覆盖=%d, 错误=%d",
+ totalFound, imported, skipped, invalid, overwritten, errors.size()
+ );
+ }
+ }
+
+ private final ArticleRepository repository;
+ private final ObjectMapper objectMapper;
+
+ public JsonImporter(ArticleRepository repository) {
+ this.repository = repository;
+ this.objectMapper = new ObjectMapper();
+ }
+
+ public ImportResult importFromFile(Path sourcePath) throws ImportException {
+ return importFromFile(sourcePath, new ImportOptions());
+ }
+
+ public ImportResult importFromFile(Path sourcePath, ImportOptions options) throws ImportException {
+ logger.info("开始从文件导入: {}", sourcePath);
+ validateSourcePath(sourcePath);
+
+ ImportResult result = new ImportResult();
+
+ try {
+ String content = readFileContent(sourcePath);
+ List articles = parseArticles(content, result);
+ result.setTotalFound(articles.size());
+
+ logger.debug("解析到 {} 篇文章", articles.size());
+
+ for (int i = 0; i < articles.size(); i++) {
+ Article article = articles.get(i);
+ try {
+ processArticle(article, options, result, i);
+ } catch (ValidationException e) {
+ logger.warn("文章验证失败 [位置 {}]: {}", i, e.getMessage());
+ result.addError("无效文章 at index " + i + ": " + e.getMessage());
+ result.setInvalid(result.getInvalid() + 1);
+ if (!options.isSkipInvalid()) {
+ throw new ImportException("文章验证失败: " + e.getMessage(), sourcePath.toString(), i);
+ }
+ } catch (DuplicateArticleException e) {
+ logger.warn("重复文章 [位置 {}]: {}", i, e.getMessage());
+ result.setSkipped(result.getSkipped() + 1);
+ }
+ }
+
+ logger.info("导入完成: {}", result.getSummary());
+
+ } catch (IOException e) {
+ logger.error("读取文件失败: {}", e.getMessage(), e);
+ throw new ImportException("无法读取导入文件: " + e.getMessage(), sourcePath.toString(), e);
+ } catch (ImportException e) {
+ throw e;
+ } catch (Exception e) {
+ logger.error("导入过程出错: {}", e.getMessage(), e);
+ throw new ImportException("导入失败: " + e.getMessage(), sourcePath.toString(), e);
+ }
+
+ return result;
+ }
+
+ public List parseArticles(String json) throws ImportException {
+ ImportResult result = new ImportResult();
+ return parseArticles(json, result);
+ }
+
+ private List parseArticles(String json, ImportResult result) throws ImportException {
+ try {
+ Map data = objectMapper.readValue(json, Map.class);
+
+ List> articlesList = null;
+ if (data.containsKey("articles")) {
+ articlesList = (List>) data.get("articles");
+ } else if (data.containsKey("data")) {
+ articlesList = (List>) data.get("data");
+ } else if (data instanceof List) {
+ articlesList = (List>) data;
+ }
+
+ if (articlesList == null) {
+ throw new ImportException("JSON格式错误:未找到 'articles' 或 'data' 字段");
+ }
+
+ List articles = new ArrayList<>();
+ for (int i = 0; i < articlesList.size(); i++) {
+ try {
+ Object item = articlesList.get(i);
+ if (item instanceof Map) {
+ Article article = mapToArticle((Map, ?>) item, i);
+ articles.add(article);
+ }
+ } catch (Exception e) {
+ logger.warn("解析第 {} 篇文章失败: {}", i, e.getMessage());
+ result.addError("解析失败 at index " + i + ": " + e.getMessage());
+ }
+ }
+
+ return articles;
+ } catch (ImportException e) {
+ throw e;
+ } catch (Exception e) {
+ logger.error("JSON解析失败: {}", e.getMessage(), e);
+ throw new ImportException("JSON解析失败: " + e.getMessage(), e);
+ }
+ }
+
+ @SuppressWarnings("unchecked")
+ private Article mapToArticle(Map, ?> map, int index) throws ValidationException {
+ String title = (String) map.get("title");
+ String url = (String) map.get("url");
+ String content = (String) map.get("content");
+ Object crawledAtObj = map.get("crawledAt");
+ LocalDateTime crawledAt = null;
+
+ if (crawledAtObj != null) {
+ try {
+ if (crawledAtObj instanceof String) {
+ crawledAt = LocalDateTime.parse((String) crawledAtObj);
+ }
+ } catch (Exception e) {
+ logger.warn("无法解析 crawledAt 字段: {}, 使用默认值", crawledAtObj);
+ }
+ }
+
+ if (title == null || title.trim().isEmpty()) {
+ throw new ValidationException("标题不能为空", "title", null, "非空字符串");
+ }
+
+ if (url == null || url.trim().isEmpty()) {
+ throw new ValidationException("URL不能为空", "url", null, "非空字符串");
+ }
+
+ if (content == null) {
+ content = "";
+ }
+
+ return new Article(title.trim(), url.trim(), content.trim(), crawledAt);
+ }
+
+ private void processArticle(Article article, ImportOptions options, ImportResult result, int index)
+ throws ValidationException, DuplicateArticleException {
+
+ if (options.isValidateTitle() && article.getTitle().length() > MAX_TITLE_LENGTH) {
+ throw new ValidationException(
+ "标题过长: 最大" + MAX_TITLE_LENGTH + "字符",
+ "title",
+ article.getTitle(),
+ "长度 <= " + MAX_TITLE_LENGTH
+ );
+ }
+
+ if (options.isValidateUrl() && !URL_PATTERN.matcher(article.getUrl()).matches()) {
+ throw new ValidationException(
+ "URL格式无效: " + article.getUrl(),
+ "url",
+ article.getUrl(),
+ "必须以 http:// 或 https:// 开头"
+ );
+ }
+
+ Article existing = repository.findByUrl(article.getUrl());
+ if (existing != null) {
+ switch (options.getDuplicateStrategy()) {
+ case SKIP:
+ logger.debug("跳过重复文章: {}", article.getUrl());
+ throw new DuplicateArticleException("文章URL已存在: " + article.getUrl(), article.getUrl());
+
+ case OVERWRITE:
+ logger.debug("覆盖重复文章: {}", article.getUrl());
+ repository.remove(existing);
+ repository.add(article);
+ result.setOverwritten(result.getOverwritten() + 1);
+ result.setImported(result.getImported() + 1);
+ return;
+
+ case ERROR:
+ throw new DuplicateArticleException(
+ "发现重复URL: " + article.getUrl(),
+ article.getUrl(),
+ repository.getAll().indexOf(existing)
+ );
+ }
+ }
+
+ String content = article.getContent();
+ if (content.length() > options.getMaxContentLength()) {
+ content = content.substring(0, options.getMaxContentLength());
+ logger.debug("文章内容已截断到 {} 字符: {}", options.getMaxContentLength(), article.getTitle());
+ }
+
+ repository.add(article);
+ result.setImported(result.getImported() + 1);
+ logger.debug("成功导入文章: {}", article.getTitle());
+ }
+
+ private String readFileContent(Path sourcePath) throws IOException {
+ StringBuilder content = new StringBuilder();
+ try (BufferedReader reader = Files.newBufferedReader(sourcePath, StandardCharsets.UTF_8)) {
+ String line;
+ while ((line = reader.readLine()) != null) {
+ content.append(line).append("\n");
+ }
+ }
+ return content.toString();
+ }
+
+ private void validateSourcePath(Path sourcePath) throws ImportException {
+ if (sourcePath == null) {
+ throw new ImportException("导入路径不能为空");
+ }
+
+ if (!Files.exists(sourcePath)) {
+ throw new ImportException("导入文件不存在: " + sourcePath, sourcePath.toString());
+ }
+
+ if (!Files.isReadable(sourcePath)) {
+ throw new ImportException("文件不可读: " + sourcePath, sourcePath.toString());
+ }
+
+ try {
+ long size = Files.size(sourcePath);
+ if (size > 100 * 1024 * 1024) {
+ logger.warn("导入文件较大 ({} MB),处理可能较慢", size / (1024 * 1024));
+ }
+ } catch (IOException e) {
+ logger.warn("无法获取文件大小: {}", e.getMessage());
+ }
+ }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java
new file mode 100644
index 0000000..c1f606b
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java
@@ -0,0 +1,81 @@
+package com.example.datacollect.util;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * Static JSON helpers built on two shared Jackson {@link ObjectMapper}s: an indenting
+ * one used for everything except {@link #serializeCompact(Object)}, and a plain one for
+ * compact output. In-memory operations wrap failures in {@link RuntimeException};
+ * file operations let {@link IOException} propagate.
+ */
+public class JsonSerializer {
+    private static final Logger logger = LoggerFactory.getLogger(JsonSerializer.class);
+    private static final ObjectMapper objectMapper = new ObjectMapper();
+    // Shared default-configured mapper so serializeCompact does not build one per call.
+    private static final ObjectMapper compactMapper = new ObjectMapper();
+
+    static {
+        objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
+    }
+
+    private JsonSerializer() {
+    }
+
+    /**
+     * Serializes {@code obj} to indented JSON.
+     *
+     * @throws RuntimeException if serialization fails
+     */
+    public static <T> String serialize(T obj) {
+        try {
+            return objectMapper.writeValueAsString(obj);
+        } catch (Exception e) {
+            logger.error("Failed to serialize object", e);
+            throw new RuntimeException("Failed to serialize object", e);
+        }
+    }
+
+    /**
+     * Serializes {@code obj} to JSON without indentation.
+     *
+     * @throws RuntimeException if serialization fails
+     */
+    public static <T> String serializeCompact(T obj) {
+        try {
+            return compactMapper.writeValueAsString(obj);
+        } catch (Exception e) {
+            logger.error("Failed to serialize object (compact)", e);
+            throw new RuntimeException("Failed to serialize object", e);
+        }
+    }
+
+    /**
+     * Deserializes {@code json} into an instance of {@code clazz}.
+     *
+     * @throws RuntimeException if parsing or binding fails
+     */
+    public static <T> T deserialize(String json, Class<T> clazz) {
+        try {
+            return objectMapper.readValue(json, clazz);
+        } catch (Exception e) {
+            logger.error("Failed to deserialize object", e);
+            throw new RuntimeException("Failed to deserialize object", e);
+        }
+    }
+
+    /**
+     * Deserializes a JSON array into a list whose elements are of type {@code clazz}.
+     *
+     * @throws RuntimeException if parsing or binding fails
+     */
+    public static <T> List<T> deserializeList(String json, Class<T> clazz) {
+        try {
+            return objectMapper.readValue(json,
+                    objectMapper.getTypeFactory().constructCollectionType(List.class, clazz));
+        } catch (Exception e) {
+            logger.error("Failed to deserialize list", e);
+            throw new RuntimeException("Failed to deserialize list", e);
+        }
+    }
+
+    /**
+     * Writes {@code obj} as indented JSON to {@code filePath}.
+     *
+     * @throws IOException if the file cannot be written or serialization fails
+     */
+    public static <T> void writeToFile(T obj, String filePath) throws IOException {
+        File file = new File(filePath);
+        objectMapper.writeValue(file, obj);
+        logger.debug("Successfully wrote object to file: {}", filePath);
+    }
+
+    /**
+     * Reads {@code filePath} and binds its JSON content to {@code clazz}.
+     *
+     * @throws IOException if the file cannot be read or parsed
+     */
+    public static <T> T readFromFile(String filePath, Class<T> clazz) throws IOException {
+        File file = new File(filePath);
+        T obj = objectMapper.readValue(file, clazz);
+        logger.debug("Successfully read object from file: {}", filePath);
+        return obj;
+    }
+
+    /**
+     * Reads {@code filePath} and binds its JSON array to a list of {@code clazz} elements.
+     *
+     * @throws IOException if the file cannot be read or parsed
+     */
+    public static <T> List<T> readListFromFile(String filePath, Class<T> clazz) throws IOException {
+        File file = new File(filePath);
+        List<T> list = objectMapper.readValue(file,
+                objectMapper.getTypeFactory().constructCollectionType(List.class, clazz));
+        logger.debug("Successfully read list from file: {}", filePath);
+        return list;
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java
new file mode 100644
index 0000000..d749419
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java
@@ -0,0 +1,39 @@
+package com.example.datacollect.util;
+
+import com.example.datacollect.exception.NetworkException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.concurrent.Callable;
+
+/**
+ * Runs a task with up to {@code MAX_RETRIES} retries and exponential back-off
+ * (500 ms, 1000 ms, 2000 ms between attempts).
+ */
+public class RetryUtils {
+    private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class);
+
+    private static final int MAX_RETRIES = 3;
+    private static final long BASE_DELAY_MS = 500;
+
+    /**
+     * Calls {@code task} until it returns normally or every attempt has failed.
+     *
+     * @param task the operation to run; any exception it throws triggers a retry
+     * @return the value returned by the first successful call
+     * @throws NetworkException if all {@code MAX_RETRIES + 1} attempts fail (cause is the
+     *                          last failure) or the calling thread is interrupted
+     */
+    public static <T> T executeWithRetry(Callable<T> task) throws NetworkException {
+        Exception lastException = null;
+
+        for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+            try {
+                if (attempt > 0) {
+                    // Delay doubles per retry: BASE * 2^(attempt - 1).
+                    long waitTime = BASE_DELAY_MS << (attempt - 1);
+                    logger.info("重试 {}/{} 次,等待 {} ms", attempt, MAX_RETRIES, waitTime);
+                    Thread.sleep(waitTime);
+                }
+
+                return task.call();
+            } catch (InterruptedException e) {
+                // An interrupt is a request to stop, not a transient failure: restore the
+                // flag for the caller and give up instead of sleeping and retrying again.
+                Thread.currentThread().interrupt();
+                logger.warn("重试过程被中断,停止后续尝试");
+                throw new NetworkException("网络请求被中断", e);
+            } catch (Exception e) {
+                lastException = e;
+                logger.warn("第 {} 次尝试失败: {}", attempt + 1, e.getMessage());
+            }
+        }
+
+        logger.error("所有 {} 次重试均失败", MAX_RETRIES + 1);
+        throw new NetworkException("网络错误,已重试三次", lastException);
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java
new file mode 100644
index 0000000..a26e19c
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -0,0 +1,52 @@
+package com.example.datacollect.view;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Console front end: reads command lines from standard input and prints
+ * ANSI-colored status messages and article listings to standard output.
+ */
+public class ConsoleView implements AutoCloseable {
+    private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
+    private static final String ANSI_RESET = "\u001B[0m";
+    private static final String ANSI_GREEN = "\u001B[32m";
+    private static final String ANSI_RED = "\u001B[31m";
+    private static final String ANSI_BLUE = "\u001B[34m";
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /** Closes the scanner (and with it {@code System.in}). */
+    @Override
+    public void close() {
+        scanner.close();
+        logger.debug("ConsoleView closed");
+    }
+
+    /**
+     * Prints the {@code "> "} prompt and returns the next line typed by the user.
+     *
+     * <p>NOTE(review): {@link Scanner#nextLine()} throws {@code NoSuchElementException}
+     * when input is exhausted (e.g. piped input without a final {@code exit}); confirm
+     * that callers handle this.
+     */
+    public String readLine() {
+        System.out.print("> ");
+        return scanner.nextLine();
+    }
+
+    /** Prints {@code msg} in green. */
+    public void printSuccess(String msg) {
+        System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in red. */
+    public void printError(String msg) {
+        System.out.println(ANSI_RED + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in blue. */
+    public void printInfo(String msg) {
+        System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+    }
+
+    /**
+     * Prints a numbered "title | url" line per article, or a hint when the list is empty.
+     *
+     * @param articles articles to show; must not be null
+     */
+    public void display(List<Article> articles) {
+        if (articles.isEmpty()) {
+            printInfo("暂无文章,请先执行 crawl。");
+            return;
+        }
+        // Numbering shown to the user is 1-based.
+        for (int i = 0; i < articles.size(); i++) {
+            Article a = articles.get(i);
+            System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+        }
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/resources/logback.xml b/project/java-cli-期末课程项目/src/main/resources/logback.xml
new file mode 100644
index 0000000..221a083
--- /dev/null
+++ b/project/java-cli-期末课程项目/src/main/resources/logback.xml
@@ -0,0 +1,25 @@
+
+
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ logs/crawler.%d{yyyy-MM-dd}.log
+ 30
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
diff --git a/project/java-cli-期末课程项目/target/classes/logback.xml b/project/java-cli-期末课程项目/target/classes/logback.xml
new file mode 100644
index 0000000..221a083
--- /dev/null
+++ b/project/java-cli-期末课程项目/target/classes/logback.xml
@@ -0,0 +1,25 @@
+
+
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ logs/crawler.%d{yyyy-MM-dd}.log
+ 30
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
diff --git a/project/java-cli-期末课程项目/target/maven-archiver/pom.properties b/project/java-cli-期末课程项目/target/maven-archiver/pom.properties
new file mode 100644
index 0000000..5c1de34
--- /dev/null
+++ b/project/java-cli-期末课程项目/target/maven-archiver/pom.properties
@@ -0,0 +1,3 @@
+artifactId=datacollect-cli
+groupId=com.example
+version=0.1.0
diff --git a/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..0ccfd6d
--- /dev/null
+++ b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,32 @@
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\PersistenceManager.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExitCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\CrawlCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ExportException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExportCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ImportCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ImportException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\DuplicateArticleException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\CrawlerException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\Command.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\model\Article.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\PeopleStrategy.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\NetworkException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\controller\CrawlerController.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\StrategyFactory.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonImporter.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\RetryUtils.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ListCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\Main.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\UrlFormatException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonSerializer.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ParseException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\YouthStrategy.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CsdnStrategy.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\HelpCommand.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\ArticleRepository.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ValidationException.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\view\ConsoleView.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonExporter.java
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\AnalyzeCommand.java
diff --git a/project/java-cli-期末课程项目/test_crawler.ps1 b/project/java-cli-期末课程项目/test_crawler.ps1
new file mode 100644
index 0000000..3ad50ef
--- /dev/null
+++ b/project/java-cli-期末课程项目/test_crawler.ps1
@@ -0,0 +1,92 @@
+# Smoke tests for the CLI crawler jar. Each test pipes commands into the program's
+# standard input and checks the exit code, the captured output or a produced file.
+# Run from the project root after the jar-with-dependencies has been built.
+$ErrorActionPreference = "Continue"
+
+# Jar under test, relative to the project root.
+$jar = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+
+Write-Host "=== 测试 CLI 爬虫程序 ===" -ForegroundColor Cyan
+
+# Test 1: start the program and show the help text
+Write-Host "`n1. 测试帮助命令..." -ForegroundColor Yellow
+$helpOutput = echo "help" | java -jar $jar 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "帮助命令执行失败" -ForegroundColor Red
+    Write-Host $helpOutput
+} else {
+    Write-Host "帮助命令执行成功" -ForegroundColor Green
+    # Write-Host emits nothing to the pipeline, so the slice has to be taken first.
+    $helpOutput | Select-Object -First 15 | ForEach-Object { Write-Host $_ }
+}
+
+# Test 2: list command on an empty repository
+Write-Host "`n2. 测试 list 命令(空列表)..." -ForegroundColor Yellow
+$listOutput = echo "list" | java -jar $jar 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "list 命令执行失败" -ForegroundColor Red
+    Write-Host $listOutput
+} else {
+    Write-Host "list 命令执行成功" -ForegroundColor Green
+}
+
+# Test 3: Juejin strategy
+Write-Host "`n3. 测试 Juejin 策略..." -ForegroundColor Yellow
+$juejinOutput = @("crawl https://juejin.cn/", "list", "exit") | java -jar $jar 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "Juejin 策略测试失败" -ForegroundColor Red
+    $juejinOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
+} else {
+    $articleCount = ($juejinOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' })
+    Write-Host "Juejin 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green
+}
+
+# Test 4: HnuNews strategy
+Write-Host "`n4. 测试 HnuNews 策略..." -ForegroundColor Yellow
+$hnuOutput = @("crawl https://news.hnu.edu.cn/", "list", "exit") | java -jar $jar 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "HnuNews 策略测试失败" -ForegroundColor Red
+    $hnuOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
+} else {
+    $articleCount = ($hnuOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' })
+    Write-Host "HnuNews 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green
+}
+
+# Test 5: export
+Write-Host "`n5. 测试导出功能..." -ForegroundColor Yellow
+# Remove any stale file first; otherwise the existence check below would pass even if
+# this run exported nothing.
+if (Test-Path "test_export.json") {
+    Remove-Item "test_export.json" -Force
+}
+$exportOutput = @("crawl https://juejin.cn/", "export test_export.json", "exit") | java -jar $jar 2>&1
+if (-not (Test-Path "test_export.json")) {
+    Write-Host "导出功能测试失败" -ForegroundColor Red
+    $exportOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
+} else {
+    $fileSize = (Get-Item "test_export.json").Length
+    Write-Host "导出功能测试成功 - 文件大小: $fileSize 字节" -ForegroundColor Green
+    Remove-Item "test_export.json" -Force
+}
+
+# Test 6: import (export a file first, then import it in a fresh process)
+Write-Host "`n6. 测试导入功能..." -ForegroundColor Yellow
+@("crawl https://juejin.cn/", "export import_test.json", "exit") | java -jar $jar 2>&1 | Out-Null
+$importOutput = @("import import_test.json", "list", "exit") | java -jar $jar 2>&1
+if ($LASTEXITCODE -ne 0) {
+    Write-Host "导入功能测试失败" -ForegroundColor Red
+    $importOutput | Select-Object -Last 10 | ForEach-Object { Write-Host $_ }
+} else {
+    Write-Host "导入功能测试成功" -ForegroundColor Green
+}
+# Clean up the fixture on both outcomes so a failed run leaves nothing behind.
+if (Test-Path "import_test.json") {
+    Remove-Item "import_test.json" -Force
+}
+
+# Test 7: unknown command handling
+Write-Host "`n7. 测试未知命令处理..." -ForegroundColor Yellow
+$unknownOutput = echo "unknown_command" | java -jar $jar 2>&1
+if ($unknownOutput -match "Unknown command") {
+    Write-Host "未知命令处理测试成功" -ForegroundColor Green
+} else {
+    Write-Host "未知命令处理测试失败" -ForegroundColor Red
+}
+
+# Test 8: session persistence (data survives exit and restart)
+Write-Host "`n8. 测试会话持久化..." -ForegroundColor Yellow
+@("crawl https://juejin.cn/", "exit") | java -jar $jar 2>&1 | Out-Null
+$restoreOutput = echo "list" | java -jar $jar 2>&1
+if ($restoreOutput -match "Loaded") {
+    Write-Host "会话持久化测试成功" -ForegroundColor Green
+} else {
+    Write-Host "会话持久化测试失败" -ForegroundColor Red
+    $restoreOutput | Select-Object -Last 5 | ForEach-Object { Write-Host $_ }
+}
+
+Write-Host "`n=== 测试完成 ===" -ForegroundColor Cyan
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/test_export.json b/project/java-cli-期末课程项目/test_export.json
new file mode 100644
index 0000000..a3ea8df
--- /dev/null
+++ b/project/java-cli-期末课程项目/test_export.json
@@ -0,0 +1,17 @@
+[ {
+ "title" : "7月1日起施行 超龄劳动者迎来权益保障新规",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html",
+ "content" : ""
+}, {
+ "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次",
+ "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html",
+ "content" : ""
+}, {
+ "title" : "外交部谈美伊谈判",
+ "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html",
+ "content" : ""
+}, {
+ "title" : "重庆发布今年首个地质灾害红色预警",
+ "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html",
+ "content" : ""
+} ]
\ No newline at end of file
diff --git a/project/java-cli-期末课程项目/test_import_export.ps1 b/project/java-cli-期末课程项目/test_import_export.ps1
new file mode 100644
index 0000000..c7074af
--- /dev/null
+++ b/project/java-cli-期末课程项目/test_import_export.ps1
@@ -0,0 +1,236 @@
+# Automated test sequence for the CLI crawler's data import/export features.
+# The jar and export-file paths below are relative to the current working directory.
+
+$ErrorActionPreference = 'Stop'
+$env:JAVA_HOME = 'C:\Program Files\Java\latest\jdk-25'
+$APP_JAR = 'target\datacollect-cli-0.1.0-jar-with-dependencies.jar'
+$TEST_EXPORT_FILE = 'data\test_export.json'
+$USERPROFILE_PATH = "${env:USERPROFILE}\.datacollect"   # directory whose contents Clean-Up deletes
+
+# Opening banner.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "CLI Crawler - Import/Export Test Suite", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host ""
+
+# Clean-Up: deletes everything inside $USERPROFILE_PATH (recursively) and the file
+# $TEST_EXPORT_FILE, each only when the path exists. Takes no parameters and
+# returns nothing. Removal is forced and its errors are silenced, so a failed
+# deletion does not stop the script.
+function Clean-Up {
+    $quiet = @{ Force = $true; ErrorAction = 'SilentlyContinue' }
+    Write-Host "[CLEANUP] Removing old data files..." -ForegroundColor Yellow
+    if (Test-Path $USERPROFILE_PATH) { Remove-Item "$USERPROFILE_PATH\*" -Recurse @quiet }
+    if (Test-Path $TEST_EXPORT_FILE) { Remove-Item $TEST_EXPORT_FILE @quiet }
+}
+
+# Run-CLI: for each non-blank, trimmed line of $Commands (newline-separated) runs `java -jar $APP_JAR <line>`,
+# echoing the command and then its combined stdout/stderr. Nothing is returned to the caller.
+function Run-CLI {
+    param([string]$Commands)
+    foreach ($cmd in ($Commands -split "`n")) {
+        $cmd = $cmd.Trim()
+        if ($cmd -eq "") { continue }   # skip blank lines
+        Write-Host "[CLI] $cmd" -ForegroundColor Gray
+        $result = & java -jar $APP_JAR $cmd 2>&1
+        # Captured output is an array of lines; without -Separator Write-Host joins them with spaces.
+        Write-Host $result -Separator "`n" -ForegroundColor Green
+        Write-Host ""
+    }
+}
+
+# Step 1: call Clean-Up to remove old data files before any CLI command
+# is issued.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 1: Initial Cleanup", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Clean-Up
+Write-Host ""
+
+# Step 2: crawl the CSDN home page through the CLI, then pause 2 seconds before the next step.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 2: Crawl Data (CSDN)", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: crawl https://www.csdn.net/" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "crawl https://www.csdn.net/" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 2
+
+# Step 3: run the CLI "list" command and show its output, then pause 1 second.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 3: List Articles", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "list" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 4: ask the CLI to export to data\test_export.json in JSON format, then pause 1 second.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 4: Export to JSON", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: export data\test_export.json --format json" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "export data\test_export.json --format json" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 5: confirm the export file exists, preview its first 1500 characters, and look for the
+# "crawledAt" (reported as an error when absent) and "metadata" (warning when absent) fields.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 5: Check Exported JSON File", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+if (Test-Path $TEST_EXPORT_FILE) {
+    Write-Host "[SUCCESS] JSON file created: $TEST_EXPORT_FILE" -ForegroundColor Green
+    Write-Host ""
+    Write-Host "JSON File Content Preview (first 1500 chars):" -ForegroundColor Cyan
+    # Read as UTF-8 explicitly: Windows PowerShell decodes BOM-less files as ANSI, garbling non-ASCII titles.
+    $content = Get-Content $TEST_EXPORT_FILE -Raw -Encoding UTF8
+    if ($content.Length -gt 1500) {
+        Write-Host ($content.Substring(0, 1500) + "...") -ForegroundColor White
+    } else {
+        Write-Host $content -ForegroundColor White
+    }
+
+    # Check for the crawledAt field; a blank line precedes the verdict either way.
+    Write-Host ""
+    if ($content -match "crawledAt") {
+        Write-Host "[SUCCESS] crawledAt field found in JSON!" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] crawledAt field NOT found in JSON!" -ForegroundColor Red
+    }
+
+    # Check for metadata
+    if ($content -match "metadata") {
+        Write-Host "[SUCCESS] metadata field found in JSON!" -ForegroundColor Green
+    } else {
+        Write-Host "[WARNING] metadata field NOT found in JSON!" -ForegroundColor Yellow
+    }
+} else {
+    Write-Host "[ERROR] JSON file NOT created!" -ForegroundColor Red
+}
+Write-Host ""
+
+# Step 6: list the articles and record how many exist before the data is cleared.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 6: Get Article Count Before Clear", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "list" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+
+# Take the count from the first "Total: N articles" line; it stays 0 when no such line is printed.
+$articleCount = 0
+$lines = $result -split "`n"
+foreach ($line in $lines) {
+    if ($line -match "Total: (\d+) articles") {
+        $articleCount = [int]$matches[1]
+        break
+    }
+}
+Write-Host ""
+Write-Host "Current article count: $articleCount" -ForegroundColor Cyan
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 7: run the CLI "clear" command and show its output, then pause 1 second.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 7: Clear All Data", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: clear" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "clear" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 8: run "list" again after the clear so the output can be inspected, then pause 1 second.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 8: Verify Data Cleared", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "list" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 9: import data\test_export.json through the CLI and show its output, then pause 1 second.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 9: Import Data from JSON", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: import data\test_export.json" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "import data\test_export.json" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 10: list the articles after the import and compare the count with $articleCount.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 10: Verify Data Restored", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "list" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+
+# Take the count from the first "Total: N articles" line; it stays 0 when no such line is printed.
+$articleCountAfterImport = 0
+$lines = $result -split "`n"
+foreach ($line in $lines) {
+    if ($line -match "Total: (\d+) articles") {
+        $articleCountAfterImport = [int]$matches[1]
+        break
+    }
+}
+
+if ($articleCountAfterImport -eq $articleCount) {
+    Write-Host "[SUCCESS] Data restored successfully! Article count matches: $articleCountAfterImport" -ForegroundColor Green
+} else {
+    Write-Host "[WARNING] Article count mismatch. Before: $articleCount, After: $articleCountAfterImport" -ForegroundColor Yellow
+}
+Write-Host ""
+
+# Step 11: import the same file a second time; the summary later checks that the count did not grow.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 11: Test Duplicate Import (No Duplication)", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: import data\test_export.json (second time)" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "import data\test_export.json" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 12: list the articles once more and record the count after the second import.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "STEP 12: Final Article Count", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = & java -jar $APP_JAR "list" 2>&1
+# $result is an array of output lines; -Separator prints one per row instead of a space-joined blob.
+Write-Host $result -Separator "`n" -ForegroundColor Green
+Write-Host ""
+
+# Take the count from the first "Total: N articles" line; it stays 0 when no such line is printed.
+$finalCount = 0
+$lines = $result -split "`n"
+foreach ($line in $lines) {
+    if ($line -match "Total: (\d+) articles") {
+        $finalCount = [int]$matches[1]
+        break
+    }
+}
+
+# Test summary: print the article counts seen after the first and second import. The
+# duplicate-import check passes when the second import left the count unchanged.
+$rule = "========================================"
+foreach ($bannerLine in $rule, "TEST SUMMARY", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
+Write-Host "Articles after first import: $articleCountAfterImport" -ForegroundColor White
+Write-Host "Articles after second import: $finalCount" -ForegroundColor White
+Write-Host ""
+
+if ($finalCount -ne $articleCountAfterImport) {
+    Write-Host "[ERROR] Duplicate import created duplicates! Count increased from $articleCountAfterImport to $finalCount" -ForegroundColor Red
+} else {
+    Write-Host "[SUCCESS] Duplicate import correctly skipped! No duplication occurred." -ForegroundColor Green
+}
+
+Write-Host ""
+# Closing banner.
+foreach ($bannerLine in $rule, "ALL TESTS COMPLETED", $rule) { Write-Host $bannerLine -ForegroundColor Cyan }
diff --git a/project/java-cli-期末课程项目/test_input.txt b/project/java-cli-期末课程项目/test_input.txt
new file mode 100644
index 0000000..abe3e5f
--- /dev/null
+++ b/project/java-cli-期末课程项目/test_input.txt
@@ -0,0 +1,2 @@
+export data/test_standard_export.json
+exit
\ No newline at end of file