diff --git a/project/202506050307-汪君玥-期末实验报告.docx b/project/202506050307-汪君玥-期末实验报告.docx new file mode 100644 index 0000000..0e50195 Binary files /dev/null and b/project/202506050307-汪君玥-期末实验报告.docx differ diff --git a/project/java-cli-期末课程项目/.gitignore b/project/java-cli-期末课程项目/.gitignore new file mode 100644 index 0000000..0ebcf1a --- /dev/null +++ b/project/java-cli-期末课程项目/.gitignore @@ -0,0 +1,4 @@ +*.jar +*.jar +*.class +*.log \ No newline at end of file diff --git a/project/java-cli-期末课程项目/data/csdnexport.json b/project/java-cli-期末课程项目/data/csdnexport.json new file mode 100644 index 0000000..d842d31 --- /dev/null +++ b/project/java-cli-期末课程项目/data/csdnexport.json @@ -0,0 +1,273 @@ +[ { + "title" : "7月1日起施行 超龄劳动者迎来权益保障新规", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html", + "content" : "" +}, { + "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次", + "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html", + "content" : "" +}, { + "title" : "外交部谈美伊谈判", + "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html", + "content" : "" +}, { + "title" : "重庆发布今年首个地质灾害红色预警", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html", + "content" : "" +}, { + "title" : "重庆发布今年首个地质灾害红色预警", + "url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html", + "content" : "" +}, { + "title" : "账号管理规范", + "url" : "https://blog.csdn.net/blogdevteam/article/details/126135357", + "content" : "" +}, { + "title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词", + "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096", + "content" : "" +}, { + "title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode", + "url" : "https://blog.csdn.net/jiangtao/article/details/161373705", + "content" : "" +}, { + "title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变", + "url" : "https://blog.csdn.net/jzagi/article/details/161327725", + "content" : "" +}, { + "title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”", + 
"url" : "https://blog.csdn.net/csdnnews/article/details/161325101", + "content" : "" +}, { + "title" : "开源项目“离谱的死亡方式”", + "url" : "https://blog.csdn.net/csdnnews/article/details/161325111", + "content" : "" +}, { + "title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条", + "url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638", + "content" : "" +}, { + "title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论", + "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914", + "content" : "" +}, { + "title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环", + "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921", + "content" : "" +}, { + "title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条", + "url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996", + "content" : "" +}, { + "title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗", + "url" : "https://blog.csdn.net/csdnnews/article/details/161294926", + "content" : "" +}, { + "title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 
服务器 14.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/157735374", + "content" : "" +}, { + "title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149722641", + "content" : "" +}, { + "title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149424765", + "content" : "" +}, { + "title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149356472", + "content" : "" +}, { + "title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149249425", + "content" : "" +}, { + "title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149169547", + "content" : "" +}, { + "title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149066439", + "content" : "" +}, { + "title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148900678", + "content" : "" +}, { + "title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148851428", + "content" : "" +}, { + "title" : "CHATERM AI:开启云资源氛围管理新篇章! 
人工智能 70.3K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148769366", + "content" : "" +}, { + "title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作", + "url" : "https://blog.csdn.net/blogdevteam/article/details/160479095", + "content" : "" +}, { + "title" : "深入解析进程:从PCB到僵尸进程", + "url" : "https://blog.csdn.net/2401_86275172/article/details/160566166", + "content" : "" +}, { + "title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断", + "url" : "https://blog.csdn.net/Rthan/article/details/161241670", + "content" : "" +}, { + "title" : "【读书笔记】《幸福关系的七段旅程》", + "url" : "https://blog.csdn.net/Chandler2017/article/details/160967281", + "content" : "" +}, { + "title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析", + "url" : "https://blog.csdn.net/2401_88151415/article/details/161253437", + "content" : "" +}, { + "title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码", + "url" : "https://blog.csdn.net/qq_36478920/article/details/161291953", + "content" : "" +}, { + "title" : "volatile 的底层原理及应用场景", + "url" : "https://blog.csdn.net/tongluowan007/article/details/161230327", + "content" : "" +}, { + "title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04", + "url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923", + "content" : "" +}, { + "title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)", + "url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019", + "content" : "" +}, { + "title" : "系统分析师 备考知识点整理", + "url" : "https://blog.csdn.net/david_232656/article/details/161291901", + "content" : "" +}, { + "title" : "Linux之文件", + "url" : "https://blog.csdn.net/bksczm/article/details/161055964", + "content" : "" +}, { + "title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)", + "url" : "https://blog.csdn.net/m0_67558301/article/details/161324964", + "content" : "" +}, { + "title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉", + "url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741", + "content" : "" +}, { 
+ "title" : "Codex Mac版安装教程(AppStore无法下载解决)", + "url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569", + "content" : "" +}, { + "title" : "应用层中的UDP协议原理", + "url" : "https://blog.csdn.net/2503_90262217/article/details/161200229", + "content" : "" +}, { + "title" : "【AI】Git、Node.js 一站式保姆级安装指南", + "url" : "https://blog.csdn.net/2401_87342824/article/details/161199150", + "content" : "" +}, { + "title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑", + "url" : "https://blog.csdn.net/Z2314246476/article/details/161076726", + "content" : "" +}, { + "title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点", + "url" : "https://blog.csdn.net/yanceyxin/article/details/161130991", + "content" : "" +}, { + "title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越", + "url" : "https://blog.csdn.net/shaobingj126/article/details/161307384", + "content" : "" +}, { + "title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表", + "url" : "https://blog.csdn.net/2603_96051737/article/details/161256831", + "content" : "" +}, { + "title" : "谷歌辞职、创业失败、重读神经科学,她说 AI 时代最危险的事是外包你的思考 | 万有引力", + "url" : "https://blog.csdn.net/tangxiaoyin/article/details/161428871", + "content" : "" +}, { + "title" : "传字节向Seed员工开放「豆包股」认购权;滴滴出行App大规模故障,官方致歉;小米MiMo-V2.5系列API永久降价:最高降99% | 极客头条", + "url" : "https://blog.csdn.net/weixin_39786569/article/details/161446737", + "content" : "" +}, { + "title" : "华为韬定律刷屏,程序员真正该读懂的信号是什么? 
| 硅基时间", + "url" : "https://blog.csdn.net/csdnnews/article/details/161432746", + "content" : "" +}, { + "title" : "一位10年Android老兵选择「逆行」:“如果未来只剩AI写代码,那就把我落下吧!”", + "url" : "https://blog.csdn.net/csdnnews/article/details/161432759", + "content" : "" +}, { + "title" : "告别繁琐预处理!MindSpeed LLM推出Train_from_HF功能,实现加载即训练", + "url" : "https://blog.csdn.net/csdnnews/article/details/161426770", + "content" : "" +}, { + "title" : "MindSpeed LLM结合Agent-Skills适配Mamba3模型,解锁SSM模型新潜能", + "url" : "https://blog.csdn.net/csdnnews/article/details/161427107", + "content" : "" +}, { + "title" : "高性能计算:鲲鹏软硬协同定义AI4S 计算新范式", + "url" : "https://blog.csdn.net/csdnnews/article/details/161426451", + "content" : "" +}, { + "title" : "AI公司烧不起Token了!国产Agent杀出,逼近Opus 4.6还免费,天工AI发布SkyClaw-v1.0:面向真实工作流的百万上下文 Agent 模型", + "url" : "https://blog.csdn.net/csdnnews/article/details/161422508", + "content" : "" +}, { + "title" : "2026年618大促7000元内演唱会手机推荐:Find X9s Pro领衔,远摄防抖清晰度全解析", + "url" : "https://blog.csdn.net/2601_95822891/article/details/161261185", + "content" : "" +}, { + "title" : "Python运算符:身份运算符(is/is not)与双等号的区别", + "url" : "https://blog.csdn.net/AIRoses/article/details/161410239", + "content" : "" +}, { + "title" : "Codex 与 Claude Code 安装配置教程", + "url" : "https://blog.csdn.net/weixin_45888077/article/details/161401615", + "content" : "" +}, { + "title" : "初识java(十一):继承", + "url" : "https://blog.csdn.net/2502_93282244/article/details/161372118", + "content" : "" +}, { + "title" : "我那台在抽屉里躺了三年的旧手机,被我改造成了全天候私人云盘", + "url" : "https://blog.csdn.net/SDFsoul/article/details/161278737", + "content" : "" +}, { + "title" : "【必看】2026年 {计算题} |专项解析 ~ H:动态规划 & 图论", + "url" : "https://blog.csdn.net/weixin_42115157/article/details/161057408", + "content" : "" +}, { + "title" : "FreeRTOS——按键控制任务的挂起和恢复", + "url" : "https://blog.csdn.net/weixin_64611877/article/details/161456747", + "content" : "" +}, { + "title" : "【c++笔记】类和对象流食般投喂(中)", + "url" : "https://blog.csdn.net/dj_798/article/details/160994229", + 
"content" : "" +}, { + "title" : "C++的IO流", + "url" : "https://blog.csdn.net/suimingtao/article/details/160892078", + "content" : "" +}, { + "title" : "Java——标准序列化机制", + "url" : "https://blog.csdn.net/cold___play/article/details/161107932", + "content" : "" +}, { + "title" : "1.6T光模块将成AI数据中心主流", + "url" : "https://blog.csdn.net/m0_75253087/article/details/160956039", + "content" : "" +}, { + "title" : "通用程序无缺陷保证的不可能性:停机问题与哥德尔不完备定理的双轨论证 —— 兼论“边界情况不可穷举”的形式化含义", + "url" : "https://blog.csdn.net/qq_43689451/article/details/161271922", + "content" : "" +}, { + "title" : "新书速览|信息与通信工程综合实验:自动目标识别专题", + "url" : "https://blog.csdn.net/quanzhankaifaqua/article/details/161193290", + "content" : "" +}, { + "title" : "深入理解 OSI 七层网络模型:从原理到实践", + "url" : "https://blog.csdn.net/2603_95882547/article/details/161140630", + "content" : "" +} ] \ No newline at end of file diff --git a/project/java-cli-期末课程项目/data/my_export.json b/project/java-cli-期末课程项目/data/my_export.json new file mode 100644 index 0000000..f323df4 --- /dev/null +++ b/project/java-cli-期末课程项目/data/my_export.json @@ -0,0 +1,185 @@ +[ { + "title" : "7月1日起施行 超龄劳动者迎来权益保障新规", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html", + "content" : "" +}, { + "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次", + "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html", + "content" : "" +}, { + "title" : "外交部谈美伊谈判", + "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html", + "content" : "" +}, { + "title" : "重庆发布今年首个地质灾害红色预警", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html", + "content" : "" +}, { + "title" : "重庆发布今年首个地质灾害红色预警", + "url" : "http://cq.people.com.cn/n2/2026/0525/c365401-41590405.html", + "content" : "" +}, { + "title" : "账号管理规范", + "url" : "https://blog.csdn.net/blogdevteam/article/details/126135357", + "content" : "" +}, { + "title" : "代码产出暴涨250%,Claude Code已100%由自己编写!CC 之父 Boris 最新对话:我现在只负责写提示词", + "url" : 
"https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161325096", + "content" : "" +}, { + "title" : "我们公司全员把 Cursor 换成了自研的 全开源AtomCode", + "url" : "https://blog.csdn.net/jiangtao/article/details/161373705", + "content" : "" +}, { + "title" : "与菲尔兹奖得主Timothy Gowers对话:整个数学研究的范式将被AI改变", + "url" : "https://blog.csdn.net/jzagi/article/details/161327725", + "content" : "" +}, { + "title" : "AI又“翻车”!Gemini狂删2.8万行代码、系统宕机33分钟,还伪造沟通记录谎称“已恢复正常”", + "url" : "https://blog.csdn.net/csdnnews/article/details/161325101", + "content" : "" +}, { + "title" : "开源项目“离谱的死亡方式”", + "url" : "https://blog.csdn.net/csdnnews/article/details/161325111", + "content" : "" +}, { + "title" : "“DeepSeek崩了”又冲上热搜;特斯拉FSD中文名改为“特斯拉辅助驾驶”:价格依旧为6.4万元;苹果WWDC26将成库克告别秀 | 极客头条", + "url" : "https://blog.csdn.net/weixin_39786569/article/details/161394638", + "content" : "" +}, { + "title" : "“超级Agent”大梦初醒:任务一长就“飘”、动辄陷入“无限探索”?一场对话复盘工业级智能体的真实痛点与终局 | AI进化论", + "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294914", + "content" : "" +}, { + "title" : "从全网群嘲到让学术界颤抖!OpenAI 攻破 80 年数学悬案,菲尔兹奖得主预言灵验:AI正将人类逐出科研循环", + "url" : "https://blog.csdn.net/dQCFKyQDXYm3F8rB0/article/details/161294921", + "content" : "" +}, { + "title" : "雷军直言“输给特斯拉不丢人”;传Manus创始人计划融资10亿美元回购公司 | 极客头条", + "url" : "https://blog.csdn.net/weixin_39786569/article/details/161313996", + "content" : "" +}, { + "title" : "GitHub遭入侵,黑客开价5万美元卖源码!员工装了个VS Code插件,致3800个内部仓库被盗", + "url" : "https://blog.csdn.net/csdnnews/article/details/161294926", + "content" : "" +}, { + "title" : "Chaterm — 开源SRE副驾驶,让你与服务器直接对话! 
服务器 14.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/157735374", + "content" : "" +}, { + "title" : "拆箱开源版Coze:Agent核心三件套大公开,48小时揽下9K Star 人工智能 47.5K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149722641", + "content" : "" +}, { + "title" : "MinIO:开源对象存储解决方案的领先者 开源 67.6K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149424765", + "content" : "" +}, { + "title" : "LocalSend:比 AirDrop 更自由!这款神器让文件传输不再受限 https 64.1K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149356472", + "content" : "" +}, { + "title" : "Excalidraw:一款轻量、高效、极具手感的在线白板工具 产品经理 56.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149249425", + "content" : "" +}, { + "title" : "star31.6k,Aider:让代码编写如虎添翼的终端神器 人工智能 66.5K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149169547", + "content" : "" +}, { + "title" : "用Rust编写的开源支付解决方案——Hyperswitch rust 63.6K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/149066439", + "content" : "" +}, { + "title" : "Langflow:这个拖拽式AI工作流神器正在颠覆传统编程 人工智能 76.9K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148900678", + "content" : "" +}, { + "title" : "一键抠图有多强?19Kstar 的 Rembg 开源神器 python 58.7K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148851428", + "content" : "" +}, { + "title" : "CHATERM AI:开启云资源氛围管理新篇章! 
人工智能 70.3K 查看详情", + "url" : "https://blog.csdn.net/coderroad/article/details/148769366", + "content" : "" +}, { + "title" : "CSDN会员推广伙伴招募:分销返佣 + 资源互换,诚邀合作", + "url" : "https://blog.csdn.net/blogdevteam/article/details/160479095", + "content" : "" +}, { + "title" : "深入解析进程:从PCB到僵尸进程", + "url" : "https://blog.csdn.net/2401_86275172/article/details/160566166", + "content" : "" +}, { + "title" : "【功能跃升】Claude Code v2.1.145:开放 --json 脚本接口,打通 tmux 状态栏,超大文件智能截断", + "url" : "https://blog.csdn.net/Rthan/article/details/161241670", + "content" : "" +}, { + "title" : "【读书笔记】《幸福关系的七段旅程》", + "url" : "https://blog.csdn.net/Chandler2017/article/details/160967281", + "content" : "" +}, { + "title" : "Spring 核心原理:IoC/DI 与 Bean 生命周期全景解析", + "url" : "https://blog.csdn.net/2401_88151415/article/details/161253437", + "content" : "" +}, { + "title" : "鸿蒙 PC 跨设备拖拽:实现原理 + 实战代码", + "url" : "https://blog.csdn.net/qq_36478920/article/details/161291953", + "content" : "" +}, { + "title" : "volatile 的底层原理及应用场景", + "url" : "https://blog.csdn.net/tongluowan007/article/details/161230327", + "content" : "" +}, { + "title" : "ROS开发专栏---ROS2humble安装详细教程---适配Ubuntu 22.04", + "url" : "https://blog.csdn.net/weixin_61186812/article/details/161054923", + "content" : "" +}, { + "title" : "2026年全国青少年信息素养大赛算法应用主题赛(C++赛项-初赛-赛前冲刺模拟卷2:文末附答案和解析)", + "url" : "https://blog.csdn.net/weixin_66461496/article/details/161206019", + "content" : "" +}, { + "title" : "系统分析师 备考知识点整理", + "url" : "https://blog.csdn.net/david_232656/article/details/161291901", + "content" : "" +}, { + "title" : "Linux之文件", + "url" : "https://blog.csdn.net/bksczm/article/details/161055964", + "content" : "" +}, { + "title" : "Python 数据分析基础入门:《Excel Python:飞速搞定数据分析与处理》学习笔记系列(附录 C 高级 Python 概念)", + "url" : "https://blog.csdn.net/m0_67558301/article/details/161324964", + "content" : "" +}, { + "title" : "【LE Audio】CAP精讲[8]:CCID绑定术,打通音频流与控制的任督二脉", + "url" : "https://blog.csdn.net/weixin_37800531/article/details/161135741", + "content" : "" +}, { 
+ "title" : "Codex Mac版安装教程(AppStore无法下载解决)", + "url" : "https://blog.csdn.net/weixin_41961749/article/details/161110569", + "content" : "" +}, { + "title" : "应用层中的UDP协议原理", + "url" : "https://blog.csdn.net/2503_90262217/article/details/161200229", + "content" : "" +}, { + "title" : "【AI】Git、Node.js 一站式保姆级安装指南", + "url" : "https://blog.csdn.net/2401_87342824/article/details/161199150", + "content" : "" +}, { + "title" : "Re: Linux系统篇(十八)进程篇·三:深度硬核!全面起底 Linux 进程状态变化与内核链表动态解绑", + "url" : "https://blog.csdn.net/Z2314246476/article/details/161076726", + "content" : "" +}, { + "title" : "本周 GitHub 最热项目全解析!Star History 2026年第20周(5月8日-14日)排行榜深度盘点", + "url" : "https://blog.csdn.net/yanceyxin/article/details/161130991", + "content" : "" +}, { + "title" : "Google I/O 2026深度解读:AI Agent时代全面到来,从“大模型时代“到“智能体时代“的历史性跨越", + "url" : "https://blog.csdn.net/shaobingj126/article/details/161307384", + "content" : "" +}, { + "title" : "c#基础知识合集07 方法值传递 引用传递 ref参数 out输出参数 in参数 参数列表", + "url" : "https://blog.csdn.net/2603_96051737/article/details/161256831", + "content" : "" +} ] \ No newline at end of file diff --git a/project/java-cli-期末课程项目/data/sample_test.json b/project/java-cli-期末课程项目/data/sample_test.json new file mode 100644 index 0000000..5462cd5 --- /dev/null +++ b/project/java-cli-期末课程项目/data/sample_test.json @@ -0,0 +1,29 @@ +{ + "metadata": { + "exportTime": "2026-05-31T12:00:00", + "totalCount": 3, + "source": "CLI Crawler v1.0", + "exportMode": "STANDARD", + "version": "1.0" + }, + "articles": [ + { + "title": "测试文章1", + "url": "https://example.com/article1", + "content": "这是测试内容1", + "crawledAt": "2026-05-31T10:00:00" + }, + { + "title": "测试文章2", + "url": "https://example.com/article2", + "content": "这是测试内容2", + "crawledAt": "2026-05-31T11:00:00" + }, + { + "title": "测试文章3", + "url": "https://example.com/article3", + "content": "这是测试内容3", + "crawledAt": "2026-05-31T12:00:00" + } + ] +} diff --git a/project/java-cli-期末课程项目/minimal_test.ps1 
b/project/java-cli-期末课程项目/minimal_test.ps1
new file mode 100644
index 0000000..fa53d41
--- /dev/null
+++ b/project/java-cli-期末课程项目/minimal_test.ps1
@@ -0,0 +1,60 @@
+# Minimal import/export smoke test for the datacollect CLI.
+#
+# Runs import -> list -> export -> verify -> re-import -> list against the assembled jar
+# and prints each command's output. The relative paths below expect the project root as
+# the working directory.
+# NOTE(review): Main.main in this change set never reads its command-line arguments (it
+# loops on ConsoleView.readLine()), so confirm that the sub-commands passed on the command
+# line below actually reach the controller.
+$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
+$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+$SAMPLE = "data\sample_test.json"
+$EXPORT = "data\export_result.json"
+
+# Prints a test title, runs the CLI with the given arguments, and echoes its combined
+# stdout/stderr one line at a time (Write-Host would join an array of lines with spaces).
+function Invoke-CliTest {
+    param([string]$Title, [string[]]$CliArgs)
+    Write-Host $Title -ForegroundColor Yellow
+    & java -jar $APP_JAR @CliArgs 2>&1 | ForEach-Object { Write-Host $_ }
+    Write-Host ""
+}
+
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "Import/Export Test - Minimal" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host ""
+
+Invoke-CliTest "[TEST 1] Import" @("import", $SAMPLE)
+Invoke-CliTest "[TEST 2] List" @("list")
+
+# Delete an export left over from an earlier run so TEST 4 only passes on fresh output.
+if (Test-Path $EXPORT) {
+    Remove-Item $EXPORT
+}
+Invoke-CliTest "[TEST 3] Export" @("export", $EXPORT, "--format", "json")
+
+Write-Host "[TEST 4] Check Export File" -ForegroundColor Yellow
+if (Test-Path $EXPORT) {
+    Write-Host "[SUCCESS] File created!" -ForegroundColor Green
+    $content = Get-Content $EXPORT -Raw
+    Write-Host "Length: $($content.Length) chars" -ForegroundColor Cyan
+
+    foreach ($field in @("crawledAt", "metadata")) {
+        if ($content -match $field) {
+            Write-Host "[SUCCESS] $field field found!" -ForegroundColor Green
+        } else {
+            Write-Host "[ERROR] $field field NOT found" -ForegroundColor Red
+        }
+    }
+} else {
+    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
+}
+Write-Host ""
+
+Invoke-CliTest "[TEST 5] Import Again (Duplicate)" @("import", $SAMPLE)
+Invoke-CliTest "[TEST 6] Final List" @("list")
+
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "TEST COMPLETED" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
diff --git a/project/java-cli-期末课程项目/pom.xml b/project/java-cli-期末课程项目/pom.xml new file mode 100644 index 0000000..7864c8a --- /dev/null +++ b/project/java-cli-期末课程项目/pom.xml @@ -0,0 +1,67 @@ + + 4.0.0 + com.example + datacollect-cli + 0.1.0 + + 11 + 11 + + + + org.jsoup + jsoup + 1.17.2 + + + org.slf4j + slf4j-api + 2.0.9 + + + ch.qos.logback + logback-classic + 1.4.14 + + + com.fasterxml.jackson.core + jackson-databind + 2.16.1 + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.datacollect.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + diff --git a/project/java-cli-期末课程项目/simple_test.cmd b/project/java-cli-期末课程项目/simple_test.cmd new file mode 100644 index 0000000..a89d53e --- /dev/null +++ b/project/java-cli-期末课程项目/simple_test.cmd @@ -0,0 +1,56 @@ +@echo off +set JAVA_HOME=C:\Program Files\Java\latest\jdk-25 +set APP_JAR=target\datacollect-cli-0.1.0-jar-with-dependencies.jar +set SAMPLE=data\sample_test.json +set EXPORT=data\export_result.json + +echo ======================================== +echo Import/Export Feature Test +echo ======================================== +echo. + +echo [TEST 1] Import sample JSON file +echo Command: import %SAMPLE% +java -jar %APP_JAR% import %SAMPLE% +echo. +echo. 
+ +echo [TEST 2] List articles +echo Command: list +java -jar %APP_JAR% list +echo. +echo. + +echo [TEST 3] Export to JSON +echo Command: export %EXPORT% --format json +java -jar %APP_JAR% export %EXPORT% --format json +echo. +echo. + +echo [TEST 4] Check exported file +if exist %EXPORT% ( + echo [SUCCESS] Export file created + echo. + echo First 1000 characters of exported file: + powershell -Command "Get-Content %EXPORT% | Select-Object -First 20" +) else ( + echo [ERROR] Export file NOT created +) +echo. +echo. + +echo [TEST 5] Test duplicate import +echo Command: import %SAMPLE% (again) +java -jar %APP_JAR% import %SAMPLE% +echo. +echo. + +echo [TEST 6] Final list +echo Command: list +java -jar %APP_JAR% list +echo. +echo. + +echo ======================================== +echo Tests completed! Check output above. +echo ========================================
diff --git a/project/java-cli-期末课程项目/simple_test.ps1 b/project/java-cli-期末课程项目/simple_test.ps1
new file mode 100644
index 0000000..53b1e14
--- /dev/null
+++ b/project/java-cli-期末课程项目/simple_test.ps1
@@ -0,0 +1,148 @@
+# Import/export feature test for the datacollect CLI.
+#
+# Imports the sample file, lists, exports, verifies the exported JSON, re-imports the
+# same file and checks that the article count did not change. The relative paths below
+# expect the project root as the working directory.
+$ErrorActionPreference = "Stop"
+$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25"
+$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar"
+$TEST_FILE = "data\sample_test.json"
+$EXPORT_FILE = "data\export_result.json"
+
+# Runs the CLI with one array element per command-line argument and returns its combined
+# stdout/stderr as strings. (Passing "import $TEST_FILE" as a single quoted string would
+# hand java ONE argument containing a space instead of two arguments.)
+function Invoke-Cli {
+    param([string[]]$CliArgs)
+    # With "Stop", Windows PowerShell can raise a terminating error as soon as a native
+    # command writes to the redirected stderr; relax the preference for this call only.
+    $ErrorActionPreference = "Continue"
+    & java -jar $APP_JAR @CliArgs 2>&1 | ForEach-Object { "$_" }
+}
+
+# Prints CLI output one line per row (Write-Host joins an array with spaces otherwise).
+function Show-Output {
+    param($Lines)
+    foreach ($line in $Lines) {
+        Write-Host $line -ForegroundColor Green
+    }
+}
+
+# Returns the number from the last "Total: N" line of the CLI output, or 0 if there is none.
+function Get-ArticleCount {
+    param($Lines)
+    $count = 0
+    foreach ($line in $Lines) {
+        if ($line -match "Total: (\d+)") {
+            $count = [int]$matches[1]
+        }
+    }
+    return $count
+}
+
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "Import/Export Feature Test" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host ""
+
+# Step 1: Import sample data
+Write-Host "[TEST 1] Import sample JSON file" -ForegroundColor Yellow
+Write-Host "Command: import $TEST_FILE" -ForegroundColor Gray
+Show-Output (Invoke-Cli @("import", $TEST_FILE))
+Write-Host ""
+
+# Step 2: List articles
+Write-Host "[TEST 2] List articles after import" -ForegroundColor Yellow
+Write-Host "Command: list" -ForegroundColor Gray
+$result = Invoke-Cli @("list")
+Show-Output $result
+$count1 = Get-ArticleCount $result
+Write-Host "Article count: $count1" -ForegroundColor Cyan
+Write-Host ""
+
+# Step 3: Export to new file
+Write-Host "[TEST 3] Export to new JSON file" -ForegroundColor Yellow
+Write-Host "Command: export $EXPORT_FILE --format json" -ForegroundColor Gray
+# Delete an export left over from an earlier run so step 4 only passes on fresh output.
+if (Test-Path $EXPORT_FILE) {
+    Remove-Item $EXPORT_FILE
+}
+Show-Output (Invoke-Cli @("export", $EXPORT_FILE, "--format", "json"))
+Write-Host ""
+
+# Step 4: Check exported file
+Write-Host "[TEST 4] Verify exported JSON file" -ForegroundColor Yellow
+$exportOk = $false
+$crawledAtOk = $false
+$metadataOk = $false
+if (Test-Path $EXPORT_FILE) {
+    $exportOk = $true
+    Write-Host "[SUCCESS] Export file created" -ForegroundColor Green
+    $content = Get-Content $EXPORT_FILE -Raw
+    Write-Host "File size: $($content.Length) characters" -ForegroundColor Cyan
+
+    # Check for crawledAt
+    $crawledAtOk = $content -match "crawledAt"
+    if ($crawledAtOk) {
+        Write-Host "[SUCCESS] crawledAt field found in exported JSON" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] crawledAt field NOT found" -ForegroundColor Red
+    }
+
+    # Check for metadata
+    $metadataOk = $content -match "metadata"
+    if ($metadataOk) {
+        Write-Host "[SUCCESS] metadata field found" -ForegroundColor Green
+    } else {
+        Write-Host "[ERROR] metadata field NOT found" -ForegroundColor Red
+    }
+} else {
+    Write-Host "[ERROR] Export file NOT created" -ForegroundColor Red
+}
+Write-Host ""
+
+# Step 5: Test duplicate import
+Write-Host "[TEST 5] Test duplicate import (should skip duplicates)" -ForegroundColor Yellow
+Write-Host "Command: import $TEST_FILE (again)" -ForegroundColor Gray
+Show-Output (Invoke-Cli @("import", $TEST_FILE))
+Write-Host ""
+
+# Step 6: List and verify no duplication
+Write-Host "[TEST 6] Verify no duplication" -ForegroundColor Yellow
+Write-Host "Command: list" -ForegroundColor Gray
+$result = Invoke-Cli @("list")
+Show-Output $result
+$count2 = Get-ArticleCount $result
+Write-Host "Article count after second import: $count2" -ForegroundColor Cyan
+Write-Host ""
+
+# Summary: every success line below is backed by a check made above.
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "TEST SUMMARY" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+if ($count1 -eq 3 -and $count2 -eq 3 -and $exportOk -and $crawledAtOk -and $metadataOk) {
+    Write-Host "[SUCCESS] All tests passed!" -ForegroundColor Green
+    Write-Host "- Import: Successfully imported 3 articles" -ForegroundColor White
+    Write-Host "- Export: Successfully exported to JSON" -ForegroundColor White
+    Write-Host "- Duplicate: Correctly skipped duplicate articles" -ForegroundColor White
+    Write-Host "- crawledAt field: Present in exported JSON" -ForegroundColor White
+} else {
+    Write-Host "[PARTIAL] Some tests may have issues" -ForegroundColor Yellow
+    Write-Host "First import count: $count1" -ForegroundColor White
+    Write-Host "Second import count: $count2" -ForegroundColor White
+    Write-Host "Export file created: $exportOk; crawledAt: $crawledAtOk; metadata: $metadataOk" -ForegroundColor White
+}
+Write-Host ""
+
+# Show exported file content
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host "EXPORTED JSON CONTENT (Preview)" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+if (Test-Path $EXPORT_FILE) {
+    $exportContent = Get-Content $EXPORT_FILE -Raw
+    if ($exportContent.Length -gt 1000) {
+        Write-Host ($exportContent.Substring(0, 1000) + "...") -ForegroundColor White
+    } else {
+        Write-Host $exportContent -ForegroundColor White
+    }
+}
diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..3171e0e --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/Main.java @@ 
-0,0 +1,60 @@ +package com.example.datacollect; + +import com.example.datacollect.controller.CrawlerController; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.repository.PersistenceManager; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.util.JsonExporter; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public class Main { + private static final Logger logger = LoggerFactory.getLogger(Main.class); + + public static void main(String[] args) { + try (ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + PersistenceManager persistenceManager = new PersistenceManager(repository)) { + + logger.info("Starting CLI Crawler application"); + + JsonExporter jsonExporter = new JsonExporter(repository); + StrategyFactory strategyFactory = new StrategyFactory(); + + loadSession(persistenceManager, view, repository); + + CrawlerController controller = new CrawlerController(view, repository, strategyFactory, persistenceManager, jsonExporter); + + view.printSuccess("Welcome to CLI Crawler (w10_3)! 
Type help for commands."); + logger.info("Application initialized successfully"); + + while (true) { + try { + controller.handle(view.readLine()); + } catch (Exception e) { + view.printError("Error: " + e.getMessage()); + logger.error("Error in main loop: {}", e.getMessage(), e); + } + } + } catch (Exception e) { + logger.error("Fatal error in application: {}", e.getMessage(), e); + System.err.println("Fatal error: " + e.getMessage()); + System.exit(1); + } + } + + private static void loadSession(PersistenceManager persistenceManager, ConsoleView view, ArticleRepository repository) { + try { + persistenceManager.load();/* 加载会话 */ + if (repository.size() > 0) {/* 如果有文章 */ + view.printInfo("Loaded " + repository.size() + " articles from previous session");/* 打印加载的文章数量 */ + } + } catch (IOException e) { + view.printError("Warning: Failed to load previous session: " + e.getMessage()); + logger.warn("Failed to load previous session: {}", e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java new file mode 100644 index 0000000..ec9bcc3 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/AnalyzeCommand.java @@ -0,0 +1,103 @@ +package com.example.datacollect.command; + +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.util.RetryUtils; +import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import 
java.util.List; +import java.util.concurrent.Callable; + +public class AnalyzeCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); + private final ConsoleView view; + private final StrategyFactory strategyFactory; + + public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { + this.view = view; + this.strategyFactory = strategyFactory; + } + + @Override + public String getName() { + return "analyze"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + if (args.length < 2) { + view.printError("Usage: analyze "); + logger.warn("Invalid command: missing URL argument"); + return; + } + String url = args[1]; + logger.info("Analyze command executed for URL: {}", url); + + try { + CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + view.printError("No strategy found for: " + url); + logger.error("No strategy found for URL: {}", url); + return; + } + + Callable fetchTask = () -> { + logger.debug("Fetching document from: {}", url); + try { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0") + .timeout(5000) + .get(); + } catch (IOException e) { + throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e); + } + }; + + Document doc = RetryUtils.executeWithRetry(fetchTask); + logger.info("Successfully fetched document from: {}", url); + + List
articles = strategy.parse(url, doc); + logger.info("Parsed {} articles for analysis", articles.size()); + + int total = articles.size(); + int totalTitleLen = 0; + int totalContentLen = 0; + + for (Article a : articles) { + totalTitleLen += a.getTitle() == null ? 0 : a.getTitle().length(); + totalContentLen += a.getContent() == null ? 0 : a.getContent().length(); + } + + view.printInfo("===== 分析统计结果 ====="); + view.printInfo("文章总数:" + total + " 篇"); + view.printInfo("标题总长度:" + totalTitleLen); + view.printInfo("内容总长度:" + totalContentLen); + if (total > 0) { + view.printInfo("平均标题长度:" + (totalTitleLen / total)); + view.printInfo("平均内容长度:" + (totalContentLen / total)); + } + view.printInfo("======================"); + view.printSuccess("分析完成(数据未保存)"); + + logger.info("Analysis completed: {} articles analyzed", total); + } catch (NetworkException e) { + view.printError("Network error: " + e.getMessage()); + logger.error("Network error while analyzing {}: {}", url, e.getMessage(), e); + } catch (ParseException e) { + view.printError("Parse error: " + e.getMessage()); + logger.error("Parse error while analyzing {}: {}", url, e.getMessage(), e); + } catch (Exception e) { + view.printError("分析失败:" + e.getMessage()); + logger.error("Unexpected error while analyzing {}: {}", url, e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..029cadc --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,8 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; + +public interface Command { + String getName(); + void execute(String[] args, ArticleRepository repository); +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java 
b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..1c32175 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,114 @@ +package com.example.datacollect.command; + +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.exception.UrlFormatException; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.util.RetryUtils; +import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.concurrent.Callable; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final ConsoleView view; + private final StrategyFactory strategyFactory; + + public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { + this.view = view; + this.strategyFactory = strategyFactory; + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + if (args == null || args.length < 2) { + view.printError("用法: crawl "); + logger.warn("无效命令: 缺少URL参数"); + return; + } + + String url = args[1]; + if (url == null || url.trim().isEmpty()) { + view.printError("错误: URL不能为空"); + logger.error("无效参数: URL为空"); + return; + } + + try { + new URL(url); + } catch (MalformedURLException e) { + logger.error("无效URL格式: {}", url, e); + throw new UrlFormatException("无效的URL格式: " + url, url, e); + } + + logger.info("开始爬取: {}", url); + + 
CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + view.printError("未找到策略: " + url); + logger.error("未找到URL对应的策略: {}", url); + return; + } + + try { + view.printInfo("正在爬取: " + url); + + Callable fetchTask = () -> { + logger.debug("正在获取文档: {}", url); + try { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,*/*;q=0.8") + .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8") + .header("Accept-Encoding", "gzip, deflate, br") + .header("Connection", "keep-alive") + .header("Referer", url) + .header("Cache-Control", "max-age=0") + .timeout(15000) + .followRedirects(true) + .get(); + } catch (IOException e) { + throw new NetworkException("连接失败: " + e.getMessage(), e); + } + }; + + Document doc = RetryUtils.executeWithRetry(fetchTask); + logger.info("成功获取文档: {}", url); + + var articles = strategy.parse(url, doc); + logger.info("解析文章数: {}", articles.size()); + + repository.addAll(articles); + logger.info("成功添加 {} 篇文章到仓库", articles.size()); + + view.printSuccess("爬取完成,共 " + articles.size() + " 篇文章。"); + logger.info("成功从 {} 爬取 {} 篇文章", url, articles.size()); + } catch (NetworkException e) { + view.printError(e.getMessage()); + logger.error("爬取 {} 时网络错误: {}", url, e.getMessage(), e); + } catch (ParseException e) { + view.printError("解析错误: " + e.getMessage()); + logger.error("爬取 {} 时解析错误: {}", url, e.getMessage(), e); + } catch (UrlFormatException e) { + view.printError("URL格式错误: " + e.getMessage()); + logger.error("爬取 {} 时URL格式错误: {}", url, e.getMessage(), e); + } catch (Exception e) { + view.printError("爬取失败: " + e.getMessage()); + logger.error("爬取 {} 时发生未知错误: {}", url, e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java 
b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java new file mode 100644 index 0000000..69230cb --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -0,0 +1,42 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.repository.PersistenceManager; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; + +public class ExitCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); + private final ConsoleView view; + private final PersistenceManager persistenceManager; + + public ExitCommand(ConsoleView view, PersistenceManager persistenceManager) { + this.view = view; + this.persistenceManager = persistenceManager; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.info("Exit command executed, saving data before shutdown"); + + try { + persistenceManager.save();/* 保存数据到持久化管理器 */ + view.printInfo("Saved " + repository.size() + " articles"); + logger.info("Successfully saved {} articles before exit", repository.size()); + } catch (IOException e) { + view.printError("Warning: Failed to save data: " + e.getMessage()); + logger.error("Failed to save data on exit: {}", e.getMessage(), e); + } + + view.printSuccess("Bye!"); + System.exit(0); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java new file mode 100644 index 0000000..94eb154 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ExportCommand.java @@ -0,0 +1,66 @@ +package com.example.datacollect.command; + +import 
com.example.datacollect.exception.ExportException; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.repository.PersistenceManager; +import com.example.datacollect.util.JsonExporter; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.file.Paths; + +public class ExportCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ExportCommand.class); + private final ConsoleView view; + private final PersistenceManager persistenceManager; + private final JsonExporter jsonExporter; + + public ExportCommand(ConsoleView view, PersistenceManager persistenceManager, JsonExporter jsonExporter) { + this.view = view; + this.persistenceManager = persistenceManager; + this.jsonExporter = jsonExporter; + } + + @Override + public String getName() { + return "export"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + String filePath = null; + String format = "json"; + + if (args.length < 2) { + view.printError("Usage: export [--format json]"); + logger.warn("Invalid command: missing file path argument"); + return; + } + + filePath = args[1]; + + for (int i = 2; i < args.length; i++) { + if (args[i].equals("--format") && i + 1 < args.length) { + format = args[i + 1].toLowerCase(); + } + } + + logger.info("导出请求: 文件={}, 格式={}", filePath, format); + + try { + if ("json".equals(format)) { + jsonExporter.exportToFile(Paths.get(filePath)); + view.printSuccess("Successfully exported " + repository.size() + " articles to " + filePath); + logger.info("Exported {} articles to {}", repository.size(), filePath); + } else { + view.printError("Unsupported format: " + format + ". 
Only 'json' is supported."); + logger.warn("Unsupported format: {}", format); + } + } catch (ExportException e) { + view.printError("Export failed: " + e.getMessage()); + logger.error("Export error: {}", e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..42e443a --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,33 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HelpCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.info("Help command executed"); + view.printInfo("Commands:"); + view.printInfo(" crawl - Crawl articles from URL"); + view.printInfo(" list - List all articles"); + view.printInfo(" export - Export articles to JSON file"); + view.printInfo(" import - Import articles from JSON file"); + view.printInfo(" analyze - Analyze URL structure"); + view.printInfo(" help - Show this help"); + view.printInfo(" exit - Exit and save data"); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java new file mode 100644 index 0000000..ba89248 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ImportCommand.java @@ -0,0 +1,71 @@ 
+package com.example.datacollect.command; + +import com.example.datacollect.exception.ImportException; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.repository.PersistenceManager; +import com.example.datacollect.util.JsonImporter; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ImportCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ImportCommand.class); + private final ConsoleView view; + private final PersistenceManager persistenceManager; + + public ImportCommand(ConsoleView view, PersistenceManager persistenceManager) { + this.view = view; + this.persistenceManager = persistenceManager; + } + + @Override + public String getName() { + return "import"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + if (args.length < 2) { + view.printError("Usage: import "); + logger.warn("Invalid command: missing file path argument"); + return; + } + + String filePath = args[1]; + + try { + int beforeCount = repository.size(); + JsonImporter.ImportResult result = persistenceManager.importWithReport(filePath); + int afterCount = repository.size(); + + StringBuilder message = new StringBuilder(); + message.append("Import completed:\n"); + message.append(" - Total found: ").append(result.getTotalFound()).append("\n"); + message.append(" - Imported: ").append(result.getImported()).append("\n"); + message.append(" - Skipped (duplicates): ").append(result.getSkipped()).append("\n"); + message.append(" - Invalid: ").append(result.getInvalid()).append("\n"); + message.append(" - Overwritten: ").append(result.getOverwritten()).append("\n"); + message.append(" - Repository total: ").append(afterCount); + + if (!result.getErrors().isEmpty()) { + message.append("\n - Errors: ").append(result.getErrors().size()); + for (int i = 0; i < Math.min(3, result.getErrors().size()); i++) { + 
message.append("\n ").append(i + 1).append(". ").append(result.getErrors().get(i)); + } + if (result.getErrors().size() > 3) { + message.append("\n ... and ").append(result.getErrors().size() - 3).append(" more errors"); + } + } + + view.printSuccess(message.toString()); + logger.info("Import result: {}", result.getSummary()); + + } catch (ImportException e) { + view.printError("Import failed: " + e.getMessage()); + logger.error("Import error: {}", e.getMessage(), e); + } catch (Exception e) { + view.printError("Import failed: " + e.getMessage()); + logger.error("Import error: {}", e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java new file mode 100644 index 0000000..9261a3d --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/command/ListCommand.java @@ -0,0 +1,26 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ListCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); + private final ConsoleView view; + + public ListCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.info("List command executed, showing {} articles", repository.size()); + view.display(repository.getAll()); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..4caad8a --- /dev/null +++ 
b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -0,0 +1,71 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.AnalyzeCommand; +import com.example.datacollect.command.Command; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ExitCommand; +import com.example.datacollect.command.ExportCommand; +import com.example.datacollect.command.HelpCommand; +import com.example.datacollect.command.ImportCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.repository.PersistenceManager; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.util.JsonExporter; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.Map; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>();/* 命令映射表 */ + private final ConsoleView view;/* 控制台视图 */ + private final ArticleRepository repository;/* 文章仓库 */ + + public CrawlerController(ConsoleView view, ArticleRepository repository, + StrategyFactory strategyFactory, PersistenceManager persistenceManager, JsonExporter jsonExporter) { + this.view = view; + this.repository = repository; + register(new HelpCommand(view)); + register(new ListCommand(view)); + register(new CrawlCommand(view, strategyFactory)); + register(new ExitCommand(view, persistenceManager)); + register(new AnalyzeCommand(view, strategyFactory)); + register(new ExportCommand(view, persistenceManager, jsonExporter)); + register(new ImportCommand(view, persistenceManager)); + logger.info("CrawlerController initialized with {} commands", commands.size()); + } + + private void register(Command command) {/* 注册命令 
*/ + commands.put(command.getName(), command);/* 将命令添加到映射表 */ + logger.debug("Registered command: {}", command.getName());/* 记录注册的命令 */ + } + + public void handle(String input) {/* 处理用户输入 */ + String text = input == null ? "" : input.trim();/* 处理空输入 */ + if (text.isEmpty()) { + return; + } + + String[] args = text.split("\\s+");/* 解析命令行参数 */ + String cmdName = args[0].toLowerCase();/* 提取命令名称并转换为小写 */ + + logger.debug("Processing command: {}", cmdName); + + Command command = commands.get(cmdName);/* 获取命令对象 */ + if (command == null) { + view.printError("Unknown command: " + cmdName); + logger.warn("Unknown command attempted: {}", cmdName); + return; + } + + try { + command.execute(args, repository);/* 执行命令 */ + } catch (Exception e) { + view.printError("Command execution failed: " + e.getMessage()); + logger.error("Error executing command {}: {}", cmdName, e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..230adb3 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,10 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends Exception { + public CrawlerException(String message) { + super(message); + } + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java new file mode 100644 index 0000000..5d6c3e5 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/DuplicateArticleException.java @@ -0,0 +1,56 @@ +package com.example.datacollect.exception; + +public class 
DuplicateArticleException extends Exception { + private final String duplicateUrl; + private final Integer existingIndex; + + public DuplicateArticleException(String message) { + super(message); + this.duplicateUrl = null; + this.existingIndex = null; + } + + public DuplicateArticleException(String message, String duplicateUrl) { + super(message); + this.duplicateUrl = duplicateUrl; + this.existingIndex = null; + } + + public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex) { + super(message); + this.duplicateUrl = duplicateUrl; + this.existingIndex = existingIndex; + } + + public DuplicateArticleException(String message, String duplicateUrl, Throwable cause) { + super(message, cause); + this.duplicateUrl = duplicateUrl; + this.existingIndex = null; + } + + public DuplicateArticleException(String message, String duplicateUrl, Integer existingIndex, Throwable cause) { + super(message, cause); + this.duplicateUrl = duplicateUrl; + this.existingIndex = existingIndex; + } + + public String getDuplicateUrl() { + return duplicateUrl; + } + + public Integer getExistingIndex() { + return existingIndex; + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(super.getMessage()); + if (duplicateUrl != null) { + sb.append(" [重复URL: ").append(duplicateUrl).append("]"); + } + if (existingIndex != null) { + sb.append(" [已存在位置: ").append(existingIndex).append("]"); + } + return sb.toString(); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java new file mode 100644 index 0000000..ae46dae --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ExportException.java @@ -0,0 +1,63 @@ +package com.example.datacollect.exception; + +public class ExportException extends Exception { + private final String filePath; + private final 
Long estimatedSize; + + public ExportException(String message) { + super(message); + this.filePath = null; + this.estimatedSize = null; + } + + public ExportException(String message, String filePath) { + super(message); + this.filePath = filePath; + this.estimatedSize = null; + } + + public ExportException(String message, String filePath, Long estimatedSize) { + super(message); + this.filePath = filePath; + this.estimatedSize = estimatedSize; + } + + public ExportException(String message, Throwable cause) { + super(message, cause); + this.filePath = null; + this.estimatedSize = null; + } + + public ExportException(String message, String filePath, Throwable cause) { + super(message, cause); + this.filePath = filePath; + this.estimatedSize = null; + } + + public String getFilePath() { + return filePath; + } + + public Long getEstimatedSize() { + return estimatedSize; + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(super.getMessage()); + if (filePath != null) { + sb.append(" [文件: ").append(filePath).append("]"); + } + if (estimatedSize != null) { + sb.append(" [预估大小: ").append(formatSize(estimatedSize)).append("]"); + } + return sb.toString(); + } + + private static String formatSize(long size) { + if (size < 1024) return size + " B"; + if (size < 1024 * 1024) return String.format("%.2f KB", size / 1024.0); + if (size < 1024 * 1024 * 1024) return String.format("%.2f MB", size / (1024.0 * 1024)); + return String.format("%.2f GB", size / (1024.0 * 1024 * 1024)); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java new file mode 100644 index 0000000..ba893bb --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ImportException.java @@ -0,0 +1,56 @@ +package com.example.datacollect.exception; + +public class ImportException extends Exception { 
+ private final String filePath; + private final Integer lineNumber; + + public ImportException(String message) { + super(message); + this.filePath = null; + this.lineNumber = null; + } + + public ImportException(String message, String filePath) { + super(message); + this.filePath = filePath; + this.lineNumber = null; + } + + public ImportException(String message, String filePath, Integer lineNumber) { + super(message); + this.filePath = filePath; + this.lineNumber = lineNumber; + } + + public ImportException(String message, Throwable cause) { + super(message, cause); + this.filePath = null; + this.lineNumber = null; + } + + public ImportException(String message, String filePath, Throwable cause) { + super(message, cause); + this.filePath = filePath; + this.lineNumber = null; + } + + public String getFilePath() { + return filePath; + } + + public Integer getLineNumber() { + return lineNumber; + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(super.getMessage()); + if (filePath != null) { + sb.append(" [文件: ").append(filePath).append("]"); + } + if (lineNumber != null) { + sb.append(" [行号: ").append(lineNumber).append("]"); + } + return sb.toString(); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..3a24c92 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,10 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message) { + super(message); + } + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java 
b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..09f9f20 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,10 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message) { + super(message); + } + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java new file mode 100644 index 0000000..f94380e --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/UrlFormatException.java @@ -0,0 +1,30 @@ +package com.example.datacollect.exception; + +public class UrlFormatException extends RuntimeException { + + private final String invalidUrl; + + public UrlFormatException(String message) { + super(message); + this.invalidUrl = null; + } + + public UrlFormatException(String message, String invalidUrl) { + super(message); + this.invalidUrl = invalidUrl; + } + + public UrlFormatException(String message, Throwable cause) { + super(message, cause); + this.invalidUrl = null; + } + + public UrlFormatException(String message, String invalidUrl, Throwable cause) { + super(message, cause); + this.invalidUrl = invalidUrl; + } + + public String getInvalidUrl() { + return invalidUrl; + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java new file mode 100644 index 0000000..274ba18 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/exception/ValidationException.java @@ -0,0 +1,72 @@ +package 
com.example.datacollect.exception; + +public class ValidationException extends Exception { + private final String fieldName; + private final String invalidValue; + private final String validationRule; + + public ValidationException(String message) { + super(message); + this.fieldName = null; + this.invalidValue = null; + this.validationRule = null; + } + + public ValidationException(String message, String fieldName) { + super(message); + this.fieldName = fieldName; + this.invalidValue = null; + this.validationRule = null; + } + + public ValidationException(String message, String fieldName, String invalidValue) { + super(message); + this.fieldName = fieldName; + this.invalidValue = invalidValue; + this.validationRule = null; + } + + public ValidationException(String message, String fieldName, String invalidValue, String validationRule) { + super(message); + this.fieldName = fieldName; + this.invalidValue = invalidValue; + this.validationRule = validationRule; + } + + public ValidationException(String message, Throwable cause) { + super(message, cause); + this.fieldName = null; + this.invalidValue = null; + this.validationRule = null; + } + + public String getFieldName() { + return fieldName; + } + + public String getInvalidValue() { + return invalidValue; + } + + public String getValidationRule() { + return validationRule; + } + + @Override + public String getMessage() { + StringBuilder sb = new StringBuilder(super.getMessage()); + if (fieldName != null) { + sb.append(" [字段: ").append(fieldName).append("]"); + } + if (invalidValue != null) { + String displayValue = invalidValue.length() > 50 + ? invalidValue.substring(0, 50) + "..." 
+ : invalidValue; + sb.append(" [值: ").append(displayValue).append("]"); + } + if (validationRule != null) { + sb.append(" [规则: ").append(validationRule).append("]"); + } + return sb.toString(); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..c593e11 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,99 @@ +package com.example.datacollect.model; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import java.time.LocalDateTime; + +public class Article { + private String title; + private String url; + private String content; + private LocalDateTime crawledAt; + + public Article() { + this.crawledAt = LocalDateTime.now(); + } + + public Article(String title, String url, String content) { + setTitle(title); + setUrl(url); + setContent(content); + this.crawledAt = LocalDateTime.now(); + } + + @JsonCreator + public Article(@JsonProperty("title") String title, + @JsonProperty("url") String url, + @JsonProperty("content") String content, + @JsonProperty("crawledAt") LocalDateTime crawledAt) { + setTitle(title); + setUrl(url); + setContent(content); + this.crawledAt = crawledAt != null ? 
crawledAt : LocalDateTime.now(); + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + if (title == null) { + throw new IllegalArgumentException("Title cannot be null"); + } + if (title.trim().isEmpty()) { + throw new IllegalArgumentException("Title cannot be empty"); + } + if (title.length() > 500) { + throw new IllegalArgumentException("Title cannot exceed 500 characters"); + } + this.title = title.trim(); + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + if (url == null) { + throw new IllegalArgumentException("URL cannot be null"); + } + if (url.trim().isEmpty()) { + throw new IllegalArgumentException("URL cannot be empty"); + } + if (!url.startsWith("http://") && !url.startsWith("https://")) { + throw new IllegalArgumentException("URL must start with http:// or https://"); + } + this.url = url.trim(); + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + if (content == null) { + this.content = ""; + } else if (content.length() > 10000) { + this.content = content.substring(0, 10000);/* 截断内容到 10000 个字符 */ + } else { + this.content = content; + } + } + + public LocalDateTime getCrawledAt() { + return crawledAt; + } + + public void setCrawledAt(LocalDateTime crawledAt) { + this.crawledAt = crawledAt; + } + + @Override + public String toString() { + return "Article{" + + "title='" + title + '\'' + + ", url='" + url + '\'' + + ", crawledAt='" + crawledAt + '\'' + + '}'; + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..4b6b981 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -0,0 +1,172 @@ +package com.example.datacollect.repository; + +import 
com.example.datacollect.model.Article; +import com.example.datacollect.util.JsonSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class ArticleRepository implements AutoCloseable { + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private static final int MAX_TITLE_LENGTH = 500; + private static final int MAX_CONTENT_LENGTH = 10000; + + private final List
articles = new ArrayList<>(); + private final Set urlSet = new HashSet<>(); + + public void add(Article article) { + if (article == null) { + logger.error("Attempted to add null article"); + throw new IllegalArgumentException("Article cannot be null"); + } + + String title = article.getTitle(); + String url = article.getUrl(); + String content = article.getContent(); + + if (title == null || title.trim().isEmpty()) { + logger.warn("Attempted to add article with empty title"); + throw new IllegalArgumentException("Article title cannot be null or empty"); + } + + if (url == null || url.trim().isEmpty()) { + logger.warn("Attempted to add article with empty URL"); + throw new IllegalArgumentException("Article URL cannot be null or empty"); + } + + if (title.length() > MAX_TITLE_LENGTH) { + logger.warn("Article title too long: {} characters (max: {})", title.length(), MAX_TITLE_LENGTH); + throw new IllegalArgumentException("Article title exceeds maximum length of " + MAX_TITLE_LENGTH); + } + + if (content != null && content.length() > MAX_CONTENT_LENGTH) { + logger.warn("Article content too long: {} characters (max: {})", content.length(), MAX_CONTENT_LENGTH); + content = content.substring(0, MAX_CONTENT_LENGTH); + } + + if (!url.startsWith("http://") && !url.startsWith("https://")) { + logger.warn("Invalid URL format: {}", url); + throw new IllegalArgumentException("Article URL must start with http:// or https://"); + } + + if (urlSet.contains(url)) { + logger.warn("Duplicate article URL detected: {}", url); + return; + } + + Article validatedArticle = new Article(title.trim(), url.trim(), content != null ? content.trim() : ""); + articles.add(validatedArticle); + urlSet.add(url); + logger.debug("Added article: {}", title); + } + + public void addAll(List
articleList) { + if (articleList == null) { + logger.error("Attempted to add null article list"); + throw new IllegalArgumentException("Article list cannot be null"); + } + + int successCount = 0; + int skipCount = 0; + + for (Article article : articleList) { + if (article != null) { + try { + add(article); + successCount++; + } catch (IllegalArgumentException e) { + logger.warn("Skipped invalid article: {}", e.getMessage()); + skipCount++; + } + } else { + logger.warn("Skipped null article in list"); + skipCount++; + } + } + + logger.info("Added {} articles, skipped {} invalid articles", successCount, skipCount); + } + + public List
getAll() { + logger.debug("Retrieving all articles, total: {}", articles.size()); + return Collections.unmodifiableList(articles); + } + + public int size() { + return articles.size(); + } + + public void clear() { + int count = articles.size(); + articles.clear(); + urlSet.clear(); + logger.info("Cleared repository, removed {} articles", count); + } + + public void remove(Article article) { + if (article == null) { + logger.warn("Attempted to remove null article"); + return; + } + + String url = article.getUrl(); + if (url != null && urlSet.contains(url)) { + articles.remove(article); + urlSet.remove(url); + logger.debug("Removed article: {}", article.getTitle()); + } else { + logger.warn("Article not found in repository: {}", url); + } + } + + public Article findByUrl(String url) { + if (url == null || url.trim().isEmpty()) { + logger.debug("findByUrl called with null or empty URL"); + return null; + } + + for (Article article : articles) { + if (article.getUrl().equals(url)) { + logger.debug("Found article by URL: {}", url); + return article; + } + } + + logger.debug("No article found with URL: {}", url); + return null; + } + + public boolean containsUrl(String url) { + return url != null && urlSet.contains(url); + } + + public void saveToJson(String filePath) throws IOException { + JsonSerializer.writeToFile(articles, filePath); + logger.info("Saved {} articles to JSON file: {}", articles.size(), filePath); + } + + public void loadFromJson(String filePath) throws IOException { + List
loadedArticles = JsonSerializer.readListFromFile(filePath, Article.class); + addAll(loadedArticles); + logger.info("Loaded {} articles from JSON file: {}", loadedArticles.size(), filePath); + } + + public String toJsonString() { + return JsonSerializer.serialize(articles); + } + + public String toJsonStringCompact() { + return JsonSerializer.serializeCompact(articles); + } + + @Override + public void close() { + logger.debug("ArticleRepository closed"); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java new file mode 100644 index 0000000..d85755d --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/repository/PersistenceManager.java @@ -0,0 +1,182 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.exception.ExportException; +import com.example.datacollect.exception.ImportException; +import com.example.datacollect.model.Article; +import com.example.datacollect.util.JsonExporter; +import com.example.datacollect.util.JsonImporter; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.atomic.AtomicBoolean; + +public class PersistenceManager implements AutoCloseable { + private static final Logger logger = LoggerFactory.getLogger(PersistenceManager.class); + private static final String DEFAULT_BACKUP_DIR = "data"; + private static final String DEFAULT_BACKUP_FILE = "articles.json"; + 
private static final String BACKUP_FILE_PATTERN = "articles_%s.json"; + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss"); + + private final ObjectMapper objectMapper; + private final Path backupDir; + private final Path backupFile; + private final ArticleRepository repository; + private final AtomicBoolean autoSaveEnabled; + private final JsonExporter jsonExporter; + private final JsonImporter jsonImporter; + + public PersistenceManager(ArticleRepository repository) { + this(repository, DEFAULT_BACKUP_DIR); + } + + public PersistenceManager(ArticleRepository repository, String backupDir) { + this.repository = repository; + this.backupDir = Paths.get(backupDir); + this.backupFile = this.backupDir.resolve(DEFAULT_BACKUP_FILE); + this.autoSaveEnabled = new AtomicBoolean(true); + + this.objectMapper = new ObjectMapper(); + this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + + this.jsonExporter = new JsonExporter(repository); + this.jsonImporter = new JsonImporter(repository); + + ensureBackupDirExists(); + logger.info("PersistenceManager initialized with backup directory: {}", backupDir); + } + + private void ensureBackupDirExists() { + try { + if (!Files.exists(backupDir)) { + Files.createDirectories(backupDir); + logger.debug("Created backup directory: {}", backupDir); + } + } catch (IOException e) { + logger.error("Failed to create backup directory: {}", e.getMessage(), e); + throw new RuntimeException("Failed to create backup directory", e); + } + } + + public void save() throws IOException { + if (!autoSaveEnabled.get()) { + logger.debug("Auto-save is disabled, skipping save"); + return; + } + + List
articles = repository.getAll(); + + try (BufferedWriter writer = Files.newBufferedWriter(backupFile, StandardCharsets.UTF_8)) { + objectMapper.writeValue(writer, articles); + logger.info("Successfully saved {} articles to {}", articles.size(), backupFile); + } + } + + public void load() throws IOException { + if (!Files.exists(backupFile)) { + logger.info("No backup file found at {}, starting fresh", backupFile); + return; + } + + try (var reader = Files.newBufferedReader(backupFile, StandardCharsets.UTF_8)) { + List
articles = objectMapper.readValue(reader, + objectMapper.getTypeFactory().constructCollectionType(List.class, Article.class)); + + if (articles != null && !articles.isEmpty()) { + repository.addAll(articles); + logger.info("Successfully loaded {} articles from {}", articles.size(), backupFile); + } + } + } + + public void exportTo(String filePath) throws IOException { + try { + JsonExporter.ExportOptions options = new JsonExporter.ExportOptions(); + options.setMode(JsonExporter.ExportMode.MINIMAL); + options.setIncludeMetadata(true); + jsonExporter.exportToFile(Paths.get(filePath), options); + } catch (ExportException e) { + throw new IOException("Export failed: " + e.getMessage(), e); + } + } + + public void importFrom(String filePath) throws IOException { + try { + JsonImporter.ImportOptions options = new JsonImporter.ImportOptions(); + options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP); + jsonImporter.importFromFile(Paths.get(filePath), options); + } catch (ImportException e) { + throw new IOException("Import failed: " + e.getMessage(), e); + } + } + + public void createSnapshot() throws IOException { + String timestamp = LocalDateTime.now().format(DATE_FORMATTER); + Path snapshotFile = backupDir.resolve(String.format(BACKUP_FILE_PATTERN, timestamp)); + + try { + JsonExporter.ExportOptions options = new JsonExporter.ExportOptions(); + options.setMode(JsonExporter.ExportMode.STANDARD); + options.setIncludeMetadata(true); + jsonExporter.exportToFile(snapshotFile, options); + logger.info("Created snapshot: {} ({} articles)", snapshotFile, repository.size()); + } catch (ExportException e) { + throw new IOException("Failed to create snapshot: " + e.getMessage(), e); + } + } + + public List listSnapshots() throws IOException { + List snapshots = new ArrayList<>(); + + if (Files.exists(backupDir)) { + try (var stream = Files.list(backupDir)) { + stream.filter(path -> { + String fileName = path.getFileName().toString(); + return 
fileName.startsWith("articles_") && fileName.endsWith(".json") && !fileName.equals(DEFAULT_BACKUP_FILE); + }).forEach(path -> snapshots.add(path.toString())); + } + } + + return snapshots; + } + + public void setAutoSaveEnabled(boolean enabled) { + autoSaveEnabled.set(enabled); + logger.info("Auto-save {} {}", enabled ? "enabled" : "disabled"); + } + + public boolean isAutoSaveEnabled() { + return autoSaveEnabled.get(); + } + + public String getBackupFilePath() { + return backupFile.toString(); + } + + public JsonImporter.ImportResult importWithReport(String filePath) throws ImportException { + JsonImporter.ImportOptions options = new JsonImporter.ImportOptions(); + options.setDuplicateStrategy(JsonImporter.DuplicateStrategy.SKIP); + return jsonImporter.importFromFile(Paths.get(filePath), options); + } + + @Override + public void close() { + try { + save(); + logger.info("PersistenceManager closed, data saved"); + } catch (IOException e) { + logger.error("Failed to save data on close: {}", e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..ed69e19 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,11 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import java.util.List; + +public interface CrawlStrategy { + List
parse(String url, Document doc) throws ParseException; + boolean supports(String url); +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java new file mode 100644 index 0000000..0635236 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/CsdnStrategy.java @@ -0,0 +1,115 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; + +public class CsdnStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(CsdnStrategy.class); + + @Override + public boolean supports(String url) { + return url.contains("csdn.net"); + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + logger.info("Starting to parse CSDN: {}", url); + List
articles = new ArrayList<>(); + Set seenUrls = new HashSet<>(); + + try { + Elements links = doc.select("a[href*='/article/details/']"); + logger.debug("Found {} article links", links.size()); + + if (links.isEmpty()) { + links = doc.select("a[href*='csdn.net/article/']"); + logger.debug("Trying alternative selector, found {} items", links.size()); + } + + if (links.isEmpty()) { + links = doc.select("a.title, a.article-title, .article-item a, .list-item a"); + logger.debug("Trying fallback selectors, found {} items", links.size()); + } + + for (Element link : links) { + try { + String href = link.attr("href"); + if (href == null || href.isEmpty()) { + continue; + } + + String articleUrl = link.attr("abs:href"); + if (articleUrl == null || articleUrl.isEmpty()) { + if (!href.startsWith("http")) { + if (!href.startsWith("//")) { + articleUrl = "https://" + (href.startsWith("/") ? "" : "/") + href; + } else { + articleUrl = "https:" + href; + } + } else { + articleUrl = href; + } + } + + if (!articleUrl.contains("csdn.net")) { + continue; + } + + if (seenUrls.contains(articleUrl)) { + continue; + } + seenUrls.add(articleUrl); + + String title = link.text().trim(); + + if (title.isEmpty() || title.length() < 5) { + Element titleEl = link.selectFirst("span, h3, h4, .title"); + if (titleEl != null) { + title = titleEl.text().trim(); + } + } + + if (title.isEmpty() || title.length() < 5) { + continue; + } + + String content = ""; + Element parent = link.parent(); + if (parent != null) { + Element desc = parent.selectFirst("p.description, .desc, .summary"); + if (desc != null) { + content = desc.text().trim(); + } + } + + Article article = new Article(title, articleUrl, content); + articles.add(article); + logger.debug("Parsed article: {}", title); + + } catch (Exception e) { + logger.debug("Skipping link due to error: {}", e.getMessage()); + } + } + + if (articles.isEmpty()) { + logger.warn("No articles found. 
CSDN page structure may have changed."); + } + + logger.info("Successfully parsed {} articles from CSDN", articles.size()); + return articles; + } catch (Exception e) { + logger.error("Failed to parse CSDN page: {}", e.getMessage(), e); + throw new ParseException("Failed to parse CSDN: " + e.getMessage(), e); + } + } +} \ No newline at end of file diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java new file mode 100644 index 0000000..6892510 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java @@ -0,0 +1,77 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; + +/* HNU News 策略 +- 添加 logger 成员 +- 添加异常处理 +- 实现防御性编程 */ +public class HnuNewsStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(HnuNewsStrategy.class); + + @Override + public boolean supports(String url) { + return url.contains("news.hnu.edu.cn");/* 支持 HNU News 网站 */ + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + logger.info("Starting to parse HNU News: {}", url); + List
articles = new ArrayList<>();/* 存储储解析后的文章 */ + + try { + Elements listItems = doc.select("ul.list11 li");/* 选择文章列表项 */ + logger.debug("Found {} list items", listItems.size());/* 记录找到的列表项数量 */ + + for (Element li : listItems) { + try { + Element link = li.selectFirst("a");/* 选择列表项中的链接 */ + if (link == null) { + logger.warn("No link found in list item");/* 记录未找到链接 */ + continue; + } + + String articleUrl = link.attr("href");/* 获取链接的 href 属性值 */ + if (!articleUrl.startsWith("http")) { + articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", "");/* 补全相对路径 */ + } + + String title = "";/* 存储文章标题 */ + Element titleEl = link.selectFirst("h4.l2.h4s2");/* 选择标题元素 */ + if (titleEl != null) { + title = titleEl.text().trim();/* 提取标题文本并移除首尾空格 */ + } + + String content = "";/* 存储文章内容 */ + Element contentEl = link.selectFirst("p.l3.ps3");/* 选择内容元素 */ + if (contentEl != null) { + content = contentEl.text().trim();/* 提取内容文本并移除首尾空格 */ + } + + if (!title.isEmpty()) { + Article article = new Article(title, articleUrl, content);/* 创建文章对象 */ + articles.add(article);/* 将文章添加到列表 */ + } else { + logger.warn("Empty title found, skipping article"); + } + } catch (Exception e) { + logger.error("Error parsing individual article: {}", e.getMessage()); + } + } + + logger.info("Successfully parsed {} articles from HNU News", articles.size()); + return articles; + } catch (Exception e) { + logger.error("Failed to parse HNU News page: {}", e.getMessage(), e); + throw new ParseException("Failed to parse HNU News: " + e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java new file mode 100644 index 0000000..eb25935 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/PeopleStrategy.java @@ -0,0 +1,83 @@ +package com.example.datacollect.strategy; + +import 
com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; +/* 人民网策略类 */ +public class PeopleStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(PeopleStrategy.class); + + @Override + public boolean supports(String url) { + return url.contains("people.com.cn");/* 检查URL是否包含people.com.cn */ + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + logger.info("Starting to parse People's Daily News: {}", url); + List
articles = new ArrayList<>();/* 初始化文章列表 */ + + try { + Elements newsItems = doc.select("div.w1000, div.news-item, li.list_item");/* 选择新闻容器 */ + logger.debug("Found {} news containers", newsItems.size()); + + if (newsItems.isEmpty()) { + newsItems = doc.select("a[href*='/n1/']");/* 选择替代选择器 */ + logger.debug("Trying alternative selector, found {} items", newsItems.size()); + } + + for (Element item : newsItems) { + try { + Element link = item.selectFirst("a");/* 选择链接元素 */ + if (link == null) { + link = item.tagName().equals("a") ? item : null;/* 检查是否为链接元素 */ + } + + if (link == null) { + logger.warn("No link found in news item"); + continue; + } + + String articleUrl = link.attr("href");/* 获取链接URL */ + if (!articleUrl.startsWith("http")) {/* 检查是否为绝对URL */ + if (articleUrl.startsWith("/")) { + articleUrl = "https://www.people.com.cn" + articleUrl; + } else { + articleUrl = "https://www.people.com.cn/" + articleUrl; + } + } + + String title = link.text().trim();/* 获取标题文本 */ + + String content = "";/* 初始化内容文本 */ + Element contentEl = item.selectFirst("p, div.ed, div.summary");/* 选择内容元素 */ + if (contentEl != null) { + content = contentEl.text().trim();/* 获取内容文本 */ + } + + if (!title.isEmpty() && title.length() > 5) { + Article article = new Article(title, articleUrl, content);/* 创建文章对象 */ + articles.add(article);/* 添加文章到列表 */ + logger.debug("Parsed article: {}", title);/* 记录解析文章 */ + } else { + logger.warn("Invalid title found, skipping article");/* 记录无效标题 */ + } + } catch (Exception e) { + logger.error("Error parsing individual article: {}", e.getMessage()); + } + } + + logger.info("Successfully parsed {} articles from People's Daily News", articles.size()); + return articles; + } catch (Exception e) { + logger.error("Failed to parse People's Daily News page: {}", e.getMessage(), e); + throw new ParseException("Failed to parse People's Daily News: " + e.getMessage(), e); + } + } +} diff --git 
a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..31554d4 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,35 @@ +package com.example.datacollect.strategy; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; + +public class StrategyFactory { + private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); + private final List strategies = new ArrayList<>(); + + public StrategyFactory() { + strategies.add(new HnuNewsStrategy()); + strategies.add(new YouthStrategy()); + strategies.add(new PeopleStrategy()); + strategies.add(new CsdnStrategy()); + logger.info("Initialized StrategyFactory with {} strategies", strategies.size()); + } + + public CrawlStrategy getStrategy(String url) { + for (CrawlStrategy s : strategies) { + if (s.supports(url)) { + logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url); + return s; + } + } + logger.warn("No strategy found for URL: {}", url); + return null; + } + + public void register(CrawlStrategy strategy) { + strategies.add(strategy); + logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName()); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java new file mode 100644 index 0000000..946cdc3 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/strategy/YouthStrategy.java @@ -0,0 +1,112 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import 
org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; +/* 青年网新闻解析策略*/ +public class YouthStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(YouthStrategy.class); + + @Override + public boolean supports(String url) { + return url.contains("youth.cn");/* 检查URL是否包含青年网域名 */ + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + logger.info("Starting to parse Youth News: {}", url); + List
articles = new ArrayList<>(); + + try { + Elements newsItems = doc.select("div.news-item, div.article-item, li.news-list-item, div.list-item, ul.list li, .news-list li"); + logger.debug("Found {} news items with primary selectors", newsItems.size()); + + if (newsItems.isEmpty()) { + newsItems = doc.select("a[href*='/n1/'], a[href*='/gn/'], a[href*='/qy/'], a[href*='/jj/']"); + logger.debug("Trying alternative selector (news category links), found {} items", newsItems.size()); + } + + if (newsItems.isEmpty()) { + newsItems = doc.select("a[href$='.html']"); + logger.debug("Trying fallback selector (html links), found {} items", newsItems.size()); + } + + for (Element item : newsItems) { + try { + Element link = item.selectFirst("a"); + if (link == null) { + link = item.tagName().equals("a") ? item : null; + } + + if (link == null) { + logger.debug("No link found in item, skipping"); + continue; + } + + String articleUrl = link.attr("href"); + + if (!articleUrl.startsWith("http")) { + if (articleUrl.startsWith("/")) { + articleUrl = "https://www.youth.cn" + articleUrl; + } else { + articleUrl = "https://www.youth.cn/" + articleUrl; + } + } + + String title = link.text().trim(); + + if (title.isEmpty()) { + Element titleEl = link.selectFirst("span, h3, h4, .title"); + if (titleEl != null) { + title = titleEl.text().trim(); + } + } + + if (title.isEmpty()) { + Element parentTitle = item.selectFirst("span, h3, h4, .title, .news-title"); + if (parentTitle != null) { + title = parentTitle.text().trim(); + } + } + + if (title.isEmpty()) { + logger.debug("Empty title found, skipping"); + continue; + } + + String content = ""; + Element contentEl = item.selectFirst("p.summary, p.desc, div.brief, .summary, .desc"); + if (contentEl != null) { + content = contentEl.text().trim(); + } + + if (!title.isEmpty() && title.length() > 5) { + Article article = new Article(title, articleUrl, content); + articles.add(article); + logger.debug("Parsed article: {}", title); + } else { + 
logger.debug("Invalid title found (length: {}), skipping article", title.length()); + } + } catch (Exception e) { + logger.debug("Error parsing individual article: {}", e.getMessage()); + } + } + + if (articles.isEmpty()) { + logger.warn("No articles found. Youth.cn page structure may have changed."); + } + + logger.info("Successfully parsed {} articles from Youth News", articles.size()); + return articles; + } catch (Exception e) { + logger.error("Failed to parse Youth News page: {}", e.getMessage(), e); + throw new ParseException("Failed to parse Youth News: " + e.getMessage(), e); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java new file mode 100644 index 0000000..705fcaf --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonExporter.java @@ -0,0 +1,261 @@ +package com.example.datacollect.util; + +import com.example.datacollect.exception.ExportException; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.*; +import java.util.stream.Collectors; + +public class JsonExporter { + private static final Logger logger = LoggerFactory.getLogger(JsonExporter.class); + private static final String VERSION = "1.0"; + private static final DateTimeFormatter EXPORT_TIME_FORMAT = DateTimeFormatter.ISO_LOCAL_DATE_TIME; + + public enum ExportMode { + STANDARD, + COMPACT, + MINIMAL + } + + public static class 
ExportOptions { + private ExportMode mode = ExportMode.STANDARD; + private String filterKeyword; + private LocalDateTime startDate; + private LocalDateTime endDate; + private boolean includeMetadata = true; + + public ExportOptions() {} + + public ExportMode getMode() { + return mode; + } + + public void setMode(ExportMode mode) { + this.mode = mode; + } + + public String getFilterKeyword() { + return filterKeyword; + } + + public void setFilterKeyword(String filterKeyword) { + this.filterKeyword = filterKeyword; + } + + public LocalDateTime getStartDate() { + return startDate; + } + + public void setStartDate(LocalDateTime startDate) { + this.startDate = startDate; + } + + public LocalDateTime getEndDate() { + return endDate; + } + + public void setEndDate(LocalDateTime endDate) { + this.endDate = endDate; + } + + public boolean isIncludeMetadata() { + return includeMetadata; + } + + public void setIncludeMetadata(boolean includeMetadata) { + this.includeMetadata = includeMetadata; + } + } + + public static class ExportMetadata { + private String exportTime; + private int totalCount; + private String source; + private String exportMode; + private String version; + + public ExportMetadata() {} + + public String getExportTime() { + return exportTime; + } + + public void setExportTime(String exportTime) { + this.exportTime = exportTime; + } + + public int getTotalCount() { + return totalCount; + } + + public void setTotalCount(int totalCount) { + this.totalCount = totalCount; + } + + public String getSource() { + return source; + } + + public void setSource(String source) { + this.source = source; + } + + public String getExportMode() { + return exportMode; + } + + public void setExportMode(String exportMode) { + this.exportMode = exportMode; + } + + public String getVersion() { + return version; + } + + public void setVersion(String version) { + this.version = version; + } + } + + private final ArticleRepository repository; + private final ObjectMapper objectMapper; 
+ + public JsonExporter(ArticleRepository repository) { + this.repository = repository; + this.objectMapper = new ObjectMapper(); + } + + public void exportToFile(Path targetPath) throws ExportException { + exportToFile(targetPath, new ExportOptions()); + } + + public void exportToFile(Path targetPath, ExportOptions options) throws ExportException { + logger.info("开始导出到文件: {}, 模式: {}", targetPath, options.getMode()); + + validateTargetPath(targetPath); + + try { + List
articles = getFilteredArticles(options); + logger.debug("过滤后待导出文章数: {}", articles.size()); + + String json = generateJson(articles, options); + + try (BufferedWriter writer = Files.newBufferedWriter(targetPath, StandardCharsets.UTF_8)) { + writer.write(json); + } + + logger.info("成功导出 {} 篇文章到: {}", articles.size(), targetPath); + } catch (IOException e) { + logger.error("导出文件失败: {}", e.getMessage(), e); + throw new ExportException("无法写入导出文件: " + e.getMessage(), targetPath.toString(), e); + } + } + + public String exportToString() throws ExportException { + return exportToString(new ExportOptions()); + } + + public String exportToString(ExportOptions options) throws ExportException { + List
articles = getFilteredArticles(options); + return generateJson(articles, options); + } + + private List
getFilteredArticles(ExportOptions options) { + List
articles = repository.getAll(); + + if (options.getFilterKeyword() != null && !options.getFilterKeyword().trim().isEmpty()) { + String keyword = options.getFilterKeyword().toLowerCase(); + articles = articles.stream() + .filter(a -> a.getTitle().toLowerCase().contains(keyword) + || a.getContent().toLowerCase().contains(keyword)) + .collect(Collectors.toList()); + logger.debug("关键词过滤后剩余文章数: {}", articles.size()); + } + + return articles; + } + + private String generateJson(List
articles, ExportOptions options) throws ExportException { + try { + Map output = new LinkedHashMap<>(); + + if (options.isIncludeMetadata() && options.getMode() != ExportMode.MINIMAL) { + ExportMetadata metadata = new ExportMetadata(); + metadata.setExportTime(LocalDateTime.now().format(EXPORT_TIME_FORMAT)); + metadata.setTotalCount(articles.size()); + metadata.setSource("CLI Crawler v" + VERSION); + metadata.setExportMode(options.getMode().name()); + metadata.setVersion(VERSION); + output.put("metadata", metadata); + } + + output.put("articles", articles); + + if (options.getMode() == ExportMode.STANDARD) { + return objectMapper.writerWithDefaultPrettyPrinter().writeValueAsString(output); + } else { + return objectMapper.writeValueAsString(output); + } + } catch (Exception e) { + logger.error("生成JSON失败: {}", e.getMessage(), e); + throw new ExportException("无法生成JSON: " + e.getMessage(), e); + } + } + + private void validateTargetPath(Path targetPath) throws ExportException { + if (targetPath == null) { + throw new ExportException("导出路径不能为空"); + } + + Path parent = targetPath.getParent(); + if (parent != null && !Files.exists(parent)) { + try { + Files.createDirectories(parent); + logger.info("创建导出目录: {}", parent); + } catch (IOException e) { + throw new ExportException("无法创建导出目录: " + parent, e); + } + } + } + + public List exportWithSnapshots(String baseDir) throws ExportException { + logger.info("开始批量导出快照到目录: {}", baseDir); + + List exportedFiles = new ArrayList<>(); + Path basePath = Path.of(baseDir); + + try { + if (!Files.exists(basePath)) { + Files.createDirectories(basePath); + } + + ExportOptions standardOptions = new ExportOptions(); + standardOptions.setMode(ExportMode.STANDARD); + standardOptions.setIncludeMetadata(true); + + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + Path snapshotPath = basePath.resolve("snapshot_" + timestamp + ".json"); + exportToFile(snapshotPath, standardOptions); + 
exportedFiles.add(snapshotPath); + + logger.info("批量导出完成,共导出 {} 个文件", exportedFiles.size()); + } catch (Exception e) { + logger.error("批量导出失败: {}", e.getMessage(), e); + throw new ExportException("批量导出失败: " + e.getMessage(), e); + } + + return exportedFiles; + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java new file mode 100644 index 0000000..740083a --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonImporter.java @@ -0,0 +1,386 @@ +package com.example.datacollect.util; + +import com.example.datacollect.exception.DuplicateArticleException; +import com.example.datacollect.exception.ImportException; +import com.example.datacollect.exception.ValidationException; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.fasterxml.jackson.databind.ObjectMapper; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.BufferedReader; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.*; +import java.time.LocalDateTime; +import java.util.regex.Pattern; + +public class JsonImporter { + private static final Logger logger = LoggerFactory.getLogger(JsonImporter.class); + + private static final Pattern URL_PATTERN = Pattern.compile("^https?://.*"); + private static final int MAX_TITLE_LENGTH = 500; + private static final int MAX_CONTENT_LENGTH = 10000; + + public enum DuplicateStrategy { + SKIP, + OVERWRITE, + ERROR + } + + public static class ImportOptions { + private DuplicateStrategy duplicateStrategy = DuplicateStrategy.SKIP; + private boolean validateUrl = true; + private boolean validateTitle = true; + private boolean skipInvalid = true; + private int maxContentLength = MAX_CONTENT_LENGTH; + + public ImportOptions() 
{} + + public DuplicateStrategy getDuplicateStrategy() { + return duplicateStrategy; + } + + public void setDuplicateStrategy(DuplicateStrategy duplicateStrategy) { + this.duplicateStrategy = duplicateStrategy; + } + + public boolean isValidateUrl() { + return validateUrl; + } + + public void setValidateUrl(boolean validateUrl) { + this.validateUrl = validateUrl; + } + + public boolean isValidateTitle() { + return validateTitle; + } + + public void setValidateTitle(boolean validateTitle) { + this.validateTitle = validateTitle; + } + + public boolean isSkipInvalid() { + return skipInvalid; + } + + public void setSkipInvalid(boolean skipInvalid) { + this.skipInvalid = skipInvalid; + } + + public int getMaxContentLength() { + return maxContentLength; + } + + public void setMaxContentLength(int maxContentLength) { + this.maxContentLength = maxContentLength; + } + } + + public static class ImportResult { + private int totalFound; + private int imported; + private int skipped; + private int invalid; + private int overwritten; + private List errors; + private List warnings; + + public ImportResult() { + this.errors = new ArrayList<>(); + this.warnings = new ArrayList<>(); + } + + public int getTotalFound() { + return totalFound; + } + + public void setTotalFound(int totalFound) { + this.totalFound = totalFound; + } + + public int getImported() { + return imported; + } + + public void setImported(int imported) { + this.imported = imported; + } + + public int getSkipped() { + return skipped; + } + + public void setSkipped(int skipped) { + this.skipped = skipped; + } + + public int getInvalid() { + return invalid; + } + + public void setInvalid(int invalid) { + this.invalid = invalid; + } + + public int getOverwritten() { + return overwritten; + } + + public void setOverwritten(int overwritten) { + this.overwritten = overwritten; + } + + public List getErrors() { + return errors; + } + + public void addError(String error) { + this.errors.add(error); + } + + public List 
getWarnings() { + return warnings; + } + + public void addWarning(String warning) { + this.warnings.add(warning); + } + + public String getSummary() { + return String.format( + "导入完成: 总共找到=%d, 成功导入=%d, 跳过=%d, 无效=%d, 覆盖=%d, 错误=%d", + totalFound, imported, skipped, invalid, overwritten, errors.size() + ); + } + } + + private final ArticleRepository repository; + private final ObjectMapper objectMapper; + + public JsonImporter(ArticleRepository repository) { + this.repository = repository; + this.objectMapper = new ObjectMapper(); + } + + public ImportResult importFromFile(Path sourcePath) throws ImportException { + return importFromFile(sourcePath, new ImportOptions()); + } + + public ImportResult importFromFile(Path sourcePath, ImportOptions options) throws ImportException { + logger.info("开始从文件导入: {}", sourcePath); + validateSourcePath(sourcePath); + + ImportResult result = new ImportResult(); + + try { + String content = readFileContent(sourcePath); + List
articles = parseArticles(content, result); + result.setTotalFound(articles.size()); + + logger.debug("解析到 {} 篇文章", articles.size()); + + for (int i = 0; i < articles.size(); i++) { + Article article = articles.get(i); + try { + processArticle(article, options, result, i); + } catch (ValidationException e) { + logger.warn("文章验证失败 [位置 {}]: {}", i, e.getMessage()); + result.addError("无效文章 at index " + i + ": " + e.getMessage()); + result.setInvalid(result.getInvalid() + 1); + if (!options.isSkipInvalid()) { + throw new ImportException("文章验证失败: " + e.getMessage(), sourcePath.toString(), i); + } + } catch (DuplicateArticleException e) { + logger.warn("重复文章 [位置 {}]: {}", i, e.getMessage()); + result.setSkipped(result.getSkipped() + 1); + } + } + + logger.info("导入完成: {}", result.getSummary()); + + } catch (IOException e) { + logger.error("读取文件失败: {}", e.getMessage(), e); + throw new ImportException("无法读取导入文件: " + e.getMessage(), sourcePath.toString(), e); + } catch (ImportException e) { + throw e; + } catch (Exception e) { + logger.error("导入过程出错: {}", e.getMessage(), e); + throw new ImportException("导入失败: " + e.getMessage(), sourcePath.toString(), e); + } + + return result; + } + + public List
parseArticles(String json) throws ImportException { + ImportResult result = new ImportResult(); + return parseArticles(json, result); + } + + private List
parseArticles(String json, ImportResult result) throws ImportException { + try { + Map data = objectMapper.readValue(json, Map.class); + + List articlesList = null; + if (data.containsKey("articles")) { + articlesList = (List) data.get("articles"); + } else if (data.containsKey("data")) { + articlesList = (List) data.get("data"); + } else if (data instanceof List) { + articlesList = (List) data; + } + + if (articlesList == null) { + throw new ImportException("JSON格式错误:未找到 'articles' 或 'data' 字段"); + } + + List
articles = new ArrayList<>(); + for (int i = 0; i < articlesList.size(); i++) { + try { + Object item = articlesList.get(i); + if (item instanceof Map) { + Article article = mapToArticle((Map) item, i); + articles.add(article); + } + } catch (Exception e) { + logger.warn("解析第 {} 篇文章失败: {}", i, e.getMessage()); + result.addError("解析失败 at index " + i + ": " + e.getMessage()); + } + } + + return articles; + } catch (ImportException e) { + throw e; + } catch (Exception e) { + logger.error("JSON解析失败: {}", e.getMessage(), e); + throw new ImportException("JSON解析失败: " + e.getMessage(), e); + } + } + + @SuppressWarnings("unchecked") + private Article mapToArticle(Map map, int index) throws ValidationException { + String title = (String) map.get("title"); + String url = (String) map.get("url"); + String content = (String) map.get("content"); + Object crawledAtObj = map.get("crawledAt"); + LocalDateTime crawledAt = null; + + if (crawledAtObj != null) { + try { + if (crawledAtObj instanceof String) { + crawledAt = LocalDateTime.parse((String) crawledAtObj); + } + } catch (Exception e) { + logger.warn("无法解析 crawledAt 字段: {}, 使用默认值", crawledAtObj); + } + } + + if (title == null || title.trim().isEmpty()) { + throw new ValidationException("标题不能为空", "title", null, "非空字符串"); + } + + if (url == null || url.trim().isEmpty()) { + throw new ValidationException("URL不能为空", "url", null, "非空字符串"); + } + + if (content == null) { + content = ""; + } + + return new Article(title.trim(), url.trim(), content.trim(), crawledAt); + } + + private void processArticle(Article article, ImportOptions options, ImportResult result, int index) + throws ValidationException, DuplicateArticleException { + + if (options.isValidateTitle() && article.getTitle().length() > MAX_TITLE_LENGTH) { + throw new ValidationException( + "标题过长: 最大" + MAX_TITLE_LENGTH + "字符", + "title", + article.getTitle(), + "长度 <= " + MAX_TITLE_LENGTH + ); + } + + if (options.isValidateUrl() && 
!URL_PATTERN.matcher(article.getUrl()).matches()) { + throw new ValidationException( + "URL格式无效: " + article.getUrl(), + "url", + article.getUrl(), + "必须以 http:// 或 https:// 开头" + ); + } + + Article existing = repository.findByUrl(article.getUrl()); + if (existing != null) { + switch (options.getDuplicateStrategy()) { + case SKIP: + logger.debug("跳过重复文章: {}", article.getUrl()); + throw new DuplicateArticleException("文章URL已存在: " + article.getUrl(), article.getUrl()); + + case OVERWRITE: + logger.debug("覆盖重复文章: {}", article.getUrl()); + repository.remove(existing); + repository.add(article); + result.setOverwritten(result.getOverwritten() + 1); + result.setImported(result.getImported() + 1); + return; + + case ERROR: + throw new DuplicateArticleException( + "发现重复URL: " + article.getUrl(), + article.getUrl(), + repository.getAll().indexOf(existing) + ); + } + } + + String content = article.getContent(); + if (content.length() > options.getMaxContentLength()) { + content = content.substring(0, options.getMaxContentLength()); + logger.debug("文章内容已截断到 {} 字符: {}", options.getMaxContentLength(), article.getTitle()); + } + + repository.add(article); + result.setImported(result.getImported() + 1); + logger.debug("成功导入文章: {}", article.getTitle()); + } + + private String readFileContent(Path sourcePath) throws IOException { + StringBuilder content = new StringBuilder(); + try (BufferedReader reader = Files.newBufferedReader(sourcePath, StandardCharsets.UTF_8)) { + String line; + while ((line = reader.readLine()) != null) { + content.append(line).append("\n"); + } + } + return content.toString(); + } + + private void validateSourcePath(Path sourcePath) throws ImportException { + if (sourcePath == null) { + throw new ImportException("导入路径不能为空"); + } + + if (!Files.exists(sourcePath)) { + throw new ImportException("导入文件不存在: " + sourcePath, sourcePath.toString()); + } + + if (!Files.isReadable(sourcePath)) { + throw new ImportException("文件不可读: " + sourcePath, 
sourcePath.toString()); + } + + try { + long size = Files.size(sourcePath); + if (size > 100 * 1024 * 1024) { + logger.warn("导入文件较大 ({} MB),处理可能较慢", size / (1024 * 1024)); + } + } catch (IOException e) { + logger.warn("无法获取文件大小: {}", e.getMessage()); + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java new file mode 100644 index 0000000..c1f606b --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/JsonSerializer.java @@ -0,0 +1,81 @@ +package com.example.datacollect.util; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.fasterxml.jackson.databind.SerializationFeature; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.File; +import java.io.IOException; +import java.util.List; + +public class JsonSerializer { + private static final Logger logger = LoggerFactory.getLogger(JsonSerializer.class); + private static final ObjectMapper objectMapper = new ObjectMapper(); + + static { + objectMapper.enable(SerializationFeature.INDENT_OUTPUT); + } + + private JsonSerializer() { + } + + public static String serialize(T obj) { + try { + return objectMapper.writeValueAsString(obj); + } catch (Exception e) { + logger.error("Failed to serialize object", e); + throw new RuntimeException("Failed to serialize object", e); + } + } + + public static String serializeCompact(T obj) { + try { + ObjectMapper compactMapper = new ObjectMapper(); + return compactMapper.writeValueAsString(obj); + } catch (Exception e) { + logger.error("Failed to serialize object (compact)", e); + throw new RuntimeException("Failed to serialize object", e); + } + } + + public static T deserialize(String json, Class clazz) { + try { + return objectMapper.readValue(json, clazz); + } catch (Exception e) { + logger.error("Failed to deserialize object", e); + throw new RuntimeException("Failed 
to deserialize object", e); + } + } + + public static List deserializeList(String json, Class clazz) { + try { + return objectMapper.readValue(json, + objectMapper.getTypeFactory().constructCollectionType(List.class, clazz)); + } catch (Exception e) { + logger.error("Failed to deserialize list", e); + throw new RuntimeException("Failed to deserialize list", e); + } + } + + public static void writeToFile(T obj, String filePath) throws IOException { + File file = new File(filePath); + objectMapper.writeValue(file, obj); + logger.debug("Successfully wrote object to file: {}", filePath); + } + + public static T readFromFile(String filePath, Class clazz) throws IOException { + File file = new File(filePath); + T obj = objectMapper.readValue(file, clazz); + logger.debug("Successfully read object from file: {}", filePath); + return obj; + } + + public static List readListFromFile(String filePath, Class clazz) throws IOException { + File file = new File(filePath); + List list = objectMapper.readValue(file, + objectMapper.getTypeFactory().constructCollectionType(List.class, clazz)); + logger.debug("Successfully read list from file: {}", filePath); + return list; + } +} \ No newline at end of file diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java new file mode 100644 index 0000000..d749419 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/util/RetryUtils.java @@ -0,0 +1,39 @@ +package com.example.datacollect.util; + +import com.example.datacollect.exception.NetworkException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.concurrent.Callable; + +public class RetryUtils { + private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class); + + private static final int MAX_RETRIES = 3; + private static final long BASE_DELAY_MS = 500; + + public static T 
executeWithRetry(Callable task) throws NetworkException { + Exception lastException = null; + + for (int attempt = 0; attempt <= MAX_RETRIES; attempt++) { + try { + if (attempt > 0) { + long waitTime = BASE_DELAY_MS * (long) Math.pow(2, attempt - 1); + logger.info("重试 {}/{} 次,等待 {} ms", attempt, MAX_RETRIES, waitTime); + Thread.sleep(waitTime); + } + + return task.call(); + } catch (Exception e) { + lastException = e; + logger.warn("第 {} 次尝试失败: {}", attempt + 1, e.getMessage()); + + if (attempt < MAX_RETRIES) { + continue; + } + } + } + + logger.error("所有 {} 次重试均失败", MAX_RETRIES + 1); + throw new NetworkException("网络错误,已重试三次", lastException); + } +} diff --git a/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..a26e19c --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,52 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView implements AutoCloseable { + private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + @Override + public void close() { + scanner.close();/* 关闭扫描器,释放资源 */ + logger.debug("ConsoleView closed"); + } + + public String readLine() { + System.out.print("> "); + String input = scanner.nextLine(); + return input;/* 返回用户输入 */ + } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void 
printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i);/* 获取文章 */ + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());/* 打印文章标题和URL */ + } + } +} diff --git a/project/java-cli-期末课程项目/src/main/resources/logback.xml b/project/java-cli-期末课程项目/src/main/resources/logback.xml new file mode 100644 index 0000000..221a083 --- /dev/null +++ b/project/java-cli-期末课程项目/src/main/resources/logback.xml @@ -0,0 +1,25 @@ + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + logs/crawler.log + + logs/crawler.%d{yyyy-MM-dd}.log + 30 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/project/java-cli-期末课程项目/target/classes/logback.xml b/project/java-cli-期末课程项目/target/classes/logback.xml new file mode 100644 index 0000000..221a083 --- /dev/null +++ b/project/java-cli-期末课程项目/target/classes/logback.xml @@ -0,0 +1,25 @@ + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + logs/crawler.log + + logs/crawler.%d{yyyy-MM-dd}.log + 30 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + diff --git a/project/java-cli-期末课程项目/target/maven-archiver/pom.properties b/project/java-cli-期末课程项目/target/maven-archiver/pom.properties new file mode 100644 index 0000000..5c1de34 --- /dev/null +++ b/project/java-cli-期末课程项目/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=datacollect-cli +groupId=com.example +version=0.1.0 diff --git a/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..e69de29 diff --git a/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst 
b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..0ccfd6d --- /dev/null +++ b/project/java-cli-期末课程项目/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,32 @@ +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\PersistenceManager.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExitCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\CrawlCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ExportException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ExportCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ImportCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ImportException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\DuplicateArticleException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\CrawlerException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\Command.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\model\Article.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\PeopleStrategy.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\NetworkException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\controller\CrawlerController.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\StrategyFactory.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonImporter.java 
+C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\RetryUtils.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\ListCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\Main.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\UrlFormatException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonSerializer.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ParseException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\YouthStrategy.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\strategy\CsdnStrategy.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\HelpCommand.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\repository\ArticleRepository.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\exception\ValidationException.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\view\ConsoleView.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\util\JsonExporter.java +C:\Users\27687\Desktop\java-cli-期末\src\main\java\com\example\datacollect\command\AnalyzeCommand.java diff --git a/project/java-cli-期末课程项目/test_crawler.ps1 b/project/java-cli-期末课程项目/test_crawler.ps1 new file mode 100644 index 0000000..3ad50ef --- /dev/null +++ b/project/java-cli-期末课程项目/test_crawler.ps1 @@ -0,0 +1,92 @@ +$ErrorActionPreference = "Continue" + +Write-Host "=== 测试 CLI 爬虫程序 ===" -ForegroundColor Cyan + +# 测试1: 启动程序并显示帮助 +Write-Host "`n1. 测试帮助命令..." 
-ForegroundColor Yellow +$helpOutput = echo "help" | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "帮助命令执行失败" -ForegroundColor Red + Write-Host $helpOutput +} else { + Write-Host "帮助命令执行成功" -ForegroundColor Green + Write-Host $helpOutput | Select-Object -First 15 +} + +# 测试2: 测试 list 命令(空列表) +Write-Host "`n2. 测试 list 命令(空列表)..." -ForegroundColor Yellow +$listOutput = echo "list" | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "list 命令执行失败" -ForegroundColor Red + Write-Host $listOutput +} else { + Write-Host "list 命令执行成功" -ForegroundColor Green +} + +# 测试3: 测试 Juejin 策略 +Write-Host "`n3. 测试 Juejin 策略..." -ForegroundColor Yellow +$juejinOutput = @("crawl https://juejin.cn/", "list", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "Juejin 策略测试失败" -ForegroundColor Red + Write-Host $juejinOutput | Select-Object -Last 10 +} else { + $articleCount = ($juejinOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' }) + Write-Host "Juejin 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green +} + +# 测试4: 测试 HnuNews 策略 +Write-Host "`n4. 测试 HnuNews 策略..." -ForegroundColor Yellow +$hnuOutput = @("crawl https://news.hnu.edu.cn/", "list", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "HnuNews 策略测试失败" -ForegroundColor Red + Write-Host $hnuOutput | Select-Object -Last 10 +} else { + $articleCount = ($hnuOutput | Select-String "Crawled" | ForEach-Object { $_.Line -replace "Crawled (\d+) articles\.", '$1' }) + Write-Host "HnuNews 策略测试成功 - 爬取到 $articleCount 篇文章" -ForegroundColor Green +} + +# 测试5: 测试导出功能 +Write-Host "`n5. 测试导出功能..." 
-ForegroundColor Yellow +$exportOutput = @("crawl https://juejin.cn/", "export test_export.json", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if (-not (Test-Path "test_export.json")) { + Write-Host "导出功能测试失败" -ForegroundColor Red + Write-Host $exportOutput | Select-Object -Last 10 +} else { + $fileSize = (Get-Item "test_export.json").Length + Write-Host "导出功能测试成功 - 文件大小: $fileSize 字节" -ForegroundColor Green + Remove-Item "test_export.json" -Force +} + +# 测试6: 测试导入功能 +Write-Host "`n6. 测试导入功能..." -ForegroundColor Yellow +@("crawl https://juejin.cn/", "export import_test.json", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 | Out-Null +$importOutput = @("import import_test.json", "list", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($LASTEXITCODE -ne 0) { + Write-Host "导入功能测试失败" -ForegroundColor Red + Write-Host $importOutput | Select-Object -Last 10 +} else { + Write-Host "导入功能测试成功" -ForegroundColor Green + Remove-Item "import_test.json" -Force +} + +# 测试7: 测试未知命令 +Write-Host "`n7. 测试未知命令处理..." -ForegroundColor Yellow +$unknownOutput = echo "unknown_command" | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($unknownOutput -match "Unknown command") { + Write-Host "未知命令处理测试成功" -ForegroundColor Green +} else { + Write-Host "未知命令处理测试失败" -ForegroundColor Red +} + +# 测试8: 测试会话持久化(退出后重新启动) +Write-Host "`n8. 测试会话持久化..." 
-ForegroundColor Yellow +@("crawl https://juejin.cn/", "exit") | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 | Out-Null +$restoreOutput = echo "list" | java -jar target\datacollect-cli-0.1.0-jar-with-dependencies.jar 2>&1 +if ($restoreOutput -match "Loaded") { + Write-Host "会话持久化测试成功" -ForegroundColor Green +} else { + Write-Host "会话持久化测试失败" -ForegroundColor Red + Write-Host $restoreOutput | Select-Object -Last 5 +} + +Write-Host "`n=== 测试完成 ===" -ForegroundColor Cyan \ No newline at end of file diff --git a/project/java-cli-期末课程项目/test_export.json b/project/java-cli-期末课程项目/test_export.json new file mode 100644 index 0000000..a3ea8df --- /dev/null +++ b/project/java-cli-期末课程项目/test_export.json @@ -0,0 +1,17 @@ +[ { + "title" : "7月1日起施行 超龄劳动者迎来权益保障新规", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40727022.html", + "content" : "" +}, { + "title" : "经港珠澳大桥出入境港澳单牌车总量突破1000万辆次", + "url" : "http://gba.people.cn/n1/2026/0525/c42272-40726946.html", + "content" : "" +}, { + "title" : "外交部谈美伊谈判", + "url" : "http://world.people.com.cn/n1/2026/0525/c1002-40726926.html", + "content" : "" +}, { + "title" : "重庆发布今年首个地质灾害红色预警", + "url" : "http://society.people.com.cn/n1/2026/0525/c1008-40726849.html", + "content" : "" +} ] \ No newline at end of file diff --git a/project/java-cli-期末课程项目/test_import_export.ps1 b/project/java-cli-期末课程项目/test_import_export.ps1 new file mode 100644 index 0000000..c7074af --- /dev/null +++ b/project/java-cli-期末课程项目/test_import_export.ps1 @@ -0,0 +1,236 @@ +# Test Script for CLI Crawler - Data Import/Export Features +# This script automates the test sequence + +$ErrorActionPreference = "Stop" +$env:JAVA_HOME = "C:\Program Files\Java\latest\jdk-25" +$APP_JAR = "target\datacollect-cli-0.1.0-jar-with-dependencies.jar" +$TEST_EXPORT_FILE = "data\test_export.json" +$USERPROFILE_PATH = "$env:USERPROFILE\.datacollect" + +Write-Host "========================================" -ForegroundColor Cyan +Write-Host "CLI Crawler 
- Import/Export Test Suite" -ForegroundColor Cyan
+Write-Host "========================================" -ForegroundColor Cyan
+Write-Host ""
+
+# NOTE: $APP_JAR, $USERPROFILE_PATH and $TEST_EXPORT_FILE are read below and are
+# expected to be defined earlier in this script.
+
+# Print a three-line cyan banner: rule, title, rule.
+function Write-Banner {
+    param([string]$Title)
+    $rule = "========================================"
+    Write-Host $rule -ForegroundColor Cyan
+    Write-Host $Title -ForegroundColor Cyan
+    Write-Host $rule -ForegroundColor Cyan
+}
+
+# Run the application jar with a single command string, echo its combined
+# stdout/stderr in green and return the captured output to the caller.
+function Invoke-AppCommand {
+    param([string]$Command)
+    $output = & java -jar $APP_JAR $Command 2>&1
+    # Captured multi-line output is an array; Write-Host joins array elements
+    # with a space by default, so ask for a newline separator to keep the
+    # CLI output readable line by line.
+    Write-Host $output -Separator "`n" -ForegroundColor Green
+    return $output
+}
+
+# Extract N from the first "Total: N articles" line of captured CLI output.
+# Returns 0 when no such line is present (same default the steps used before).
+function Get-ArticleCount {
+    param($CliOutput)
+    foreach ($line in ($CliOutput -split "`n")) {
+        if ($line -match "Total: (\d+) articles") {
+            return [int]$Matches[1]
+        }
+    }
+    return 0
+}
+
+# Clean up function: empties the data directory and deletes the previous export file.
+function Clean-Up {
+    Write-Host "[CLEANUP] Removing old data files..." -ForegroundColor Yellow
+    if (Test-Path $USERPROFILE_PATH) {
+        # Removes everything inside the directory, not the directory itself.
+        Remove-Item "$USERPROFILE_PATH\*" -Force -Recurse -ErrorAction SilentlyContinue
+    }
+    if (Test-Path $TEST_EXPORT_FILE) {
+        Remove-Item $TEST_EXPORT_FILE -Force -ErrorAction SilentlyContinue
+    }
+}
+
+# Run CLI command function: runs each non-blank line of $Commands as a
+# separate invocation of the application jar.
+function Run-CLI {
+    param([string]$Commands)
+    foreach ($rawLine in ($Commands -split "`n")) {
+        $cmd = $rawLine.Trim()
+        if ($cmd -ne "") {
+            Write-Host "[CLI] $cmd" -ForegroundColor Gray
+            $null = Invoke-AppCommand $cmd
+            Write-Host ""
+        }
+    }
+}
+
+# Step 1: Initial Cleanup
+Write-Banner "STEP 1: Initial Cleanup"
+Clean-Up
+Write-Host ""
+
+# Step 2: Crawl some data
+Write-Banner "STEP 2: Crawl Data (CSDN)"
+Write-Host "Command: crawl https://www.csdn.net/" -ForegroundColor Yellow
+$result = Invoke-AppCommand "crawl https://www.csdn.net/"
+Write-Host ""
+Start-Sleep -Seconds 2
+
+# Step 3: List articles
+Write-Banner "STEP 3: List Articles"
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = Invoke-AppCommand "list"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 4: Export to JSON
+# NOTE(review): the export/import commands hard-code data\test_export.json while
+# the file checks use $TEST_EXPORT_FILE - confirm both refer to the same file.
+Write-Banner "STEP 4: Export to JSON"
+Write-Host "Command: export data\test_export.json --format json" -ForegroundColor Yellow
+$result = Invoke-AppCommand "export data\test_export.json --format json"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 5: Check JSON file
+Write-Banner "STEP 5: Check Exported JSON File"
+if (Test-Path $TEST_EXPORT_FILE) {
+    Write-Host "[SUCCESS] JSON file created: $TEST_EXPORT_FILE" -ForegroundColor Green
+    Write-Host ""
+    Write-Host "JSON File Content Preview (first 1500 chars):" -ForegroundColor Cyan
+    $content = Get-Content $TEST_EXPORT_FILE -Raw
+    if ($content.Length -gt 1500) {
+        Write-Host ($content.Substring(0, 1500) + "...") -ForegroundColor White
+    } else {
+        Write-Host $content -ForegroundColor White
+    }
+
+    # Check for crawledAt field (plain substring match on the raw file text)
+    if ($content -match "crawledAt") {
+        Write-Host ""
+        Write-Host "[SUCCESS] crawledAt field found in JSON!" -ForegroundColor Green
+    } else {
+        Write-Host ""
+        Write-Host "[ERROR] crawledAt field NOT found in JSON!" -ForegroundColor Red
+    }
+
+    # Check for metadata (missing metadata is only a warning)
+    if ($content -match "metadata") {
+        Write-Host "[SUCCESS] metadata field found in JSON!" -ForegroundColor Green
+    } else {
+        Write-Host "[WARNING] metadata field NOT found in JSON!" -ForegroundColor Yellow
+    }
+} else {
+    Write-Host "[ERROR] JSON file NOT created!" -ForegroundColor Red
+}
+Write-Host ""
+
+# Step 6: Get article count before clear
+Write-Banner "STEP 6: Get Article Count Before Clear"
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = Invoke-AppCommand "list"
+
+# Count articles
+$articleCount = Get-ArticleCount $result
+Write-Host ""
+Write-Host "Current article count: $articleCount" -ForegroundColor Cyan
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 7: Clear all data
+Write-Banner "STEP 7: Clear All Data"
+Write-Host "Command: clear" -ForegroundColor Yellow
+$result = Invoke-AppCommand "clear"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 8: Verify data is cleared
+Write-Banner "STEP 8: Verify Data Cleared"
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = Invoke-AppCommand "list"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 9: Import data from JSON
+Write-Banner "STEP 9: Import Data from JSON"
+Write-Host "Command: import data\test_export.json" -ForegroundColor Yellow
+$result = Invoke-AppCommand "import data\test_export.json"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 10: Verify data restored
+Write-Banner "STEP 10: Verify Data Restored"
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = Invoke-AppCommand "list"
+Write-Host ""
+
+# Count articles after import
+$articleCountAfterImport = Get-ArticleCount $result
+
+# A count of 0 before the clear means either nothing was stored or the
+# "Total: N articles" line was not found; 0 -eq 0 would then report success
+# without having verified anything, so say so explicitly.
+if ($articleCount -eq 0) {
+    Write-Host "[WARNING] Article count before clear was 0 (no data or count line not found); the restore check below is not meaningful." -ForegroundColor Yellow
+}
+
+if ($articleCountAfterImport -eq $articleCount) {
+    Write-Host "[SUCCESS] Data restored successfully! Article count matches: $articleCountAfterImport" -ForegroundColor Green
+} else {
+    Write-Host "[WARNING] Article count mismatch. Before: $articleCount, After: $articleCountAfterImport" -ForegroundColor Yellow
+}
+Write-Host ""
+
+# Step 11: Test duplicate import (should not duplicate)
+Write-Banner "STEP 11: Test Duplicate Import (No Duplication)"
+Write-Host "Command: import data\test_export.json (second time)" -ForegroundColor Yellow
+$result = Invoke-AppCommand "import data\test_export.json"
+Write-Host ""
+Start-Sleep -Seconds 1
+
+# Step 12: Final article count
+Write-Banner "STEP 12: Final Article Count"
+Write-Host "Command: list" -ForegroundColor Yellow
+$result = Invoke-AppCommand "list"
+Write-Host ""
+
+# Final count
+$finalCount = Get-ArticleCount $result
+
+Write-Banner "TEST SUMMARY"
+Write-Host "Articles after first import: $articleCountAfterImport" -ForegroundColor White
+Write-Host "Articles after second import: $finalCount" -ForegroundColor White
+Write-Host ""
+
+# Same caveat as in Step 10: two zero counts compare equal without proving
+# that duplicate detection actually ran.
+if ($articleCountAfterImport -eq 0) {
+    Write-Host "[WARNING] Article count after first import was 0; the duplicate-import check below is not meaningful." -ForegroundColor Yellow
+}
+
+if ($finalCount -eq $articleCountAfterImport) {
+    Write-Host "[SUCCESS] Duplicate import correctly skipped! No duplication occurred." -ForegroundColor Green
+} else {
+    Write-Host "[ERROR] Duplicate import created duplicates! Count increased from $articleCountAfterImport to $finalCount" -ForegroundColor Red
+}
+
+Write-Host ""
+Write-Banner "ALL TESTS COMPLETED"
diff --git a/project/java-cli-期末课程项目/test_input.txt b/project/java-cli-期末课程项目/test_input.txt
new file mode 100644
index 0000000..abe3e5f
--- /dev/null
+++ b/project/java-cli-期末课程项目/test_input.txt
@@ -0,0 +1,2 @@
+export data/test_standard_export.json
+exit
\ No newline at end of file