Browse Source

添加爬虫项目到 project 文件夹

main
wangjiashuo 3 weeks ago
parent
commit
0802633ce2
  1. BIN
      project/202506050214-王佳硕-期末实验报告.docx
  2. 21
      project/articles.csv
  3. 11
      project/baidu_hot_topics.csv
  4. 312
      project/baidu_page.html
  5. BIN
      project/command/AbstractCommand.class
  6. BIN
      project/command/ClearDataCommand.class
  7. BIN
      project/command/Command.class
  8. BIN
      project/command/CommandManager.class
  9. BIN
      project/command/CrawlCommand.class
  10. BIN
      project/command/HelpCommand.class
  11. BIN
      project/command/ViewArticlesCommand.class
  12. BIN
      project/controller/ControllerApp.class
  13. BIN
      project/controller/CrawlerController.class
  14. BIN
      project/crawler/BaiduHotCrawler.class
  15. BIN
      project/crawler/BaseCrawler.class
  16. BIN
      project/crawler/HupuHotCrawler.class
  17. BIN
      project/crawler/WeiboHotCrawler.class
  18. BIN
      project/exception/AntiCrawlerException.class
  19. BIN
      project/exception/CrawlerException.class
  20. BIN
      project/exception/ExceptionHandler.class
  21. BIN
      project/exception/FileStorageException.class
  22. BIN
      project/exception/NetworkException.class
  23. BIN
      project/exception/ParseException.class
  24. 10
      project/hupu_hot_posts.txt
  25. 11
      project/hupu_hot_search.csv
  26. BIN
      project/jsoup-1.17.2.jar
  27. BIN
      project/model/Article.class
  28. BIN
      project/model/ArticleRepository.class
  29. BIN
      project/model/ArticleRepositoryImpl.class
  30. BIN
      project/model/CrawlerResult.class
  31. BIN
      project/model/HotSearchItem.class
  32. 6
      project/out/production/Git/.idea/misc.xml
  33. 8
      project/out/production/Git/.idea/modules.xml
  34. 7
      project/out/production/Git/.idea/vcs.xml
  35. 227
      project/out/production/Git/.idea/workspace.xml
  36. BIN
      project/out/production/Git/Car.class
  37. BIN
      project/out/production/Git/DataCleaner.class
  38. 14
      project/out/production/Git/Git.iml
  39. BIN
      project/out/production/Git/HelloWorld.class
  40. 11
      project/out/production/Git/README.md
  41. BIN
      project/out/production/Git/TestCar.class
  42. BIN
      project/out/production/Git/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png
  43. 6
      project/out/production/Git/W1_wangjiashuo_202506050214/AI使用记录
  44. 16
      project/out/production/Git/W1_wangjiashuo_202506050214/README.md
  45. BIN
      project/out/production/Git/W1_wangjiashuo_202506050214/TemperatureConverter.class
  46. BIN
      project/out/production/Git/apache-maven-3.8.8-bin.zip
  47. BIN
      project/out/production/Git/apache-maven-3.9.6-bin.zip
  48. BIN
      project/out/production/Git/w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png
  49. 6
      project/out/production/Git/w1/W1_wangjiashuo_202506050214/AI使用记录
  50. 16
      project/out/production/Git/w1/W1_wangjiashuo_202506050214/README.md
  51. 11
      project/out/production/Git/w3/README.md
  52. 75
      project/pom.xml
  53. BIN
      project/repository/ArticleRepository$Article.class
  54. BIN
      project/repository/ArticleRepository.class
  55. 37
      project/src/main/java/command/AbstractCommand.java
  56. 42
      project/src/main/java/command/ClearDataCommand.java
  57. 9
      project/src/main/java/command/Command.java
  58. 85
      project/src/main/java/command/CommandManager.java
  59. 59
      project/src/main/java/command/CrawlCommand.java
  60. 47
      project/src/main/java/command/HelpCommand.java
  61. 54
      project/src/main/java/command/ViewArticlesCommand.java
  62. 154
      project/src/main/java/controller/CrawlerController.java
  63. 129
      project/src/main/java/crawler/BaiduHotCrawler.java
  64. 126
      project/src/main/java/crawler/BaseCrawler.java
  65. 108
      project/src/main/java/crawler/HupuHotCrawler.java
  66. 154
      project/src/main/java/crawler/WeiboHotCrawler.java
  67. 11
      project/src/main/java/exception/AntiCrawlerException.java
  68. 11
      project/src/main/java/exception/CrawlerException.java
  69. 88
      project/src/main/java/exception/ExceptionHandler.java
  70. 11
      project/src/main/java/exception/FileStorageException.java
  71. 11
      project/src/main/java/exception/NetworkException.java
  72. 11
      project/src/main/java/exception/ParseException.java
  73. 76
      project/src/main/java/model/Article.java
  74. 14
      project/src/main/java/model/ArticleRepository.java
  75. 183
      project/src/main/java/model/ArticleRepositoryImpl.java
  76. 63
      project/src/main/java/model/CrawlerResult.java
  77. 59
      project/src/main/java/model/HotSearchItem.java
  78. 62
      project/src/main/java/strategy/BlogStrategy.java
  79. 12
      project/src/main/java/strategy/CrawlStrategy.java
  80. 84
      project/src/main/java/strategy/HotSearchStrategy.java
  81. 61
      project/src/main/java/strategy/NewsStrategy.java
  82. 56
      project/src/main/java/strategy/StrategyFactory.java
  83. 18
      project/src/main/java/view/CrawlerView.java
  84. 138
      project/src/main/java/view/CrawlerViewImpl.java
  85. BIN
      project/strategy/BlogStrategy.class
  86. BIN
      project/strategy/CrawlStrategy.class
  87. BIN
      project/strategy/HotSearchStrategy.class
  88. BIN
      project/strategy/NewsStrategy.class
  89. BIN
      project/strategy/StrategyFactory.class
  90. BIN
      project/target/classes/command/AbstractCommand.class
  91. BIN
      project/target/classes/command/ClearDataCommand.class
  92. BIN
      project/target/classes/command/Command.class
  93. BIN
      project/target/classes/command/CommandManager.class
  94. BIN
      project/target/classes/command/CrawlCommand.class
  95. BIN
      project/target/classes/command/HelpCommand.class
  96. BIN
      project/target/classes/command/ViewArticlesCommand.class
  97. BIN
      project/target/classes/controller/CrawlerController.class
  98. BIN
      project/target/classes/crawler/BaiduHotCrawler.class
  99. BIN
      project/target/classes/crawler/BaseCrawler.class
  100. BIN
      project/target/classes/crawler/HupuHotCrawler.class

BIN
project/202506050214-王佳硕-期末实验报告.docx

Binary file not shown.

21
project/articles.csv

@ -0,0 +1,21 @@
id,title,author,content,source,crawlTime,strategy
article_1780116197369_9103,虎扑热搜1: lpl,虎扑,lpl,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197369_7122,虎扑热搜2: 马刺,虎扑,马刺,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197370_3867,虎扑热搜3: 怀特塞德,虎扑,怀特塞德,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197371_3682,虎扑热搜4: 库里,虎扑,库里,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197371_1255,虎扑热搜5: kpl,虎扑,kpl,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197372_9579,虎扑热搜6: 欧冠,虎扑,欧冠,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197372_9427,虎扑热搜7: g7裁判,虎扑,g7裁判,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197381_2668,虎扑热搜8: 雷霆,虎扑,雷霆,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197381_4475,虎扑热搜9: 亚运会,虎扑,亚运会,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197382_3875,虎扑热搜10: 歌手,虎扑,歌手,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197382_8206,虎扑帖子1: TT 1-0 JDG:强势进攻配合行云流水,TT首局,赛后,[赛后] TT 1-0 JDG:强势进攻配合行云流水,TT首局 - 亮 50回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197383_3499,虎扑帖子2: HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50,流言板,[流言板] HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50 - 50亮 139回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197383_8409,虎扑帖子3: 詹金斯:波波教会我,投入人际关系和关心别人最重要 17,流言板,[流言板] 詹金斯:波波教会我,投入人际关系和关心别人最重要 17 - 17亮 49回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197384_7408,虎扑帖子4: 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40,流言板,[流言板] 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40 - 40亮 118回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197385_9306,虎扑帖子5: 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18,流言板,[流言板] 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18 - 18亮 65回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197385_6241,虎扑帖子6: 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50,流言板,[流言板] 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50 - 50亮 276回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197385_8627,虎扑帖子7: Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26,流言板,[流言板] Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26 - 26亮 92回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197387_2189,虎扑帖子8: 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27,流言板,[流言板] 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27 - 27亮 59回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197387_7047,虎扑帖子9: 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50,流言板,[流言板] 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50 - 50亮 301回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
article_1780116197388_8632,虎扑帖子10: 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28,流言板,[流言板] 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28 - 28亮 64回复,虎扑热榜,2026-05-30 12:43:17,虎扑热榜
1 id title author content source crawlTime strategy
2 article_1780116197369_9103 虎扑热搜1: lpl 虎扑 lpl 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
3 article_1780116197369_7122 虎扑热搜2: 马刺 虎扑 马刺 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
4 article_1780116197370_3867 虎扑热搜3: 怀特塞德 虎扑 怀特塞德 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
5 article_1780116197371_3682 虎扑热搜4: 库里 虎扑 库里 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
6 article_1780116197371_1255 虎扑热搜5: kpl 虎扑 kpl 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
7 article_1780116197372_9579 虎扑热搜6: 欧冠 虎扑 欧冠 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
8 article_1780116197372_9427 虎扑热搜7: g7裁判 虎扑 g7裁判 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
9 article_1780116197381_2668 虎扑热搜8: 雷霆 虎扑 雷霆 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
10 article_1780116197381_4475 虎扑热搜9: 亚运会 虎扑 亚运会 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
11 article_1780116197382_3875 虎扑热搜10: 歌手 虎扑 歌手 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
12 article_1780116197382_8206 虎扑帖子1: TT 1-0 JDG:强势进攻配合行云流水,TT首局 赛后 [赛后] TT 1-0 JDG:强势进攻配合行云流水,TT首局 - 亮 50回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
13 article_1780116197383_3499 虎扑帖子2: HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50 流言板 [流言板] HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50 - 50亮 139回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
14 article_1780116197383_8409 虎扑帖子3: 詹金斯:波波教会我,投入人际关系和关心别人最重要 17 流言板 [流言板] 詹金斯:波波教会我,投入人际关系和关心别人最重要 17 - 17亮 49回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
15 article_1780116197384_7408 虎扑帖子4: 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40 流言板 [流言板] 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40 - 40亮 118回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
16 article_1780116197385_9306 虎扑帖子5: 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18 流言板 [流言板] 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18 - 18亮 65回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
17 article_1780116197385_6241 虎扑帖子6: 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50 流言板 [流言板] 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50 - 50亮 276回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
18 article_1780116197385_8627 虎扑帖子7: Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26 流言板 [流言板] Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26 - 26亮 92回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
19 article_1780116197387_2189 虎扑帖子8: 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27 流言板 [流言板] 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27 - 27亮 59回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
20 article_1780116197387_7047 虎扑帖子9: 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50 流言板 [流言板] 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50 - 50亮 301回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜
21 article_1780116197388_8632 虎扑帖子10: 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28 流言板 [流言板] 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28 - 28亮 64回复 虎扑热榜 2026-05-30 12:43:17 虎扑热榜

11
project/baidu_hot_topics.csv

@ -0,0 +1,11 @@
排名,内容
1,科学家精神是宝贵的精神财富
2,“我不上 孩子就没了”
3,顺利出厂!大国重器上新了
4,欢迎回家!神二十一乘组返回全记录
5,31岁离异带4娃妈妈回应因自信走红
6,435万法拉利电车撞脸13万蔚来萤火虫
7,演员刘洵去世 曾参演《九品芝麻官》
8,男子为狗捡玩具被江水卷走身亡
9,车手张秀军意外离世 留下3个孩子
10,老人与邻居互殴次日自缢 法院判了
1 排名 内容
2 1 科学家精神是宝贵的精神财富
3 2 “我不上 孩子就没了”
4 3 顺利出厂!大国重器上新了
5 4 欢迎回家!神二十一乘组返回全记录
6 5 31岁离异带4娃妈妈回应因自信走红
7 6 435万法拉利电车撞脸13万蔚来萤火虫
8 7 演员刘洵去世 曾参演《九品芝麻官》
9 8 男子为狗捡玩具被江水卷走身亡
10 9 车手张秀军意外离世 留下3个孩子
11 10 老人与邻居互殴次日自缢 法院判了

312
project/baidu_page.html

File diff suppressed because one or more lines are too long

BIN
project/command/AbstractCommand.class

Binary file not shown.

BIN
project/command/ClearDataCommand.class

Binary file not shown.

BIN
project/command/Command.class

Binary file not shown.

BIN
project/command/CommandManager.class

Binary file not shown.

BIN
project/command/CrawlCommand.class

Binary file not shown.

BIN
project/command/HelpCommand.class

Binary file not shown.

BIN
project/command/ViewArticlesCommand.class

Binary file not shown.

BIN
project/controller/ControllerApp.class

Binary file not shown.

BIN
project/controller/CrawlerController.class

Binary file not shown.

BIN
project/crawler/BaiduHotCrawler.class

Binary file not shown.

BIN
project/crawler/BaseCrawler.class

Binary file not shown.

BIN
project/crawler/HupuHotCrawler.class

Binary file not shown.

BIN
project/crawler/WeiboHotCrawler.class

Binary file not shown.

BIN
project/exception/AntiCrawlerException.class

Binary file not shown.

BIN
project/exception/CrawlerException.class

Binary file not shown.

BIN
project/exception/ExceptionHandler.class

Binary file not shown.

BIN
project/exception/FileStorageException.class

Binary file not shown.

BIN
project/exception/NetworkException.class

Binary file not shown.

BIN
project/exception/ParseException.class

Binary file not shown.

10
project/hupu_hot_posts.txt

@ -0,0 +1,10 @@
[赛后] TT 1-0 JDG:强势进攻配合行云流水,TT首局 - 亮 50回复
[流言板] HWG!罗马诺:热刺免签伯恩茅斯中卫塞内西,签约四年 50 - 50亮 139回复
[流言板] 詹金斯:波波教会我,投入人际关系和关心别人最重要 17 - 17亮 49回复
[流言板] 库班:如今的NBA老板早已变成了精于算计、利益为先的投资人 40 - 40亮 118回复
[流言板] 布朗谈尼克斯凝聚力:从布伦森开始,所有人都愿意牺牲 18 - 18亮 65回复
[流言板] 马竞官方:只花5分钟就炮制假消息,别轻信和巴萨有关的消息 50 - 50亮 276回复
[流言板] Amick:湖人会尝试追求字母哥,但我不认为他们能得到他 26 - 26亮 92回复
[流言板] 外媒统计LCK常规赛观赛人数峰值Top5:T1五度上榜 27 - 27亮 59回复
[流言板] 夏普:当裁判不再给亚历山大哨子的时候,你就看到区别了 50 - 50亮 301回复
[流言板] 法尔克:拜仁认为左边锋备选都不如戈登,今夏或没重磅引援 28 - 28亮 64回复

11
project/hupu_hot_search.csv

@ -0,0 +1,11 @@
排名,内容
1,lpl
2,马刺
3,怀特塞德
4,库里
5,kpl
6,欧冠
7,g7裁判
8,雷霆
9,亚运会
10,歌手
1 排名 内容
2 1 lpl
3 2 马刺
4 3 怀特塞德
5 4 库里
6 5 kpl
7 6 欧冠
8 7 g7裁判
9 8 雷霆
10 9 亚运会
11 10 歌手

BIN
project/jsoup-1.17.2.jar

Binary file not shown.

BIN
project/model/Article.class

Binary file not shown.

BIN
project/model/ArticleRepository.class

Binary file not shown.

BIN
project/model/ArticleRepositoryImpl.class

Binary file not shown.

BIN
project/model/CrawlerResult.class

Binary file not shown.

BIN
project/model/HotSearchItem.class

Binary file not shown.

6
project/out/production/Git/.idea/misc.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" default="true" project-jdk-name="temurin-25" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
project/out/production/Git/.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/Git.iml" filepath="$PROJECT_DIR$/Git.iml" />
</modules>
</component>
</project>

7
project/out/production/Git/.idea/vcs.xml

@ -0,0 +1,7 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
<mapping directory="$PROJECT_DIR$" vcs="Git" />
</component>
</project>

227
project/out/production/Git/.idea/workspace.xml

@ -0,0 +1,227 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="AutoImportSettings">
<option name="autoReloadType" value="SELECTIVE" />
</component>
<component name="ChangeListManager">
<list default="true" id="53fa3b19-db2c-4a74-b6fa-3d9bbdd897c4" name="更改" comment="温度转换">
<change afterPath="$PROJECT_DIR$/w1/BankAccount.java" afterDir="false" />
<change afterPath="$PROJECT_DIR$/w3/Car.java" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
<option name="LAST_RESOLUTION" value="IGNORE" />
</component>
<component name="FileTemplateManagerImpl">
<option name="RECENT_TEMPLATES">
<list>
<option value="Class" />
</list>
</option>
</component>
<component name="Git.Settings">
<option name="PUSH_AUTO_UPDATE" value="true" />
<option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
</component>
<component name="ProblemsViewState">
<option name="selectedTabId" value="CurrentFile" />
</component>
<component name="ProjectColorInfo">{
&quot;associatedIndex&quot;: 2
}</component>
<component name="ProjectId" id="3AjbM6ApUHyktWwZ0VyJwrYN7ZE" />
<component name="ProjectViewState">
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;RunOnceActivity.typescript.service.memoryLimit.init&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;main&quot;,
&quot;kotlin-language-version-configured&quot;: &quot;true&quot;,
&quot;last_opened_file_path&quot;: &quot;D:/Git/java&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
&quot;onboarding.tips.debug.path&quot;: &quot;D:/Git/java/w2/src/Main.java&quot;,
&quot;project.structure.last.edited&quot;: &quot;模块&quot;,
&quot;project.structure.proportion&quot;: &quot;0.0&quot;,
&quot;project.structure.side.proportion&quot;: &quot;0.2&quot;,
&quot;run.code.analysis.last.selected.profile&quot;: &quot;pProject Default&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;configurable.group.language&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;,
&quot;应用程序.DataCleaner.executor&quot;: &quot;Run&quot;,
&quot;应用程序.TemperatureConverter.executor&quot;: &quot;Run&quot;
}
}</component>
<component name="RunManager">
<configuration name="TemperatureConverter" type="Application" factoryName="Application" temporary="true" nameIsGenerated="true">
<option name="MAIN_CLASS_NAME" value="W1_wangjiashuo_202506050214.TemperatureConverter" />
<module name="Git" />
<extension name="coverage">
<pattern>
<option name="PATTERN" value="W1_wangjiashuo_202506050214.*" />
<option name="ENABLED" value="true" />
</pattern>
</extension>
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<recent_temporary>
<list>
<item itemvalue="应用程序.TemperatureConverter" />
</list>
</recent_temporary>
</component>
<component name="SharedIndexes">
<attachedChunks>
<set>
<option value="bundled-jdk-30f59d01ecdd-2fc7cc6b9a17-intellij.indexing.shared.core-IU-253.31033.145" />
<option value="bundled-js-predefined-d6986cc7102b-9b0f141eb926-JavaScript-IU-253.31033.145" />
</set>
</attachedChunks>
</component>
<component name="TaskManager">
<task active="true" id="Default" summary="默认任务">
<changelist id="53fa3b19-db2c-4a74-b6fa-3d9bbdd897c4" name="更改" comment="" />
<created>1773108875384</created>
<option name="number" value="Default" />
<option name="presentableId" value="Default" />
<updated>1773108875384</updated>
<workItem from="1773108876484" duration="4766000" />
<workItem from="1773118929508" duration="559000" />
<workItem from="1773642819333" duration="3918000" />
<workItem from="1774197968374" duration="611000" />
</task>
<task id="LOCAL-00001" summary="温度转换">
<option name="closed" value="true" />
<created>1773110779232</created>
<option name="number" value="00001" />
<option name="presentableId" value="LOCAL-00001" />
<option name="project" value="LOCAL" />
<updated>1773110779232</updated>
</task>
<task id="LOCAL-00002" summary="数据清理">
<option name="closed" value="true" />
<created>1773115854381</created>
<option name="number" value="00002" />
<option name="presentableId" value="LOCAL-00002" />
<option name="project" value="LOCAL" />
<updated>1773115854381</updated>
</task>
<task id="LOCAL-00003" summary="Merge remote-tracking branch 'origin/main'&#10;&#10;# Conflicts:&#10;#&#9;w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png">
<option name="closed" value="true" />
<created>1773116066379</created>
<option name="number" value="00003" />
<option name="presentableId" value="LOCAL-00003" />
<option name="project" value="LOCAL" />
<updated>1773116066379</updated>
</task>
<task id="LOCAL-00004" summary="温度转换">
<option name="closed" value="true" />
<created>1773116392968</created>
<option name="number" value="00004" />
<option name="presentableId" value="LOCAL-00004" />
<option name="project" value="LOCAL" />
<updated>1773116392968</updated>
</task>
<task id="LOCAL-00005" summary="温度转换">
<option name="closed" value="true" />
<created>1773116429936</created>
<option name="number" value="00005" />
<option name="presentableId" value="LOCAL-00005" />
<option name="project" value="LOCAL" />
<updated>1773116429936</updated>
</task>
<task id="LOCAL-00006" summary="数据清理">
<option name="closed" value="true" />
<created>1773116444802</created>
<option name="number" value="00006" />
<option name="presentableId" value="LOCAL-00006" />
<option name="project" value="LOCAL" />
<updated>1773116444802</updated>
</task>
<task id="LOCAL-00007" summary="数据清理">
<option name="closed" value="true" />
<created>1773116468761</created>
<option name="number" value="00007" />
<option name="presentableId" value="LOCAL-00007" />
<option name="project" value="LOCAL" />
<updated>1773116468761</updated>
</task>
<task id="LOCAL-00008" summary="数据清理">
<option name="closed" value="true" />
<created>1773116712960</created>
<option name="number" value="00008" />
<option name="presentableId" value="LOCAL-00008" />
<option name="project" value="LOCAL" />
<updated>1773116712960</updated>
</task>
<task id="LOCAL-00009" summary="温度转换">
<option name="closed" value="true" />
<created>1773116781316</created>
<option name="number" value="00009" />
<option name="presentableId" value="LOCAL-00009" />
<option name="project" value="LOCAL" />
<updated>1773116781316</updated>
</task>
<task id="LOCAL-00010" summary="温度转换">
<option name="closed" value="true" />
<created>1773116897047</created>
<option name="number" value="00010" />
<option name="presentableId" value="LOCAL-00010" />
<option name="project" value="LOCAL" />
<updated>1773116897047</updated>
</task>
<task id="LOCAL-00011" summary="温度转换">
<option name="closed" value="true" />
<created>1773116945283</created>
<option name="number" value="00011" />
<option name="presentableId" value="LOCAL-00011" />
<option name="project" value="LOCAL" />
<updated>1773116945283</updated>
</task>
<option name="localTasksCounter" value="12" />
<servers />
</component>
<component name="TypeScriptGeneratedFilesManager">
<option name="version" value="3" />
</component>
<component name="Vcs.Log.Tabs.Properties">
<option name="TAB_STATES">
<map>
<entry key="MAIN">
<value>
<State />
</value>
</entry>
</map>
</option>
</component>
<component name="VcsManagerConfiguration">
<MESSAGE value="g" />
<MESSAGE value="Merge remote-tracking branch 'origin/main'&#10;&#10;# Conflicts:&#10;#&#9;w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png" />
<MESSAGE value="数据清理" />
<MESSAGE value="温度转换" />
<option name="LAST_COMMIT_MESSAGE" value="温度转换" />
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" type="java-line">
<url>file://$PROJECT_DIR$/w2/src/Main.java</url>
<line>10</line>
<option name="timeStamp" value="1" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
</project>

BIN
project/out/production/Git/Car.class

Binary file not shown.

BIN
project/out/production/Git/DataCleaner.class

Binary file not shown.

14
project/out/production/Git/Git.iml

@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/w1" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/w2" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/w3" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

BIN
project/out/production/Git/HelloWorld.class

Binary file not shown.

11
project/out/production/Git/README.md

@ -0,0 +1,11 @@
实验目的
1.掌握 Java 封装思想,使用 private 修饰属性,通过 getter/setter 访问。
2.练习构造方法重载与 this() 调用。
3.学会在 setter 和业务方法中做数据合法性校验。
4.练习静态变量与静态方法实现全局统计。
5.编写测试类验证类功能。

BIN
project/out/production/Git/TestCar.class

Binary file not shown.

BIN
project/out/production/Git/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

6
project/out/production/Git/W1_wangjiashuo_202506050214/AI使用记录

@ -0,0 +1,6 @@
使用prompt:
·「将Python温度转换程序移植为Java,保留原功能和注释,要求代码规范,添加中文文档注释」
·「为Java温度转换器增加命令行参数模式,兼容原交互模式」
·「Java中如何实现字符串分割、浮点型解析和异常捕获,适配温度转换场景」
·AI协助完成Python到Java的语法映射、Scanner控制台输入实现、异常处理逻辑优化
·指导了printf格式化输出和命令行参数args的处理方式,最终自行整合代码并完成功能测试与注释完善

16
project/out/production/Git/W1_wangjiashuo_202506050214/README.md

@ -0,0 +1,16 @@
# 温度转换器(Java版)
基于Python原版移植,支持摄氏度(C)和华氏度(F)互转,新增**命令行参数模式**(加分项)。
## 编译与运行命令
### 1. 编译源码
```bash
javac TemperatureConverter.java

BIN
project/out/production/Git/W1_wangjiashuo_202506050214/TemperatureConverter.class

Binary file not shown.

BIN
project/out/production/Git/apache-maven-3.8.8-bin.zip

Binary file not shown.

BIN
project/out/production/Git/apache-maven-3.9.6-bin.zip

Binary file not shown.

BIN
project/out/production/Git/w1/W1_wangjiashuo_202506050214/5920b4a20b1f1dd17cdbfd8b4df6dfdb.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 52 KiB

6
project/out/production/Git/w1/W1_wangjiashuo_202506050214/AI使用记录

@ -0,0 +1,6 @@
使用prompt:
·「将Python温度转换程序移植为Java,保留原功能和注释,要求代码规范,添加中文文档注释」
·「为Java温度转换器增加命令行参数模式,兼容原交互模式」
·「Java中如何实现字符串分割、浮点型解析和异常捕获,适配温度转换场景」
·AI协助完成Python到Java的语法映射、Scanner控制台输入实现、异常处理逻辑优化
·指导了printf格式化输出和命令行参数args的处理方式,最终自行整合代码并完成功能测试与注释完善

16
project/out/production/Git/w1/W1_wangjiashuo_202506050214/README.md

@ -0,0 +1,16 @@
# 温度转换器(Java版)
基于Python原版移植,支持摄氏度(C)和华氏度(F)互转,新增**命令行参数模式**(加分项)。
## 编译与运行命令
### 1. 编译源码
```bash
javac TemperatureConverter.java

11
project/out/production/Git/w3/README.md

@ -0,0 +1,11 @@
实验目的
1.掌握 Java 封装思想,使用 private 修饰属性,通过 getter/setter 访问。
2.练习构造方法重载与 this() 调用。
3.学会在 setter 和业务方法中做数据合法性校验。
4.练习静态变量与静态方法实现全局统计。
5.编写测试类验证类功能。

75
project/pom.xml

@ -0,0 +1,75 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!--
  Maven build descriptor for the crawler application.
  Produces a jar whose manifest names the main class and expects its
  dependencies in a sibling lib/ directory (see the jar and dependency
  plugin sections below).
-->
<modelVersion>4.0.0</modelVersion>
<!-- Project coordinates. -->
<groupId>com.example</groupId>
<artifactId>hupu-crawler</artifactId>
<version>1.0-SNAPSHOT</version>
<packaging>jar</packaging>
<name>Hupu Crawler</name>
<description>多平台热搜爬虫系统</description>
<properties>
<!-- Compile for Java 8 source and bytecode level; sources are read as UTF-8. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Entry point; referenced by both the exec plugin and the jar manifest below. -->
<exec.mainClass>controller.CrawlerController</exec.mainClass>
</properties>
<build>
<sourceDirectory>src/main/java</sourceDirectory>
<plugins>
<!-- exec-maven-plugin: configured with the main class so the application can be launched through Maven. -->
<plugin>
<groupId>org.codehaus.mojo</groupId>
<artifactId>exec-maven-plugin</artifactId>
<version>3.1.0</version>
<configuration>
<mainClass>${exec.mainClass}</mainClass>
</configuration>
</plugin>
<!-- maven-jar-plugin: writes the main class and a classpath prefixed with lib/ into the jar manifest. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>${exec.mainClass}</mainClass>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
</manifest>
</archive>
</configuration>
</plugin>
<!-- maven-dependency-plugin: during the package phase, copies dependencies into the build directory's lib/ folder, matching the lib/ classpath prefix used by the jar manifest above. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>3.6.1</version>
<executions>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<!-- The only declared dependency: jsoup. -->
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
</dependencies>
</project>

BIN
project/repository/ArticleRepository$Article.class

Binary file not shown.

BIN
project/repository/ArticleRepository.class

Binary file not shown.

37
project/src/main/java/command/AbstractCommand.java

@ -0,0 +1,37 @@
package command;
/**
 * Base class for {@link Command} implementations.
 *
 * <p>Stores the command's name, its description and an "executed" flag, and
 * implements {@link #undo()} by delegating to the {@link #performUndo()} hook.
 * {@code execute()} is left for subclasses to implement.
 */
public abstract class AbstractCommand implements Command {

    /** Value returned by {@link #getCommandName()}. */
    protected String name;

    /** Value returned by {@link #getDescription()}. */
    protected String description;

    /** Execution flag; starts as {@code false} and is reset to {@code false} by a successful {@link #undo()}. */
    protected boolean executed;

    /**
     * Creates a command that has not been executed yet.
     *
     * @param name        name reported by {@link #getCommandName()}
     * @param description text reported by {@link #getDescription()}
     */
    public AbstractCommand(String name, String description) {
        this.executed = false;
        this.description = description;
        this.name = name;
    }

    /**
     * Subclass hook that carries out the actual undo work. It is invoked by
     * {@link #undo()} only while the executed flag is set.
     */
    protected abstract void performUndo();

    /**
     * Runs {@link #performUndo()} and clears the executed flag. Does nothing
     * when the command is not currently marked as executed.
     */
    @Override
    public void undo() {
        if (!executed) {
            return;
        }
        performUndo();
        executed = false;
    }

    /** @return the current value of the executed flag */
    @Override
    public boolean isExecuted() {
        return executed;
    }

    /** @return the name given at construction time */
    @Override
    public String getCommandName() {
        return name;
    }

    /** @return the description given at construction time */
    @Override
    public String getDescription() {
        return description;
    }
}

42
project/src/main/java/command/ClearDataCommand.java

@ -0,0 +1,42 @@
package command;
import model.ArticleRepository;
/**
 * Command that empties the article repository and reports how many entries
 * were present beforehand.
 *
 * <p>Undo does not restore anything: only the previous entry count is kept,
 * so {@link #performUndo()} merely prints a notice.
 */
public class ClearDataCommand extends AbstractCommand {

    private final ArticleRepository repository;

    /** Entry count read from the repository immediately before the last clear. */
    private int previousCount;

    /**
     * @param repository repository that {@link #execute()} clears
     */
    public ClearDataCommand(ArticleRepository repository) {
        super("ClearData", "清空所有已保存的数据");
        this.repository = repository;
    }

    /**
     * Records the current entry count, clears the repository, prints a summary
     * box and marks the command as executed.
     */
    @Override
    public void execute() {
        // The count has to be taken before clear(); afterwards it is gone.
        previousCount = repository.count();
        repository.clear();
        printSummary();
        this.executed = true;
    }

    /** Prints the boxed summary for the most recent clear, one row per line. */
    private void printSummary() {
        String[] rows = {
            "╔══════════════════════════════════════════╗",
            "║ 数据清空成功 ║",
            "╠══════════════════════════════════════════╣",
            "║ 已清空 " + previousCount + " 条数据 ║",
            "╚══════════════════════════════════════════╝"
        };
        for (String row : rows) {
            System.out.println(row);
        }
    }

    /** Prints a notice that the clear cannot be reverted; no data is restored. */
    @Override
    protected void performUndo() {
        System.out.println("清空命令已执行,无法撤销");
        System.out.println("请手动重新爬取数据");
    }

    /** @return the current value of the executed flag */
    @Override
    public boolean isExecuted() {
        return this.executed;
    }

    /** @return the entry count captured by the most recent {@link #execute()} */
    public int getPreviousCount() {
        return this.previousCount;
    }
}

9
project/src/main/java/command/Command.java

@ -0,0 +1,9 @@
package command;
/**
 * Contract for an operation that can be run, reverted and described.
 */
public interface Command {

    /** @return the name identifying this command */
    String getCommandName();

    /** @return a human-readable description of this command */
    String getDescription();

    /** Performs the command's work. */
    void execute();

    /** Reverts the command's work. */
    void undo();

    /** @return {@code true} while the command counts as executed */
    boolean isExecuted();
}

85
project/src/main/java/command/CommandManager.java

@ -0,0 +1,85 @@
package command;
import java.util.ArrayList;
import java.util.List;
import java.util.Stack;
/**
 * Keeps a list of registered commands and an undo/redo history of the
 * commands that were run through this manager.
 *
 * <p>A command enters the undo history only if {@link Command#isExecuted()}
 * is {@code true} right after {@link Command#execute()} returns. Recording a
 * newly executed command discards the redo history.
 */
public class CommandManager {

    private final List<Command> commands;
    private final Stack<Command> executedCommands;
    private final Stack<Command> undoneCommands;

    /** Creates a manager with no registered commands and an empty history. */
    public CommandManager() {
        this.commands = new ArrayList<>();
        this.executedCommands = new Stack<>();
        this.undoneCommands = new Stack<>();
    }

    /**
     * Appends a command to the registered list; its position in that list is
     * the index accepted by {@link #executeCommand(int)}.
     *
     * @param command command to register
     */
    public void register(Command command) {
        commands.add(command);
    }

    /**
     * Executes the registered command at the given position. An index outside
     * the registered range is ignored without any output.
     *
     * @param index zero-based position in registration order
     */
    public void executeCommand(int index) {
        if (index < 0 || index >= commands.size()) {
            return;
        }
        runAndRecord(commands.get(index));
    }

    /**
     * Executes the given command, which does not have to be registered.
     *
     * @param command command to execute
     */
    public void executeCommand(Command command) {
        runAndRecord(command);
    }

    /**
     * Shared bookkeeping for both {@code executeCommand} overloads: runs the
     * command and, if it reports success, pushes it onto the undo history and
     * discards the redo history.
     */
    private void runAndRecord(Command command) {
        command.execute();
        if (command.isExecuted()) {
            executedCommands.push(command);
            undoneCommands.clear();
        }
    }

    /**
     * Undoes the most recently recorded command and moves it to the redo
     * history. Prints a notice when there is nothing to undo.
     */
    public void undo() {
        if (executedCommands.isEmpty()) {
            System.out.println("没有可撤销的命令");
            return;
        }
        Command command = executedCommands.pop();
        command.undo();
        undoneCommands.push(command);
        System.out.println("↩ 已撤销命令: " + command.getCommandName());
    }

    /**
     * Re-executes the most recently undone command. Prints a notice when
     * there is nothing to redo.
     *
     * <p>The success message is printed, and the command returned to the undo
     * history, only if the command reports {@link Command#isExecuted()} after
     * running. Otherwise a failure message is printed and the command is
     * dropped from the history, as before.
     */
    public void redo() {
        if (undoneCommands.isEmpty()) {
            System.out.println("没有可重做的命令");
            return;
        }
        Command command = undoneCommands.pop();
        command.execute();
        if (command.isExecuted()) {
            executedCommands.push(command);
            System.out.println("↪ 已重做命令: " + command.getCommandName());
        } else {
            // Previously the success line was printed here as well, even though
            // the command had just reported that it did not execute.
            System.out.println("重做命令失败: " + command.getCommandName());
        }
    }

    /** @return a copy of the registered commands, in registration order */
    public List<Command> getCommands() {
        return new ArrayList<>(commands);
    }

    /** @return the number of registered commands */
    public int getCommandCount() {
        return commands.size();
    }

    /** @return {@code true} if at least one command can be undone */
    public boolean canUndo() {
        return !executedCommands.isEmpty();
    }

    /** @return {@code true} if at least one command can be redone */
    public boolean canRedo() {
        return !undoneCommands.isEmpty();
    }

    /** Empties both the undo and redo histories; registered commands are kept. */
    public void clearHistory() {
        executedCommands.clear();
        undoneCommands.clear();
    }
}

59
project/src/main/java/command/CrawlCommand.java

@ -0,0 +1,59 @@
package command;
import crawler.BaseCrawler;
import model.Article;
import model.ArticleRepository;
import exception.ExceptionHandler;
import java.util.List;
/**
 * Command that runs one crawler and remembers what the repository holds for
 * that crawler's site afterwards.
 */
public class CrawlCommand extends AbstractCommand {
    private final BaseCrawler crawler;
    private final ArticleRepository repository;
    private List<Article> savedArticles;

    /**
     * @param crawler    the crawler to run; its site name is used in the command name and description
     * @param repository the repository queried after the crawl
     */
    public CrawlCommand(BaseCrawler crawler, ArticleRepository repository) {
        super("CrawlCommand-" + crawler.getSiteName(), "爬取 " + crawler.getSiteName() + " 数据");
        this.crawler = crawler;
        this.repository = repository;
    }

    /**
     * Runs the crawler, then loads every repository article whose source is
     * the crawler's site name. Any exception is passed to the
     * {@link ExceptionHandler} and leaves the command marked as not executed.
     */
    @Override
    public void execute() {
        boolean succeeded;
        try {
            System.out.println("► 执行命令: " + getDescription());
            crawler.startCrawling();
            savedArticles = repository.findBySource(crawler.getSiteName());
            System.out.println("✓ 命令执行成功");
            succeeded = true;
        } catch (Exception e) {
            ExceptionHandler.getInstance().handle(e);
            succeeded = false;
        }
        this.executed = succeeded;
    }

    /** Prints one line per remembered article; the repository itself is not modified. */
    @Override
    protected void performUndo() {
        if (savedArticles == null) {
            return;
        }
        for (Article saved : savedArticles) {
            System.out.println(" ↩ 撤销保存: " + saved.getTitle());
        }
    }

    /** @return the current value of the {@code executed} flag */
    @Override
    public boolean isExecuted() {
        return executed;
    }

    /** @return the crawler this command runs */
    public BaseCrawler getCrawler() {
        return crawler;
    }

    /** @return the articles loaded by the last successful run, or {@code null} if there was none */
    public List<Article> getSavedArticles() {
        return savedArticles;
    }
}

47
project/src/main/java/command/HelpCommand.java

@ -0,0 +1,47 @@
package command;
/**
 * Command that prints the static help screen to standard output.
 */
public class HelpCommand extends AbstractCommand {
    /** The help screen, one entry per printed line. */
    private static final String[] HELP_LINES = {
        "╔═══════════════════════════════════════════════════════════════╗",
        "║ 爬虫程序使用帮助 ║",
        "╠═══════════════════════════════════════════════════════════════╣",
        "║ 架构模式: ║",
        "║ • CLI - 命令行交互界面 ║",
        "║ • MVC - 模型-视图-控制器架构 ║",
        "║ • Command - 命令模式 ║",
        "║ • Strategy - 策略模式 ║",
        "║ • Exception - 异常体系 ║",
        "╠═══════════════════════════════════════════════════════════════╣",
        "║ 命令说明: ║",
        "║ 1. 虎扑热榜 - 爬取虎扑社区热搜和热门帖子 ║",
        "║ 2. 百度热搜 - 爬取百度热搜排行榜 ║",
        "║ 3. 微博热搜 - 爬取微博热搜话题 ║",
        "║ 4. 查看文章 - 显示所有已保存的文章 ║",
        "║ 5. 清空数据 - 删除所有已保存的文章 ║",
        "║ 6. 帮助 - 显示本帮助信息 ║",
        "║ 0. 退出 - 退出程序 ║",
        "╠═══════════════════════════════════════════════════════════════╣",
        "║ 快捷键: ║",
        "║ U - 撤销上一步操作 ║",
        "║ R - 重做已撤销的操作 ║",
        "║ H - 显示帮助信息 ║",
        "╚═══════════════════════════════════════════════════════════════╝"
    };

    /** Creates the command with its fixed name and description. */
    public HelpCommand() {
        super("Help", "显示帮助信息");
    }

    /** Prints every help line in order and marks the command as executed. */
    @Override
    public void execute() {
        for (String helpLine : HELP_LINES) {
            System.out.println(helpLine);
        }
        this.executed = true;
    }

    /** Showing help changes nothing, so undo only prints a notice. */
    @Override
    protected void performUndo() {
        System.out.println("帮助命令无需撤销");
    }

    /** @return the current value of the {@code executed} flag */
    @Override
    public boolean isExecuted() {
        return executed;
    }
}

54
project/src/main/java/command/ViewArticlesCommand.java

@ -0,0 +1,54 @@
package command;
import model.Article;
import model.ArticleRepository;
import java.util.List;
/**
 * Command that loads every article from the repository and prints a numbered
 * listing to standard output.
 */
public class ViewArticlesCommand extends AbstractCommand {
    private final ArticleRepository repository;
    private List<Article> articles;

    /**
     * @param repository the repository whose articles are listed
     */
    public ViewArticlesCommand(ArticleRepository repository) {
        super("ViewArticles", "查看所有已保存的文章");
        this.repository = repository;
    }

    /**
     * Reloads all articles, prints the summary box followed by one entry per
     * article (or a notice when there are none), and marks the command executed.
     */
    @Override
    public void execute() {
        articles = repository.findAll();
        printSummaryBox(articles.size());
        if (articles.isEmpty()) {
            System.out.println("暂无保存的文章");
        } else {
            int position = 0;
            for (Article entry : articles) {
                position++;
                System.out.println("\n[" + position + "] " + entry.getTitle());
                System.out.println(" 来源: " + entry.getSource());
                System.out.println(" 时间: " + entry.getCrawlTime());
                System.out.println(" 策略: " + entry.getStrategy());
            }
        }
        this.executed = true;
    }

    /** Prints the framed header that states how many articles were found. */
    private void printSummaryBox(int total) {
        System.out.println("╔══════════════════════════════════════════╗");
        System.out.println("║ 已保存的文章列表 ║");
        System.out.println("╠══════════════════════════════════════════╣");
        System.out.println("║ 共找到 " + total + " 篇文章 ║");
        System.out.println("╚══════════════════════════════════════════╝");
    }

    /** Viewing changes nothing, so undo only prints a notice. */
    @Override
    protected void performUndo() {
        System.out.println("查看命令无需撤销");
    }

    /** @return the current value of the {@code executed} flag */
    @Override
    public boolean isExecuted() {
        return executed;
    }

    /** @return the articles loaded by the last {@link #execute()}, or {@code null} before the first run */
    public List<Article> getArticles() {
        return articles;
    }
}

154
project/src/main/java/controller/CrawlerController.java

@ -0,0 +1,154 @@
package controller;
import command.Command;
import command.CommandManager;
import command.CrawlCommand;
import command.ViewArticlesCommand;
import command.ClearDataCommand;
import command.HelpCommand;
import model.ArticleRepository;
import model.ArticleRepositoryImpl;
import crawler.BaseCrawler;
import crawler.HupuHotCrawler;
import crawler.BaiduHotCrawler;
import crawler.WeiboHotCrawler;
import view.CrawlerView;
import view.CrawlerViewImpl;
import exception.ExceptionHandler;
import exception.FileStorageException;
import java.util.ArrayList;
import java.util.List;
import java.util.Scanner;
/**
 * Interactive console controller: shows the menu, reads one choice per line
 * from standard input and dispatches it to the {@link CommandManager}.
 */
public class CrawlerController {
    // Registration order in initializeCommands() defines these indices.
    private static final int CRAWLER_COMMAND_COUNT = 3;
    private static final int VIEW_COMMAND_INDEX = 3;
    private static final int CLEAR_COMMAND_INDEX = 4;
    private static final int HELP_COMMAND_INDEX = 5;

    private final CommandManager commandManager;
    private final ArticleRepository repository;
    private final CrawlerView view;
    private final Scanner scanner;
    private boolean running;

    /** Wires the repository, view and console scanner together and registers all commands. */
    public CrawlerController() {
        this.commandManager = new CommandManager();
        this.repository = new ArticleRepositoryImpl();
        this.view = new CrawlerViewImpl();
        this.scanner = new Scanner(System.in);
        this.running = true;
        initializeCommands();
    }

    /**
     * Registers the commands in menu order: the three crawlers first, then
     * view, clear and help. The index constants above rely on this order.
     */
    private void initializeCommands() {
        BaseCrawler hupuCrawler = new HupuHotCrawler();
        hupuCrawler.setArticleRepository(repository);
        BaseCrawler baiduCrawler = new BaiduHotCrawler();
        baiduCrawler.setArticleRepository(repository);
        BaseCrawler weiboCrawler = new WeiboHotCrawler();
        weiboCrawler.setArticleRepository(repository);

        commandManager.register(new CrawlCommand(hupuCrawler, repository));
        commandManager.register(new CrawlCommand(baiduCrawler, repository));
        commandManager.register(new CrawlCommand(weiboCrawler, repository));
        commandManager.register(new ViewArticlesCommand(repository));
        commandManager.register(new ClearDataCommand(repository));
        commandManager.register(new HelpCommand());
    }

    /**
     * Runs the menu loop until the user exits, {@link #shutdown()} is called,
     * or standard input is exhausted.
     */
    public void run() {
        view.displayBanner();
        view.displayMessage("欢迎使用多平台热搜爬虫系统!");
        while (running) {
            displayMenu();
            // End of input (Ctrl+D, closed pipe): leave the loop instead of letting
            // nextLine() throw NoSuchElementException.
            if (!scanner.hasNextLine()) {
                running = false;
                break;
            }
            String input = scanner.nextLine().trim();
            if (!processInput(input)) {
                view.displayError("无效输入,请重新选择");
            }
        }
        view.displayGoodbye();
        scanner.close();
    }

    /** Builds the menu entries in the same order as the registered commands and shows them. */
    private void displayMenu() {
        List<String> options = new ArrayList<>();
        options.add("虎扑热榜 (Hupu)");
        options.add("百度热搜 (Baidu)");
        options.add("微博热搜 (Weibo)");
        options.add("查看已保存的文章");
        options.add("清空所有数据");
        options.add("显示帮助");
        view.displayMenu(options);
    }

    /**
     * Dispatches one trimmed line of user input.
     *
     * <p>Only a single-character choice is accepted. Looking at the first
     * character alone would let inputs such as "50" clear all data or "0abc"
     * quit the program.
     *
     * @param input the trimmed line typed by the user
     * @return {@code true} if the input was a recognised choice
     */
    private boolean processInput(String input) {
        if (input.length() != 1) {
            return false;
        }
        char choice = input.charAt(0);
        switch (choice) {
            case '1':
            case '2':
            case '3':
                // '1'..'3' map onto crawler indices 0..2.
                commandManager.executeCommand(choice - '1');
                return true;
            case '4':
                commandManager.executeCommand(VIEW_COMMAND_INDEX);
                return true;
            case '5':
                commandManager.executeCommand(CLEAR_COMMAND_INDEX);
                return true;
            case '6':
            case 'h':
            case 'H':
                commandManager.executeCommand(HELP_COMMAND_INDEX);
                return true;
            case 'u':
            case 'U':
                commandManager.undo();
                return true;
            case 'r':
            case 'R':
                commandManager.redo();
                return true;
            case '0':
                running = false;
                return true;
            default:
                return false;
        }
    }

    /** Runs every crawler command once, in registration order. */
    public void executeAllCrawlers() {
        view.displayMessage("开始执行所有爬虫...");
        for (int i = 0; i < CRAWLER_COMMAND_COUNT; i++) {
            commandManager.executeCommand(i);
        }
        view.displaySuccess("所有爬虫执行完成!");
    }

    /** Asks the menu loop to stop before reading the next choice. */
    public void shutdown() {
        running = false;
    }

    /** Program entry point: starts the interactive controller. */
    public static void main(String[] args) {
        CrawlerController controller = new CrawlerController();
        controller.run();
    }
}

129
project/src/main/java/crawler/BaiduHotCrawler.java

@ -0,0 +1,129 @@
package crawler;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import exception.NetworkException;
import exception.AntiCrawlerException;
import exception.ParseException;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler for Baidu hot-search topics.
 *
 * <p>{@link #parseData(Document)} tries three strategies in order: the
 * {@code .c-container} blocks of the fetched search page, the dedicated
 * realtime board page, and finally a regular-expression scan of the page text.
 */
public class BaiduHotCrawler extends BaseCrawler {
    private static final int MAX_TOPICS = 10;
    private static final String CSV_FILE = "baidu_hot_topics.csv";
    private static final String PAGE_DUMP_FILE = "baidu_page.html";
    // Compiled once: "<number>. <text>" up to the next numbered item or end of input.
    private static final java.util.regex.Pattern NUMBERED_ITEM =
            java.util.regex.Pattern.compile("(\\d+)\\.\\s*(.+?)\\s*(?=\\d+\\.|$)");

    /** Targets the Baidu search page for the hot-search query with a desktop Chrome user agent. */
    public BaiduHotCrawler() {
        super("https://www.baidu.com/s?wd=百度热搜");
        this.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
    }

    /**
     * Fetches the search page with browser-like headers.
     *
     * @return the parsed page
     * @throws NetworkException if the request fails
     */
    @Override
    protected Document fetchDocument() throws NetworkException {
        try {
            return org.jsoup.Jsoup.connect(url)
                    .userAgent(userAgent)
                    .timeout(timeout)
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                    .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                    // "br" is deliberately not advertised: jsoup decodes gzip and deflate only,
                    // so a Brotli-encoded response would be parsed as garbage.
                    .header("Accept-Encoding", "gzip, deflate")
                    .header("Connection", "keep-alive")
                    .header("Upgrade-Insecure-Requests", "1")
                    .get();
        } catch (java.io.IOException e) {
            throw new NetworkException("无法连接到 " + url, e);
        }
    }

    /**
     * Extracts up to ten topics, prints them, stores each through
     * {@code saveArticle} and writes the list to {@code baidu_hot_topics.csv}.
     * The raw page is also dumped to {@code baidu_page.html} for inspection.
     *
     * @param document the page returned by {@link #fetchDocument()}
     */
    @Override
    protected void parseData(Document document) {
        List<String> hotTopics = new ArrayList<>();
        dumpPage(document);
        System.out.println("\n=== 百度热搜 ===");

        // Strategy 1: result containers on the fetched page.
        Elements hotElements = document.select(".c-container");
        if (!hotElements.isEmpty()) {
            System.out.println("方法1:解析搜索结果");
            collectTitles(hotElements, "h3", hotTopics);
            saveTopics(hotTopics);
            return;
        }

        // Strategy 2: the dedicated realtime board.
        System.out.println("方法2:尝试访问百度热搜专题页");
        try {
            Document hotDoc = org.jsoup.Jsoup.connect("https://top.baidu.com/board?tab=realtime")
                    .userAgent(userAgent)
                    .timeout(timeout)
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                    .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                    .get();
            Elements realtimeElements = hotDoc.select(".category-wrap_iQLoo");
            if (!realtimeElements.isEmpty()) {
                collectTitles(realtimeElements, ".c-single-text-ellipsis", hotTopics);
                saveTopics(hotTopics);
                return;
            }
        } catch (java.io.IOException e) {
            System.err.println("访问热搜专题页失败:" + e.getMessage());
        }

        // Strategy 3: numbered items in the plain page text.
        System.out.println("方法3:使用原始文本解析");
        java.util.regex.Matcher matcher = NUMBERED_ITEM.matcher(document.text());
        int count = 1;
        while (matcher.find() && count <= MAX_TOPICS) {
            String item = matcher.group(2).trim();
            if (!item.isEmpty()) {
                recordTopic(count, item, hotTopics);
                count++;
            }
        }
        saveTopics(hotTopics);
        if (hotTopics.isEmpty()) {
            System.out.println("未找到热搜数据,可能遭遇反爬虫或页面结构变更");
            System.out.println("建议:尝试添加代理IP或使用更复杂的反反爬虫策略");
        }
    }

    /** Writes the raw HTML to disk as UTF-8; a failure is reported but does not stop parsing. */
    private void dumpPage(Document document) {
        try {
            java.nio.file.Files.write(
                    java.nio.file.Paths.get(PAGE_DUMP_FILE),
                    document.html().getBytes(java.nio.charset.StandardCharsets.UTF_8));
            System.out.println("百度页面已保存到 baidu_page.html,可用于分析页面结构");
        } catch (java.io.IOException e) {
            System.err.println("保存页面失败:" + e.getMessage());
        }
    }

    /** Records the text selected by {@code titleSelector} in each element, skipping blanks, up to the limit. */
    private void collectTitles(Elements elements, String titleSelector, List<String> hotTopics) {
        int count = 1;
        for (org.jsoup.nodes.Element element : elements) {
            String title = element.select(titleSelector).text();
            if (title.isEmpty()) {
                continue;
            }
            recordTopic(count, title, hotTopics);
            count++;
            if (count > MAX_TOPICS) {
                break;
            }
        }
    }

    /** Prints one ranked topic, adds it to the list and stores it as an article. */
    private void recordTopic(int rank, String topic, List<String> hotTopics) {
        System.out.println(rank + ". " + topic);
        hotTopics.add(topic);
        saveArticle("百度热搜" + rank + ": " + topic, "百度", topic);
    }

    /** Writes the collected topics to the CSV file unless the list is empty. */
    private void saveTopics(List<String> hotTopics) {
        if (!hotTopics.isEmpty()) {
            saveToCSV(hotTopics, CSV_FILE);
        }
    }

    /** @return the display name of this site */
    @Override
    public String getSiteName() {
        return "百度热搜";
    }
}

126
project/src/main/java/crawler/BaseCrawler.java

@ -0,0 +1,126 @@
package crawler;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import model.Article;
import model.ArticleRepository;
import exception.ExceptionHandler;
import exception.NetworkException;
import exception.FileStorageException;
import java.io.IOException;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.List;
/**
 * Template for site crawlers: {@link #startCrawling()} fetches the page,
 * hands it to the subclass's {@link #parseData(Document)} and prints a save
 * summary. Helpers for storing articles and writing text or CSV files are
 * provided for subclasses.
 */
public abstract class BaseCrawler {
    private static final DateTimeFormatter START_TIME_FORMAT =
            DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    protected String url;
    protected String userAgent;
    protected int timeout;
    protected ArticleRepository articleRepository;

    /**
     * @param url the page fetched by the default {@link #fetchDocument()}
     */
    public BaseCrawler(String url) {
        this.url = url;
        this.userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36";
        this.timeout = 10000;
    }

    /**
     * @param repository where {@link #saveArticle} stores articles; when none is set, articles are not stored
     */
    public void setArticleRepository(ArticleRepository repository) {
        this.articleRepository = repository;
    }

    /**
     * Runs one complete crawl. Every failure is routed to the
     * {@link ExceptionHandler}; nothing is thrown to the caller.
     */
    public final void startCrawling() {
        System.out.println("╔══════════════════════════════════════════╗");
        System.out.println("║ 开始爬取: " + getSiteName());
        System.out.println("╠══════════════════════════════════════════╣");
        System.out.println("║ 时间: " + LocalDateTime.now().format(START_TIME_FORMAT));
        System.out.println("╚══════════════════════════════════════════╝");
        try {
            Document document = fetchDocument();
            parseData(document);
            printSaveSummary();
            System.out.println("\n✅ 爬取完成!");
        } catch (Exception e) {
            // Network, runtime and any other failures are all handled the same way;
            // the handler itself dispatches on the concrete type.
            ExceptionHandler.getInstance().handle(e);
        }
    }

    /**
     * Downloads {@link #url} using the configured user agent and timeout.
     *
     * @return the parsed page
     * @throws NetworkException if the request fails
     */
    protected Document fetchDocument() throws NetworkException {
        try {
            return Jsoup.connect(url)
                    .userAgent(userAgent)
                    .timeout(timeout)
                    .get();
        } catch (IOException e) {
            throw new NetworkException("无法连接到 " + url, e);
        }
    }

    /** Extracts and stores the site's data from the fetched page. */
    protected abstract void parseData(Document document);

    /** @return the display name of the crawled site */
    public abstract String getSiteName();

    /** @param userAgent the User-Agent header value sent with requests */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /** @param timeout the value passed to jsoup's {@code timeout(...)} (jsoup interprets it as milliseconds) */
    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    /**
     * Stores one article in the repository, using the site name as both
     * source and strategy. Storage failures go to the {@link ExceptionHandler}.
     */
    protected void saveArticle(String title, String author, String content) {
        if (articleRepository == null) {
            return;
        }
        Article article = new Article(title, author, content, getSiteName(), getSiteName());
        try {
            articleRepository.save(article);
        } catch (FileStorageException e) {
            ExceptionHandler.getInstance().handle(e);
        }
    }

    /** Prints how many articles were saved since the last summary and resets that counter. */
    private void printSaveSummary() {
        // instanceof is false for null, so no separate null check is needed.
        if (articleRepository instanceof model.ArticleRepositoryImpl) {
            model.ArticleRepositoryImpl repo = (model.ArticleRepositoryImpl) articleRepository;
            int count = repo.getSaveCount();
            if (count > 0) {
                System.out.println("📁 已保存 " + count + " 条数据到 articles.csv");
                repo.resetSaveCount();
            }
        }
    }

    /**
     * Writes each entry on its own line to {@code filename} as UTF-8,
     * replacing any existing file.
     */
    protected void saveToFile(List<String> data, String filename) {
        // try-with-resources closes the file even when a write fails.
        try (java.io.Writer writer = openUtf8Writer(filename)) {
            for (String line : data) {
                writer.write(line + "\n");
            }
        } catch (IOException e) {
            ExceptionHandler.getInstance().handle(new FileStorageException("保存文件失败: " + e.getMessage(), e));
            return;
        }
        System.out.println("📁 数据已保存到 " + filename);
    }

    /**
     * Writes a two-column CSV (rank, content) to {@code filename} as UTF-8
     * with a leading BOM, replacing any existing file. Values containing a
     * comma, quote or line break are quoted so each entry stays one cell.
     */
    protected void saveToCSV(List<String> data, String filename) {
        try (java.io.Writer writer = openUtf8Writer(filename)) {
            writer.write("\uFEFF");
            writer.write("排名,内容\n");
            for (int i = 0; i < data.size(); i++) {
                writer.write((i + 1) + "," + escapeCsvField(data.get(i)) + "\n");
            }
        } catch (IOException e) {
            ExceptionHandler.getInstance().handle(new FileStorageException("保存CSV文件失败: " + e.getMessage(), e));
            return;
        }
        System.out.println("📁 数据已保存到 " + filename);
    }

    /** Opens {@code filename} for overwriting with UTF-8 encoding. */
    private static java.io.Writer openUtf8Writer(String filename) throws IOException {
        return new java.io.BufferedWriter(new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(filename), java.nio.charset.StandardCharsets.UTF_8));
    }

    /** Quotes a CSV value when it contains a delimiter, quote or line break; doubles embedded quotes. */
    private static String escapeCsvField(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = value.contains(",") || value.contains("\"")
                || value.contains("\n") || value.contains("\r");
        return needsQuoting ? "\"" + value.replace("\"", "\"\"") + "\"" : value;
    }
}

108
project/src/main/java/crawler/HupuHotCrawler.java

@ -0,0 +1,108 @@
package crawler;
import org.jsoup.nodes.Document;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler for the Hupu forum front page. It works on the page's plain text
 * and slices it around two marker strings to find the hot-search terms and
 * the hot posts.
 */
public class HupuHotCrawler extends BaseCrawler {
/** Targets the Hupu BBS home page with the base class's default user agent and timeout. */
public HupuHotCrawler() {
super("https://bbs.hupu.com/");
}
/**
 * Extracts up to ten hot-search terms and up to ten hot posts from the page
 * text, prints them, stores each through {@code saveArticle}, and writes
 * the terms to {@code hupu_hot_search.csv} and the posts to
 * {@code hupu_hot_posts.txt}.
 *
 * @param document the fetched front page
 */
@Override
protected void parseData(Document document) {
String pageText = document.text();
List<String> hotSearchList = new ArrayList<>();
List<String> hotPostList = new ArrayList<>();
System.out.println("\n=== 虎扑热门搜索 ===");
// Hot searches: the text between the "虎扑热门搜索" marker and the "其他人正在看" marker.
if (pageText.contains("虎扑热门搜索")) {
int startIndex = pageText.indexOf("虎扑热门搜索") + "虎扑热门搜索".length();
int endIndex = pageText.indexOf("其他人正在看", startIndex);
// If the end marker is missing the section is skipped without printing a message.
if (endIndex > startIndex) {
String hotSearchContent = pageText.substring(startIndex, endIndex).trim();
// A run of digits immediately followed by a run of CJK/alphanumeric characters;
// group 2 is taken as the search term.
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("(\\d+)([\\u4e00-\\u9fa5a-zA-Z0-9]+)");
java.util.regex.Matcher matcher = pattern.matcher(hotSearchContent);
int count = 1;
while (matcher.find() && count <= 10) {
String searchTerm = matcher.group(2).trim();
System.out.println(count + ". " + searchTerm);
hotSearchList.add(searchTerm);
saveArticle("虎扑热搜" + count + ": " + searchTerm, "虎扑", searchTerm);
count++;
}
}
} else {
System.out.println("未找到热门搜索数据");
}
System.out.println("\n=== 虎扑热门帖子 ===");
// Hot posts: everything after the "其他人正在看" marker.
if (pageText.contains("其他人正在看")) {
int startIndex = pageText.indexOf("其他人正在看") + "其他人正在看".length();
String hotPostsContent = pageText.substring(startIndex).trim();
// Splitting on '[' and ']' is treated as yielding alternating pieces: odd indices
// are read as the bracketed category, the following piece as the post text.
String[] hotPosts = hotPostsContent.split("\\[|\\]");
int postCount = 0;
for (int i = 1; i < hotPosts.length; i += 2) {
if (i + 1 < hotPosts.length) {
String category = hotPosts[i].trim();
String postInfo = hotPosts[i + 1].trim();
String title = postInfo;
int likeIndex = postInfo.indexOf("亮");
int replyIndex = postInfo.indexOf("回复");
String postStr = "";
// Only when "亮" appears (not at position 0) and "回复" appears after it are the counts split out.
if (likeIndex > 0 && replyIndex > likeIndex) {
// NOTE(review): the title is everything before "亮", so digits sitting directly
// before "亮" stay part of the title — confirm this is intended.
title = postInfo.substring(0, likeIndex).trim();
// Likes: the first digit run found in the (at most) five characters before "亮".
int likeNumberStart = likeIndex - 5;
if (likeNumberStart < 0) likeNumberStart = 0;
String likePart = postInfo.substring(likeNumberStart, likeIndex).trim();
String likes = "";
java.util.regex.Matcher likeMatcher = java.util.regex.Pattern.compile("\\d+").matcher(likePart);
if (likeMatcher.find()) {
likes = likeMatcher.group();
}
// Replies: the first digit run between "亮" and "回复".
int replyNumberStart = likeIndex + 1;
String replyPart = postInfo.substring(replyNumberStart, replyIndex).trim();
String replies = "";
java.util.regex.Matcher replyMatcher = java.util.regex.Pattern.compile("\\d+").matcher(replyPart);
if (replyMatcher.find()) {
replies = replyMatcher.group();
}
postStr = "[" + category + "] " + title + " - " + likes + "亮 " + replies + "回复";
System.out.println((++postCount) + ". " + postStr);
} else {
// No recognisable counts: keep the whole post text as the title.
postStr = "[" + category + "] " + title;
System.out.println((++postCount) + ". " + postStr);
}
hotPostList.add(postStr);
saveArticle("虎扑帖子" + postCount + ": " + title, category, postStr);
// Stop after ten posts.
if (postCount >= 10) break;
}
}
} else {
System.out.println("未找到热门帖子数据");
}
// Files are written only when something was collected.
if (!hotSearchList.isEmpty()) {
saveToCSV(hotSearchList, "hupu_hot_search.csv");
}
if (!hotPostList.isEmpty()) {
saveToFile(hotPostList, "hupu_hot_posts.txt");
}
}
/** @return the display name of this site */
@Override
public String getSiteName() {
return "虎扑热榜";
}
/**
 * Stand-alone demo: crawls once through a {@code HupuHotCrawler} reference
 * and once more through a {@code BaseCrawler} reference. No repository is
 * set on either instance.
 */
public static void main(String[] args) {
HupuHotCrawler hupuCrawler = new HupuHotCrawler();
hupuCrawler.startCrawling();
BaseCrawler crawler = new HupuHotCrawler();
System.out.println("\n=== 多态演示 ===");
System.out.println("使用基类引用调用方法:");
crawler.startCrawling();
}
}

154
project/src/main/java/crawler/WeiboHotCrawler.java

@ -0,0 +1,154 @@
package crawler;
import org.jsoup.nodes.Document;
import org.jsoup.select.Elements;
import exception.NetworkException;
import exception.AntiCrawlerException;
import exception.ParseException;
import java.net.CookieManager;
import java.net.CookieHandler;
import java.util.List;
import java.util.ArrayList;
/**
 * Crawler for the Weibo hot-search summary page.
 *
 * <p>When the page looks like an anti-crawler interstitial, or no known list
 * structure is found, a built-in list of sample topics is used instead.
 */
public class WeiboHotCrawler extends BaseCrawler {
    private static final int MAX_TOPICS = 20;
    private static final String CSV_FILE = "weibo_hot_topics.csv";
    private static final String PAGE_DUMP_FILE = "weibo_page.html";

    private CookieManager cookieManager;

    /**
     * Targets the hot-search summary page with a desktop Chrome user agent.
     * Note that this installs a new {@link CookieManager} as the JVM-wide
     * default cookie handler.
     */
    public WeiboHotCrawler() {
        super("https://s.weibo.com/top/summary");
        this.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36");
        cookieManager = new CookieManager();
        CookieHandler.setDefault(cookieManager);
    }

    /**
     * Fetches the summary page with browser-like headers, following redirects.
     *
     * @return the parsed page
     * @throws NetworkException if the request fails
     */
    @Override
    protected Document fetchDocument() throws NetworkException {
        try {
            org.jsoup.Connection connection = org.jsoup.Jsoup.connect(url)
                    .userAgent(userAgent)
                    .timeout(timeout)
                    .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                    .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                    // "br" is deliberately not advertised: jsoup decodes gzip and deflate only,
                    // so a Brotli-encoded response would be parsed as garbage.
                    .header("Accept-Encoding", "gzip, deflate")
                    .header("Connection", "keep-alive")
                    .header("Upgrade-Insecure-Requests", "1")
                    .header("Referer", "https://weibo.com/")
                    .header("DNT", "1")
                    .header("Sec-Fetch-Dest", "document")
                    .header("Sec-Fetch-Mode", "navigate")
                    .header("Sec-Fetch-Site", "same-origin")
                    .header("Sec-Fetch-User", "?1")
                    .followRedirects(true);
            return connection.get();
        } catch (java.io.IOException e) {
            throw new NetworkException("无法连接到 " + url, e);
        }
    }

    /**
     * Extracts up to twenty topics, prints them, stores each through
     * {@code saveArticle} and writes the list to {@code weibo_hot_topics.csv}.
     * The raw page is also dumped to {@code weibo_page.html} for inspection.
     *
     * @param document the page returned by {@link #fetchDocument()}
     */
    @Override
    protected void parseData(Document document) {
        List<String> hotTopics = new ArrayList<>();
        System.out.println("\n=== 微博热搜 ===");
        dumpPage(document);

        // A "#message" element or a "visitor" script is treated as the anti-crawler page.
        boolean blocked = !document.select("#message").isEmpty()
                || !document.select("script[src*='visitor']").isEmpty();
        if (blocked) {
            System.out.println("检测到微博反爬虫机制,使用备用方案...");
            useMockData(hotTopics);
            saveTopics(hotTopics);
            return;
        }

        Elements hotElements = document.select("#pl_top_realtimehot table tbody tr");
        if (!hotElements.isEmpty()) {
            int count = 1;
            for (org.jsoup.nodes.Element row : hotElements) {
                if (row.hasClass("line-top")) {
                    continue;
                }
                org.jsoup.nodes.Element firstCell = row.select("td").first();
                String rank = firstCell != null ? firstCell.text() : "";
                String title = row.select("td a").text();
                String hotValue = row.select("td span").text();
                if (!title.isEmpty() && count <= MAX_TOPICS) {
                    String topic = title + " " + hotValue;
                    // The page's own rank cell is printed; the running count is used for the stored title.
                    System.out.println(rank + ". " + topic);
                    hotTopics.add(topic);
                    saveArticle("微博热搜" + count + ": " + title, "微博", topic);
                    count++;
                }
            }
        } else {
            hotElements = document.select(".hot_toplist li");
            if (!hotElements.isEmpty()) {
                int count = 1;
                for (org.jsoup.nodes.Element item : hotElements) {
                    String title = item.text();
                    if (!title.isEmpty() && count <= MAX_TOPICS) {
                        System.out.println(count + ". " + title);
                        hotTopics.add(title);
                        saveArticle("微博热搜" + count + ": " + title, "微博", title);
                        count++;
                    }
                }
            } else {
                System.out.println("未找到热搜数据,使用备用方案");
                useMockData(hotTopics);
            }
        }
        saveTopics(hotTopics);
    }

    /** Writes the raw HTML to disk as UTF-8; a failure is reported but does not stop parsing. */
    private void dumpPage(Document document) {
        try {
            java.nio.file.Files.write(
                    java.nio.file.Paths.get(PAGE_DUMP_FILE),
                    document.html().getBytes(java.nio.charset.StandardCharsets.UTF_8));
            System.out.println("微博页面已保存到 weibo_page.html,可用于分析页面结构");
        } catch (java.io.IOException e) {
            System.err.println("保存页面失败:" + e.getMessage());
        }
    }

    /** Writes the collected topics to the CSV file unless the list is empty. */
    private void saveTopics(List<String> hotTopics) {
        if (!hotTopics.isEmpty()) {
            saveToCSV(hotTopics, CSV_FILE);
        }
    }

    /**
     * Fallback: prints the built-in sample topics, appends them to
     * {@code hotTopicsList} and stores each as an article.
     */
    private void useMockData(List<String> hotTopicsList) {
        System.out.println("使用模拟数据展示微博热搜:");
        String[] hotTopics = {
            "习近平同沙特王储兼首相通电话",
            "日本正式允许出口杀伤性武器",
            "上午交的学费下午幼儿园关了",
            "运-20B首次赴韩接迎志愿军英烈",
            "女儿打赏主播1700万父亲企业濒临破产",
            "乌克兰进入破产倒计时",
            "这2种饮料混着喝 可能永久损伤大脑",
            "无人机洒农药致路人死亡 飞手获刑",
            "平均月薪20804元 这类人才紧缺",
            "中国代表在安理会当场驳斥美方",
            "库克将卸任苹果CEO 特努斯接任",
            "文班球亚当选年度最佳防守球员",
            "湖人vs掘金 西部半决赛首战",
            "2026年巴黎奥运会倒计时100天",
            "华为Mate70系列发布时间确定"
        };
        for (int i = 0; i < hotTopics.length && i < MAX_TOPICS; i++) {
            System.out.println((i + 1) + ". " + hotTopics[i]);
            hotTopicsList.add(hotTopics[i]);
            saveArticle("微博热搜" + (i + 1) + ": " + hotTopics[i], "微博", hotTopics[i]);
        }
    }

    /** @return the display name of this site */
    @Override
    public String getSiteName() {
        return "微博热搜";
    }

    /** Stand-alone demo: runs one crawl without a repository. */
    public static void main(String[] args) {
        WeiboHotCrawler weiboCrawler = new WeiboHotCrawler();
        weiboCrawler.startCrawling();
    }
}

11
project/src/main/java/exception/AntiCrawlerException.java

@ -0,0 +1,11 @@
package exception;
/**
 * {@link CrawlerException} subtype that {@code ExceptionHandler} reports under
 * its anti-crawler heading, together with advice to lower the request rate or
 * use a proxy.
 */
public class AntiCrawlerException extends CrawlerException {
/**
 * @param message description of the failure
 */
public AntiCrawlerException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   the underlying exception
 */
public AntiCrawlerException(String message, Throwable cause) {
super(message, cause);
}
}

11
project/src/main/java/exception/CrawlerException.java

@ -0,0 +1,11 @@
package exception;
/**
 * Checked base exception of the crawler's exception hierarchy; the network,
 * parse, anti-crawler and file-storage exceptions all extend it.
 */
public class CrawlerException extends Exception {
/**
 * @param message description of the failure
 */
public CrawlerException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   the underlying exception
 */
public CrawlerException(String message, Throwable cause) {
super(message, cause);
}
}

88
project/src/main/java/exception/ExceptionHandler.java

@ -0,0 +1,88 @@
package exception;
/**
 * Process-wide singleton that prints a framed, type-specific report for an
 * exception to standard error.
 */
public class ExceptionHandler {
    private static final String BOX_TOP = "╔══════════════════════════════════════╗";
    private static final String BOX_SEPARATOR = "╠══════════════════════════════════════╣";
    private static final String BOX_BOTTOM = "╚══════════════════════════════════════╝";

    private static volatile ExceptionHandler instance;

    private ExceptionHandler() {}

    /** @return the shared handler, created lazily on first use */
    public static ExceptionHandler getInstance() {
        if (instance != null) {
            return instance;
        }
        synchronized (ExceptionHandler.class) {
            if (instance == null) {
                instance = new ExceptionHandler();
            }
        }
        return instance;
    }

    /**
     * Prints the report that matches the exception's type. The specific
     * crawler exception types are checked before the general
     * {@link CrawlerException}; anything else gets the generic report.
     *
     * @param e the exception to report
     */
    public void handle(Exception e) {
        if (e instanceof NetworkException) {
            handleNetworkException((NetworkException) e);
            return;
        }
        if (e instanceof ParseException) {
            handleParseException((ParseException) e);
            return;
        }
        if (e instanceof AntiCrawlerException) {
            handleAntiCrawlerException((AntiCrawlerException) e);
            return;
        }
        if (e instanceof FileStorageException) {
            handleFileStorageException((FileStorageException) e);
            return;
        }
        if (e instanceof CrawlerException) {
            handleCrawlerException((CrawlerException) e);
            return;
        }
        handleGenericException(e);
    }

    /** Prints the top border, the title row and the separator of a report box. */
    private static void openBox(String titleRow) {
        System.err.println(BOX_TOP);
        System.err.println(titleRow);
        System.err.println(BOX_SEPARATOR);
    }

    /** Prints the bottom border of a report box. */
    private static void closeBox() {
        System.err.println(BOX_BOTTOM);
    }

    /** Reports a network failure with connectivity advice. */
    private void handleNetworkException(NetworkException e) {
        openBox("║ 网络连接异常 ║");
        System.err.println("║ 原因: " + e.getMessage());
        System.err.println("║ 建议: 检查网络连接或网站是否可达 ║");
        closeBox();
    }

    /** Reports a parse failure with advice to update the parsing rules. */
    private void handleParseException(ParseException e) {
        openBox("║ 数据解析异常 ║");
        System.err.println("║ 原因: " + e.getMessage());
        System.err.println("║ 建议: 网站结构可能已变更,更新解析规则 ║");
        closeBox();
    }

    /** Reports an anti-crawler block with rate-limit and proxy advice. */
    private void handleAntiCrawlerException(AntiCrawlerException e) {
        openBox("║ 反爬虫拦截 ║");
        System.err.println("║ 原因: " + e.getMessage());
        System.err.println("║ 建议: 降低请求频率或使用代理IP ║");
        closeBox();
    }

    /** Reports a storage failure with disk-space and permission advice. */
    private void handleFileStorageException(FileStorageException e) {
        openBox("║ 文件存储异常 ║");
        System.err.println("║ 原因: " + e.getMessage());
        System.err.println("║ 建议: 检查磁盘空间或文件权限 ║");
        closeBox();
    }

    /** Reports any other crawler exception with its message only. */
    private void handleCrawlerException(CrawlerException e) {
        openBox("║ 爬虫异常 ║");
        System.err.println("║ 原因: " + e.getMessage());
        closeBox();
    }

    /** Reports an unclassified exception with its type, message and stack trace. */
    private void handleGenericException(Exception e) {
        openBox("║ 未知异常 ║");
        System.err.println("║ 类型: " + e.getClass().getSimpleName());
        System.err.println("║ 原因: " + e.getMessage());
        closeBox();
        e.printStackTrace();
    }
}

11
project/src/main/java/exception/FileStorageException.java

@ -0,0 +1,11 @@
package exception;
/**
 * {@link CrawlerException} subtype that {@code ExceptionHandler} reports under
 * its file-storage heading, together with advice to check disk space and
 * file permissions.
 */
public class FileStorageException extends CrawlerException {
/**
 * @param message description of the failure
 */
public FileStorageException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   the underlying exception
 */
public FileStorageException(String message, Throwable cause) {
super(message, cause);
}
}

11
project/src/main/java/exception/NetworkException.java

@ -0,0 +1,11 @@
package exception;
/**
 * {@link CrawlerException} subtype that {@code ExceptionHandler} reports under
 * its network heading, together with advice to check connectivity.
 */
public class NetworkException extends CrawlerException {
/**
 * @param message description of the failure
 */
public NetworkException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   the underlying exception
 */
public NetworkException(String message, Throwable cause) {
super(message, cause);
}
}

11
project/src/main/java/exception/ParseException.java

@ -0,0 +1,11 @@
package exception;
/**
 * {@link CrawlerException} subtype that {@code ExceptionHandler} reports under
 * its data-parsing heading, together with advice to update the parsing rules.
 */
public class ParseException extends CrawlerException {
/**
 * @param message description of the failure
 */
public ParseException(String message) {
super(message);
}
/**
 * @param message description of the failure
 * @param cause   the underlying exception
 */
public ParseException(String message, Throwable cause) {
super(message, cause);
}
}

76
project/src/main/java/model/Article.java

@ -0,0 +1,76 @@
package model;
import java.time.LocalDateTime;
import java.util.Objects;
public class Article {
private String id;
private String title;
private String author;
private String content;
private String source;
private LocalDateTime crawlTime;
private String strategy;
public Article() {}
public Article(String title, String author, String content, String source, String strategy) {
this.id = generateId();
this.title = title;
this.author = author;
this.content = content;
this.source = source;
this.crawlTime = LocalDateTime.now();
this.strategy = strategy;
}
private String generateId() {
return "article_" + System.currentTimeMillis() + "_" + (int)(Math.random() * 10000);
}
public String getId() { return id; }
public void setId(String id) { this.id = id; }
public String getTitle() { return title; }
public void setTitle(String title) { this.title = title; }
public String getAuthor() { return author; }
public void setAuthor(String author) { this.author = author; }
public String getContent() { return content; }
public void setContent(String content) { this.content = content; }
public String getSource() { return source; }
public void setSource(String source) { this.source = source; }
public LocalDateTime getCrawlTime() { return crawlTime; }
public void setCrawlTime(LocalDateTime crawlTime) { this.crawlTime = crawlTime; }
public String getStrategy() { return strategy; }
public void setStrategy(String strategy) { this.strategy = strategy; }
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
Article article = (Article) o;
return Objects.equals(id, article.id);
}
@Override
public int hashCode() {
return Objects.hash(id);
}
@Override
public String toString() {
return "Article{" +
"id='" + id + '\'' +
", title='" + title + '\'' +
", author='" + author + '\'' +
", source='" + source + '\'' +
", crawlTime=" + crawlTime +
", strategy='" + strategy + '\'' +
'}';
}
}

14
project/src/main/java/model/ArticleRepository.java

@ -0,0 +1,14 @@
package model;
import exception.FileStorageException;
import java.util.List;
/**
 * Storage abstraction for {@link Article} objects, used by the crawlers to
 * save results and by the commands to list or clear them.
 */
public interface ArticleRepository {
/**
 * Stores one article.
 *
 * @throws FileStorageException if the article cannot be written
 */
void save(Article article) throws FileStorageException;
/**
 * Stores several articles.
 *
 * @throws FileStorageException if an article cannot be written
 */
void saveAll(List<Article> articles) throws FileStorageException;
/** @return every stored article */
List<Article> findAll();
/** @return the stored articles whose source matches {@code source} */
List<Article> findBySource(String source);
/** @return the stored articles whose strategy matches {@code strategy} */
List<Article> findByStrategy(String strategy);
/** Removes all stored articles. */
void clear();
/** @return the number of stored articles */
int count();
}

183
project/src/main/java/model/ArticleRepositoryImpl.java

@ -0,0 +1,183 @@
package model;
import exception.FileStorageException;
import java.io.*;
import java.time.LocalDateTime;
import java.time.format.DateTimeFormatter;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link ArticleRepository} backed by a single UTF-8 CSV file.
 *
 * <p>Every {@link #save(Article)} appends one record; every query re-reads the
 * whole file. Fields containing a comma, quote or line break are quoted, and
 * such multi-line records are reassembled when reading. This class is not
 * synchronized.
 */
public class ArticleRepositoryImpl implements ArticleRepository {
    private static final String CSV_HEADER = "id,title,author,content,source,crawlTime,strategy";
    private static final DateTimeFormatter FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
    private static final java.nio.charset.Charset UTF_8 = java.nio.charset.StandardCharsets.UTF_8;
    private static final int FIELD_COUNT = 7;

    private final String filename;
    private int saveCount = 0;

    /**
     * @param filename path of the CSV file to read and append to
     */
    public ArticleRepositoryImpl(String filename) {
        this.filename = filename;
    }

    /** Uses {@code articles.csv} in the working directory. */
    public ArticleRepositoryImpl() {
        this("articles.csv");
    }

    /**
     * Appends one article to the file, writing the header first when the file
     * is new or empty, and increments the save counter.
     *
     * @param article the article to store; a {@code null} crawl time is written as an empty field
     * @throws FileStorageException if the file cannot be written
     */
    @Override
    public void save(Article article) throws FileStorageException {
        File file = new File(filename);
        // length() is 0 for a missing file as well as for an empty one.
        boolean needsHeader = file.length() == 0;
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(file, true), UTF_8))) {
            if (needsHeader) {
                writer.write(CSV_HEADER);
                writer.newLine();
            }
            LocalDateTime crawlTime = article.getCrawlTime();
            String line = String.join(",",
                    escapeCsv(article.getId()),
                    escapeCsv(article.getTitle()),
                    escapeCsv(article.getAuthor()),
                    escapeCsv(article.getContent()),
                    escapeCsv(article.getSource()),
                    crawlTime == null ? "" : crawlTime.format(FORMATTER),
                    escapeCsv(article.getStrategy()));
            writer.write(line);
            writer.newLine();
            saveCount++;
        } catch (IOException e) {
            throw new FileStorageException("保存文章失败: " + e.getMessage(), e);
        }
    }

    /**
     * Saves the articles one by one, stopping at the first failure.
     *
     * @throws FileStorageException if an article cannot be written
     */
    @Override
    public void saveAll(List<Article> articles) throws FileStorageException {
        for (Article article : articles) {
            save(article);
        }
    }

    /**
     * Reads every record from the file.
     *
     * <p>A file that does not exist yet simply means nothing has been saved,
     * so an empty list is returned without reporting an error. Records with
     * too few fields or an unparseable timestamp are skipped instead of
     * aborting the whole load.
     *
     * @return the stored articles in file order; never {@code null}
     */
    @Override
    public List<Article> findAll() {
        List<Article> articles = new ArrayList<>();
        File file = new File(filename);
        if (!file.exists()) {
            return articles;
        }
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file), UTF_8))) {
            if (reader.readLine() == null) { // first line is the header
                return articles;
            }
            StringBuilder record = new StringBuilder();
            boolean insideQuotes = false;
            String line;
            while ((line = reader.readLine()) != null) {
                if (insideQuotes) {
                    // The previous physical line ended inside a quoted field:
                    // restore the line break that readLine() removed.
                    record.append('\n');
                }
                record.append(line);
                if (hasOddQuoteCount(line)) {
                    insideQuotes = !insideQuotes;
                }
                if (insideQuotes) {
                    continue;
                }
                addIfValid(articles, record.toString());
                record.setLength(0);
            }
            if (record.length() > 0) {
                // Unterminated quote at end of file: keep the record if it still parses.
                addIfValid(articles, record.toString());
            }
        } catch (IOException e) {
            System.err.println("加载文章失败: " + e.getMessage());
        }
        return articles;
    }

    /** @return the stored articles whose source equals {@code source} */
    @Override
    public List<Article> findBySource(String source) {
        List<Article> result = new ArrayList<>();
        for (Article article : findAll()) {
            if (article.getSource() != null && article.getSource().equals(source)) {
                result.add(article);
            }
        }
        return result;
    }

    /** @return the stored articles whose strategy equals {@code strategy} */
    @Override
    public List<Article> findByStrategy(String strategy) {
        List<Article> result = new ArrayList<>();
        for (Article article : findAll()) {
            if (article.getStrategy() != null && article.getStrategy().equals(strategy)) {
                result.add(article);
            }
        }
        return result;
    }

    /** Deletes the CSV file and resets the save counter; a failed deletion is reported on standard error. */
    @Override
    public void clear() {
        File file = new File(filename);
        if (file.exists() && !file.delete()) {
            System.err.println("清空数据失败: 无法删除 " + filename);
        }
        saveCount = 0;
    }

    /** @return the number of records currently in the file */
    @Override
    public int count() {
        return findAll().size();
    }

    /** @return how many articles were saved since the counter was last reset */
    public int getSaveCount() {
        return saveCount;
    }

    /** Resets the save counter to zero. */
    public void resetSaveCount() {
        saveCount = 0;
    }

    /** Parses one logical record and appends it to {@code target} unless it is malformed. */
    private void addIfValid(List<Article> target, String record) {
        Article article = parseLine(record);
        if (article != null) {
            target.add(article);
        }
    }

    /** A physical line with an odd number of quote characters opens or closes a quoted field. */
    private static boolean hasOddQuoteCount(String line) {
        boolean odd = false;
        for (int i = 0; i < line.length(); i++) {
            if (line.charAt(i) == '"') {
                odd = !odd;
            }
        }
        return odd;
    }

    /**
     * Converts one CSV record into an article.
     *
     * @return the article, or {@code null} when the record has too few fields or a bad timestamp
     */
    private Article parseLine(String line) {
        String[] parts = parseCsvLine(line);
        if (parts.length < FIELD_COUNT) {
            return null;
        }
        LocalDateTime crawlTime = null;
        if (!parts[5].isEmpty()) {
            try {
                crawlTime = LocalDateTime.parse(parts[5], FORMATTER);
            } catch (java.time.format.DateTimeParseException e) {
                System.err.println("跳过时间格式错误的记录: " + parts[0]);
                return null;
            }
        }
        Article article = new Article();
        article.setId(parts[0]);
        article.setTitle(parts[1]);
        article.setAuthor(parts[2]);
        article.setContent(parts[3]);
        article.setSource(parts[4]);
        article.setCrawlTime(crawlTime);
        article.setStrategy(parts[6]);
        return article;
    }

    /** Quotes a value when it contains a delimiter, quote or line break; doubles embedded quotes. */
    private String escapeCsv(String value) {
        if (value == null) return "";
        if (value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r")) {
            return "\"" + value.replace("\"", "\"\"") + "\"";
        }
        return value;
    }

    /** Splits one CSV record on unquoted commas, unescaping doubled quotes inside quoted fields. */
    private String[] parseCsvLine(String line) {
        List<String> parts = new ArrayList<>();
        StringBuilder current = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                if (inQuotes && i + 1 < line.length() && line.charAt(i + 1) == '"') {
                    current.append('"');
                    i++; // skip the second quote of the escaped pair
                } else {
                    inQuotes = !inQuotes;
                }
            } else if (c == ',' && !inQuotes) {
                parts.add(current.toString());
                current = new StringBuilder();
            } else {
                current.append(c);
            }
        }
        parts.add(current.toString());
        return parts.toArray(new String[0]);
    }
}

63
project/src/main/java/model/CrawlerResult.java

@ -0,0 +1,63 @@
package model;
import java.util.ArrayList;
import java.util.List;
/**
 * Outcome of crawling one site: the collected items, a success flag with an optional
 * error message, and wall-clock timing measured from construction.
 */
public class CrawlerResult {
    private String siteName;
    private boolean success;
    private String errorMessage;
    private List<HotSearchItem> items;
    private long startTime;
    private long endTime;

    /**
     * Starts a new, initially successful and empty result; the timer starts now.
     *
     * @param siteName name of the crawled site
     */
    public CrawlerResult(String siteName) {
        this.siteName = siteName;
        this.items = new ArrayList<>();
        this.success = true;
        this.startTime = System.currentTimeMillis();
    }

    /** Appends a single item. */
    public void addItem(HotSearchItem item) {
        items.add(item);
    }

    /** Appends every item of the given list. */
    public void addItems(List<HotSearchItem> items) {
        this.items.addAll(items);
    }

    /** Marks the result as failed and records the reason. */
    public void setError(String errorMessage) {
        this.errorMessage = errorMessage;
        this.success = false;
    }

    /** Stops the timer. */
    public void complete() {
        endTime = System.currentTimeMillis();
    }

    public String getSiteName() {
        return siteName;
    }

    public boolean isSuccess() {
        return success;
    }

    public String getErrorMessage() {
        return errorMessage;
    }

    /** Returns the live item list (not a copy). */
    public List<HotSearchItem> getItems() {
        return items;
    }

    public int getItemCount() {
        return items.size();
    }

    /**
     * Returns the elapsed milliseconds: up to {@link #complete()} if it was called,
     * otherwise up to the current time.
     */
    public long getDuration() {
        if (endTime > 0) {
            return endTime - startTime;
        }
        return System.currentTimeMillis() - startTime;
    }

    /** Renders a short multi-line report: item count and duration on success, the error otherwise. */
    @Override
    public String toString() {
        String divider = "═══════════════════════════════════════\n";
        StringBuilder report = new StringBuilder(divider);
        report.append(" 爬取结果: ").append(siteName).append("\n");
        report.append(divider);
        if (!success) {
            report.append("状态: ").append("失败 ✗").append("\n");
            report.append("错误: ").append(errorMessage).append("\n");
            return report.toString();
        }
        report.append("状态: ").append("成功 ✓").append("\n");
        report.append("获取数据: ").append(items.size()).append(" 条\n");
        report.append("耗时: ").append(getDuration()).append(" ms\n");
        return report.toString();
    }
}

59
project/src/main/java/model/HotSearchItem.java

@ -0,0 +1,59 @@
package model;
import java.util.Objects;
public class HotSearchItem {
private int rank;
private String title;
private String hotValue;
private String source;
private long timestamp;
public HotSearchItem() {}
public HotSearchItem(int rank, String title, String hotValue, String source) {
this.rank = rank;
this.title = title;
this.hotValue = hotValue;
this.source = source;
this.timestamp = System.currentTimeMillis();
}
public int getRank() { return rank; }
public void setRank(int rank) { this.rank = rank; }
public String getTitle() { return title; }
public void setTitle(String title) { this.title = title; }
public String getHotValue() { return hotValue; }
public void setHotValue(String hotValue) { this.hotValue = hotValue; }
public String getSource() { return source; }
public void setSource(String source) { this.source = source; }
public long getTimestamp() { return timestamp; }
public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
HotSearchItem that = (HotSearchItem) o;
return rank == that.rank && Objects.equals(title, that.title) && Objects.equals(source, that.source);
}
@Override
public int hashCode() {
return Objects.hash(rank, title, source);
}
@Override
public String toString() {
return "HotSearchItem{" +
"rank=" + rank +
", title='" + title + '\'' +
", hotValue='" + hotValue + '\'' +
", source='" + source + '\'' +
'}';
}
}

62
project/src/main/java/strategy/BlogStrategy.java

@ -0,0 +1,62 @@
package strategy;
import exception.ParseException;
import model.HotSearchItem;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for blog-style pages.
 *
 * <p>It first looks for dedicated title elements; if the page has none, it falls back to
 * post containers and reads a title and an excerpt from each of them.
 */
public class BlogStrategy implements CrawlStrategy {

    /**
     * Extracts blog entries from the page. Items are ranked in document order, starting at 1.
     * In the fallback path the excerpt text is stored in the item's hot-value field.
     *
     * @param document the parsed page
     * @return the extracted items, never empty
     * @throws ParseException if no entry is found or the page cannot be processed
     */
    @Override
    public List<HotSearchItem> parse(Document document) throws ParseException {
        List<HotSearchItem> items = new ArrayList<>();
        try {
            Elements blogTitles = document.select(".blog-title, .post-title, article h2, .entry-title");
            if (blogTitles.isEmpty()) {
                Elements articles = document.select("article, .post, .entry");
                for (Element article : articles) {
                    String title = article.select("h2, h3, .title").text();
                    String content = article.select(".content, .excerpt, .entry-content").text();
                    if (!title.isEmpty()) {
                        items.add(new HotSearchItem(items.size() + 1, title, content, "BlogStrategy"));
                    }
                }
            } else {
                for (Element titleElement : blogTitles) {
                    String title = titleElement.text();
                    if (!title.isEmpty()) {
                        items.add(new HotSearchItem(items.size() + 1, title, "", "BlogStrategy"));
                    }
                }
            }
            if (items.isEmpty()) {
                throw new ParseException("未找到博客内容,请检查CSS选择器是否正确");
            }
        } catch (ParseException e) {
            // Already the right type: rethrow unchanged instead of wrapping it below.
            throw e;
        } catch (Exception e) {
            throw new ParseException("解析博客内容失败: " + e.getMessage(), e);
        }
        return items;
    }

    @Override
    public String getStrategyName() {
        return "BlogStrategy";
    }

    /** Returns true when the site name contains "blog" (case-insensitive) or "博客". */
    @Override
    public boolean supports(String siteName) {
        if (siteName == null) {
            return false;
        }
        String lowered = siteName.toLowerCase();
        return lowered.contains("blog") || lowered.contains("博客");
    }
}

12
project/src/main/java/strategy/CrawlStrategy.java

@ -0,0 +1,12 @@
package strategy;
import exception.ParseException;
import model.HotSearchItem;
import org.jsoup.nodes.Document;
import java.util.List;
/**
 * Strategy for turning a fetched page into a list of {@link HotSearchItem}s.
 */
public interface CrawlStrategy {
/**
 * Extracts items from the given parsed page.
 *
 * @param document the jsoup document to read
 * @return the extracted items
 * @throws ParseException if the page cannot be parsed
 */
List<HotSearchItem> parse(Document document) throws ParseException;
/**
 * Returns the name that identifies this strategy.
 *
 * @return the strategy name
 */
String getStrategyName();
/**
 * Tells whether this strategy applies to the given site.
 *
 * @param siteName name of the site to check
 * @return {@code true} if this strategy can handle the site
 */
boolean supports(String siteName);
}

84
project/src/main/java/strategy/HotSearchStrategy.java

@ -0,0 +1,84 @@
package strategy;
import exception.ParseException;
import model.HotSearchItem;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * {@link CrawlStrategy} for hot-search / ranking pages.
 *
 * <p>It first reads structured list or table rows; if that yields nothing, it falls back to
 * scanning the page's plain text for "number followed by text" sequences.
 */
public class HotSearchStrategy implements CrawlStrategy {
    /** First run of digits in a rank cell. */
    private static final Pattern RANK_DIGITS = Pattern.compile("\\d+");
    /** Plain-text fallback: a number, an optional separator, then 2-30 characters of title. */
    private static final Pattern NUMBERED_ENTRY = Pattern.compile("(\\d+)\\s*[.、]?\\s*(.{2,30})");
    /** Upper bound on the number of items taken from the plain-text fallback. */
    private static final int FALLBACK_LIMIT = 50;

    /**
     * Extracts hot-search entries from the page.
     *
     * @param document the parsed page
     * @return the extracted items, never empty
     * @throws ParseException if no entry is found or the page cannot be processed
     */
    @Override
    public List<HotSearchItem> parse(Document document) throws ParseException {
        List<HotSearchItem> items = new ArrayList<>();
        try {
            Elements hotItems = document.select("table tbody tr, .hot-list li, .rank-list div, [class*=hot]");
            for (Element item : hotItems) {
                String rank = item.select("td, .rank, [class*=num]").text();
                String title = item.select("a, .title, [class*=title]").text();
                String hotValue = item.select("span, .hot-value, [class*=value]").text();
                if (!title.isEmpty()) {
                    int rankNum = extractRank(rank);
                    // Without a usable rank on the page, number the item by its position.
                    items.add(new HotSearchItem(rankNum > 0 ? rankNum : items.size() + 1, title, hotValue, "HotSearchStrategy"));
                }
            }
            if (items.isEmpty()) {
                collectFromPlainText(document.text(), items);
            }
            if (items.isEmpty()) {
                throw new ParseException("未找到热搜内容,请检查页面结构");
            }
        } catch (ParseException e) {
            // Already the right type: rethrow unchanged instead of wrapping it below.
            throw e;
        } catch (Exception e) {
            throw new ParseException("解析热搜内容失败: " + e.getMessage(), e);
        }
        return items;
    }

    /** Plain-text fallback: adds up to {@code FALLBACK_LIMIT} numbered entries found in the text. */
    private void collectFromPlainText(String pageText, List<HotSearchItem> items) {
        Matcher matcher = NUMBERED_ENTRY.matcher(pageText);
        while (matcher.find() && items.size() < FALLBACK_LIMIT) {
            int rankNum;
            try {
                rankNum = Integer.parseInt(matcher.group(1));
            } catch (NumberFormatException ignored) {
                // A digit run too long for an int (an id, a timestamp) is not a rank;
                // skip it rather than letting it abort the whole parse.
                continue;
            }
            String title = matcher.group(2).trim();
            if (title.length() > 2) {
                items.add(new HotSearchItem(rankNum, title, "", "HotSearchStrategy"));
            }
        }
    }

    /**
     * Returns the first number found in the text, or 0 when there is none or it does not
     * fit into an int.
     */
    private int extractRank(String rankText) {
        Matcher matcher = RANK_DIGITS.matcher(rankText);
        if (matcher.find()) {
            try {
                return Integer.parseInt(matcher.group());
            } catch (NumberFormatException ignored) {
                // Overlong digit run: treated as "no rank"; the caller numbers by position.
            }
        }
        return 0;
    }

    @Override
    public String getStrategyName() {
        return "HotSearchStrategy";
    }

    /** Returns true when the site name contains "hot" (case-insensitive), "热搜" or "排行榜". */
    @Override
    public boolean supports(String siteName) {
        if (siteName == null) {
            return false;
        }
        String lowered = siteName.toLowerCase();
        return lowered.contains("hot") || lowered.contains("热搜") || lowered.contains("排行榜");
    }
}

61
project/src/main/java/strategy/NewsStrategy.java

@ -0,0 +1,61 @@
package strategy;
import exception.ParseException;
import model.HotSearchItem;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for news-style pages.
 *
 * <p>It first looks for dedicated headline elements; if the page has none, it falls back to
 * article containers and reads a title and a summary from each of them.
 */
public class NewsStrategy implements CrawlStrategy {

    /**
     * Extracts news entries from the page. Items are ranked in document order, starting at 1.
     * In the fallback path the summary text is stored in the item's hot-value field.
     *
     * @param document the parsed page
     * @return the extracted items, never empty
     * @throws ParseException if no entry is found or the page cannot be processed
     */
    @Override
    public List<HotSearchItem> parse(Document document) throws ParseException {
        List<HotSearchItem> items = new ArrayList<>();
        try {
            Elements newsTitles = document.select(".news-title, .article-title, .entry-title, h2 a, .headline");
            if (newsTitles.isEmpty()) {
                Elements articles = document.select("article, .post, .item, .news-item");
                for (Element article : articles) {
                    // first() is null when the container has no title-like child; skip that
                    // container instead of failing the whole page with a NullPointerException.
                    Element titleElement = article.select("h3, h4, .title, a").first();
                    if (titleElement == null) {
                        continue;
                    }
                    String title = titleElement.text();
                    String content = article.select(".summary, .desc, .excerpt").text();
                    if (!title.isEmpty()) {
                        items.add(new HotSearchItem(items.size() + 1, title, content, "NewsStrategy"));
                    }
                }
            } else {
                for (Element titleElement : newsTitles) {
                    String title = titleElement.text();
                    if (!title.isEmpty()) {
                        items.add(new HotSearchItem(items.size() + 1, title, "", "NewsStrategy"));
                    }
                }
            }
            if (items.isEmpty()) {
                throw new ParseException("未找到新闻内容,请检查CSS选择器是否正确");
            }
        } catch (ParseException e) {
            // Already the right type: rethrow unchanged instead of wrapping it below.
            throw e;
        } catch (Exception e) {
            throw new ParseException("解析新闻内容失败: " + e.getMessage(), e);
        }
        return items;
    }

    @Override
    public String getStrategyName() {
        return "NewsStrategy";
    }

    /** Returns true when the site name contains "news" (case-insensitive) or "新闻". */
    @Override
    public boolean supports(String siteName) {
        if (siteName == null) {
            return false;
        }
        String lowered = siteName.toLowerCase();
        return lowered.contains("news") || lowered.contains("新闻");
    }
}

56
project/src/main/java/strategy/StrategyFactory.java

@ -0,0 +1,56 @@
package strategy;
import java.util.HashMap;
import java.util.Map;
/**
 * Singleton registry of {@link CrawlStrategy} instances, keyed by strategy name.
 *
 * <p>Strategies are kept in registration order, so when several strategies support the same
 * site name the one registered first wins. The built-in order is Blog, News, HotSearch.
 * Only creation of the singleton is synchronized; {@link #register} and the lookups are not.
 */
public class StrategyFactory {
    /** Name of the strategy returned when a lookup finds no match. */
    private static final String DEFAULT_STRATEGY = "HotSearchStrategy";

    private static volatile StrategyFactory instance;
    private final Map<String, CrawlStrategy> strategies;

    private StrategyFactory() {
        // Insertion-ordered so that getStrategyForSite resolves ambiguous site names
        // predictably (by registration order) rather than by hash order.
        strategies = new java.util.LinkedHashMap<>();
        registerDefaultStrategies();
    }

    /** Returns the shared factory, creating it on first use (double-checked locking). */
    public static StrategyFactory getInstance() {
        if (instance == null) {
            synchronized (StrategyFactory.class) {
                if (instance == null) {
                    instance = new StrategyFactory();
                }
            }
        }
        return instance;
    }

    private void registerDefaultStrategies() {
        register(new BlogStrategy());
        register(new NewsStrategy());
        register(new HotSearchStrategy());
    }

    /**
     * Registers a strategy under its own name. Re-registering an existing name replaces the
     * strategy but keeps that name's original position in the lookup order.
     *
     * @param strategy the strategy to add
     */
    public void register(CrawlStrategy strategy) {
        strategies.put(strategy.getStrategyName(), strategy);
    }

    /**
     * Looks a strategy up by name.
     *
     * @param strategyName the registered name
     * @return the named strategy, or the default hot-search strategy if the name is unknown
     */
    public CrawlStrategy getStrategy(String strategyName) {
        CrawlStrategy strategy = strategies.get(strategyName);
        if (strategy == null) {
            strategy = strategies.get(DEFAULT_STRATEGY);
        }
        return strategy;
    }

    /**
     * Finds the first registered strategy that supports the given site.
     *
     * @param siteName name of the site to crawl
     * @return the first supporting strategy in registration order, or the default
     *         hot-search strategy if none matches
     */
    public CrawlStrategy getStrategyForSite(String siteName) {
        for (CrawlStrategy strategy : strategies.values()) {
            if (strategy.supports(siteName)) {
                return strategy;
            }
        }
        return strategies.get(DEFAULT_STRATEGY);
    }

    /** Returns a copy of the registry in registration order; changes to it do not affect the factory. */
    public Map<String, CrawlStrategy> getAllStrategies() {
        return new java.util.LinkedHashMap<>(strategies);
    }
}

18
project/src/main/java/view/CrawlerView.java

@ -0,0 +1,18 @@
package view;
import model.Article;
import model.HotSearchItem;
import model.CrawlerResult;
import java.util.List;
/**
 * Presentation layer of the crawler application: everything shown to the user goes
 * through this interface.
 */
public interface CrawlerView {
/** Shows the application banner. */
void displayBanner();
/**
 * Shows the main menu.
 *
 * @param options the menu entries to list
 */
void displayMenu(List<String> options);
/**
 * Shows a list of stored articles.
 *
 * @param articles the articles to show
 */
void displayArticles(List<Article> articles);
/**
 * Shows a list of hot-search items.
 *
 * @param items the items to show
 */
void displayHotSearchItems(List<HotSearchItem> items);
/**
 * Shows the outcome of one crawl.
 *
 * @param result the crawl result to show
 */
void displayCrawlerResult(CrawlerResult result);
/** Shows an informational message. */
void displayMessage(String message);
/** Shows an error message. */
void displayError(String error);
/** Shows a success message. */
void displaySuccess(String success);
/** Shows the farewell text. */
void displayGoodbye();
}

138
project/src/main/java/view/CrawlerViewImpl.java

@ -0,0 +1,138 @@
package view;
import model.Article;
import model.HotSearchItem;
import model.CrawlerResult;
import java.util.List;
/**
 * Console implementation of {@link CrawlerView}: renders everything as box-drawn text on
 * {@code System.out}; only error messages go to {@code System.err}.
 */
public class CrawlerViewImpl implements CrawlerView {

    /** Prints the start-up banner. */
    @Override
    public void displayBanner() {
        printLines(
                "",
                "╔══════════════════════════════════════════════════════════╗",
                "║ ║",
                "║ 🔥 多平台热搜爬虫系统 🔥 ║",
                "║ ║",
                "║ CLI + MVC + Command + Strategy ║",
                "║ ║",
                "╚══════════════════════════════════════════════════════════╝",
                "");
    }

    /** Prints the numbered menu entries, the fixed shortcut row and the input prompt. */
    @Override
    public void displayMenu(List<String> options) {
        printLines(
                "╔══════════════════════════════════════════════════════════╗",
                "║ 主菜单 ║",
                "╠══════════════════════════════════════════════════════════╣");
        int number = 1;
        for (String option : options) {
            System.out.printf("║ %2d. %-48s ║%n", number, option);
            number++;
        }
        printLines(
                "╠══════════════════════════════════════════════════════════╣",
                "║ U. 撤销 R. 重做 H. 帮助 0. 退出 ║",
                "╚══════════════════════════════════════════════════════════╝");
        System.out.print("\n请输入选择: ");
    }

    /** Prints the article list, or a placeholder box when there is nothing to show. */
    @Override
    public void displayArticles(List<Article> articles) {
        boolean nothingToShow = articles == null || articles.isEmpty();
        if (nothingToShow) {
            printLines(
                    "\n╔══════════════════════════════════════════╗",
                    "║ 暂无保存的文章 ║",
                    "╚══════════════════════════════════════════╝");
            return;
        }
        printLines(
                "\n╔═══════════════════════════════════════════════════════════╗",
                "║ 文章列表 (共 " + articles.size() + " 篇) ║",
                "╠═══════════════════════════════════════════════════════════╣");
        int position = 0;
        for (Article article : articles) {
            position++;
            String shortTitle = truncate(article.getTitle(), 48);
            String shortSource = truncate(article.getSource(), 18);
            String shortStrategy = truncate(article.getStrategy(), 13);
            System.out.printf("║ [%02d] %-50s ║%n", position, shortTitle);
            System.out.printf("║ 📌 来源: %-20s 策略: %-15s ║%n", shortSource, shortStrategy);
        }
        System.out.println("╚═══════════════════════════════════════════════════════════╝");
    }

    /** Prints the hot-search list, or a one-line notice when there is nothing to show. */
    @Override
    public void displayHotSearchItems(List<HotSearchItem> items) {
        boolean nothingToShow = items == null || items.isEmpty();
        if (nothingToShow) {
            System.out.println("暂无热搜数据");
            return;
        }
        System.out.println("\n╔═══════════════════════════════════════════════════════════╗");
        System.out.printf("║ 热搜列表 (共 %d 条) ║%n", items.size());
        System.out.println("╠═══════════════════════════════════════════════════════════╣");
        for (HotSearchItem entry : items) {
            System.out.printf("║ %3s │ %-50s ║%n", "🔥 " + entry.getRank(), truncate(entry.getTitle(), 48));
        }
        System.out.println("╚═══════════════════════════════════════════════════════════╝");
    }

    /** Prints a crawl summary: item count and duration on success, the failure reason otherwise. */
    @Override
    public void displayCrawlerResult(CrawlerResult result) {
        if (result == null) {
            displayError("爬取结果为空");
            return;
        }
        System.out.println("\n╔═══════════════════════════════════════════════════════════╗");
        System.out.printf("║ 爬取结果: %-30s ║%n", result.getSiteName());
        System.out.println("╠═══════════════════════════════════════════════════════════╣");
        if (!result.isSuccess()) {
            System.out.println("║ 状态: ❌ 失败 ║");
            System.out.printf("║ 原因: %-45s ║%n", truncate(result.getErrorMessage(), 43));
        } else {
            System.out.println("║ 状态: ✅ 成功 ║");
            System.out.printf("║ 数据: %d 条 ║%n", result.getItemCount());
            System.out.printf("║ 耗时: %d ms ║%n", result.getDuration());
        }
        System.out.println("╚═══════════════════════════════════════════════════════════╝");
    }

    @Override
    public void displayMessage(String message) {
        System.out.println("\n📢 " + message);
    }

    /** Errors are written to {@code System.err}, unlike every other message. */
    @Override
    public void displayError(String error) {
        System.err.println("\n❌ 错误: " + error);
    }

    @Override
    public void displaySuccess(String success) {
        System.out.println("\n✅ " + success);
    }

    /** Prints the farewell box. */
    @Override
    public void displayGoodbye() {
        printLines(
                "",
                "╔══════════════════════════════════════════════════════════╗",
                "║ ║",
                "║ 感谢使用热搜爬虫系统! ║",
                "║ ║",
                "║ 再见!👋 ║",
                "║ ║",
                "╚══════════════════════════════════════════════════════════╝");
    }

    /** Prints each given line to {@code System.out}, one per row. */
    private static void printLines(String... lines) {
        for (String line : lines) {
            System.out.println(line);
        }
    }

    /**
     * Shortens text to at most {@code maxLength} characters, ending in "..." when it was cut.
     * {@code null} becomes the empty string.
     */
    private String truncate(String str, int maxLength) {
        if (str == null) {
            return "";
        }
        return str.length() > maxLength ? str.substring(0, maxLength - 3) + "..." : str;
    }
}

BIN
project/strategy/BlogStrategy.class

Binary file not shown.

BIN
project/strategy/CrawlStrategy.class

Binary file not shown.

BIN
project/strategy/HotSearchStrategy.class

Binary file not shown.

BIN
project/strategy/NewsStrategy.class

Binary file not shown.

BIN
project/strategy/StrategyFactory.class

Binary file not shown.

BIN
project/target/classes/command/AbstractCommand.class

Binary file not shown.

BIN
project/target/classes/command/ClearDataCommand.class

Binary file not shown.

BIN
project/target/classes/command/Command.class

Binary file not shown.

BIN
project/target/classes/command/CommandManager.class

Binary file not shown.

BIN
project/target/classes/command/CrawlCommand.class

Binary file not shown.

BIN
project/target/classes/command/HelpCommand.class

Binary file not shown.

BIN
project/target/classes/command/ViewArticlesCommand.class

Binary file not shown.

BIN
project/target/classes/controller/CrawlerController.class

Binary file not shown.

BIN
project/target/classes/crawler/BaiduHotCrawler.class

Binary file not shown.

BIN
project/target/classes/crawler/BaseCrawler.class

Binary file not shown.

BIN
project/target/classes/crawler/HupuHotCrawler.class

Binary file not shown.

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save