Browse Source

期末作业更新数据

main
wanglixia 3 weeks ago
parent
commit
ac748c59da
  1. BIN
      project/202506050130-王立霞-期末实验报告.docx
  2. 51
      project/spider/data/books.csv
  3. 51
      project/spider/data/hotsearch.csv
  4. 251
      project/spider/data/movies.csv
  5. 56
      project/spider/dependency-reduced-pom.xml
  6. 14731
      project/spider/logs/spider-2026-05-31.0.log
  7. 1828
      project/spider/logs/spider-crawl-2026-05-31.0.log
  8. 114
      project/spider/logs/spider-crawl.log
  9. 817
      project/spider/logs/spider-error.log
  10. 1170
      project/spider/logs/spider.log
  11. 99
      project/spider/pom.xml
  12. 7
      project/spider/src/main/java/com/spider/command/Command.java
  13. 69
      project/spider/src/main/java/com/spider/command/ConfigCommand.java
  14. 94
      project/spider/src/main/java/com/spider/command/CrawlCommand.java
  15. 81
      project/spider/src/main/java/com/spider/command/HelpCommand.java
  16. 104
      project/spider/src/main/java/com/spider/command/ListCommand.java
  17. 56
      project/spider/src/main/java/com/spider/command/LoadCommand.java
  18. 66
      project/spider/src/main/java/com/spider/command/SaveCommand.java
  19. 31
      project/spider/src/main/java/com/spider/command/StartCommand.java
  20. 26
      project/spider/src/main/java/com/spider/command/StatusCommand.java
  21. 26
      project/spider/src/main/java/com/spider/command/StopCommand.java
  22. 25
      project/spider/src/main/java/com/spider/controller/ControllerFactory.java
  23. 19
      project/spider/src/main/java/com/spider/controller/ControllerInitializer.java
  24. 330
      project/spider/src/main/java/com/spider/controller/SpiderController.java
  25. 115
      project/spider/src/main/java/com/spider/core/CommandExecutor.java
  26. 86
      project/spider/src/main/java/com/spider/core/SpiderRunner.java
  27. 41
      project/spider/src/main/java/com/spider/exception/DataException.java
  28. 23
      project/spider/src/main/java/com/spider/exception/NetworkException.java
  29. 53
      project/spider/src/main/java/com/spider/exception/ParseException.java
  30. 37
      project/spider/src/main/java/com/spider/exception/SpiderException.java
  31. 89
      project/spider/src/main/java/com/spider/model/Book.java
  32. 13
      project/spider/src/main/java/com/spider/model/DataItem.java
  33. 70
      project/spider/src/main/java/com/spider/model/HotSearch.java
  34. 82
      project/spider/src/main/java/com/spider/model/Movie.java
  35. 60
      project/spider/src/main/java/com/spider/model/SpiderConfig.java
  36. 170
      project/spider/src/main/java/com/spider/repository/ArticleRepository.java
  37. 49
      project/spider/src/main/java/com/spider/service/AbstractSpider.java
  38. 144
      project/spider/src/main/java/com/spider/service/BaiduHotSearchSpider.java
  39. 155
      project/spider/src/main/java/com/spider/service/DataStorageService.java
  40. 312
      project/spider/src/main/java/com/spider/service/DoubanBookSpider.java
  41. 158
      project/spider/src/main/java/com/spider/service/DoubanMovieSpider.java
  42. 7
      project/spider/src/main/java/com/spider/service/Spider.java
  43. 55
      project/spider/src/main/java/com/spider/test/DebugDoubanBooks.java
  44. 118
      project/spider/src/main/java/com/spider/utils/HttpClientUtil.java
  45. 84
      project/spider/src/main/java/com/spider/utils/RetryUtils.java
  46. 133
      project/spider/src/main/java/com/spider/view/ConsoleView.java
  47. 7
      project/spider/src/main/java/com/spider/view/ViewFactory.java
  48. 89
      project/spider/src/main/resources/logback.xml
  49. BIN
      project/spider/target/classes/com/spider/command/Command.class
  50. BIN
      project/spider/target/classes/com/spider/command/ConfigCommand.class
  51. BIN
      project/spider/target/classes/com/spider/command/CrawlCommand.class
  52. BIN
      project/spider/target/classes/com/spider/command/HelpCommand.class
  53. BIN
      project/spider/target/classes/com/spider/command/ListCommand.class
  54. BIN
      project/spider/target/classes/com/spider/command/LoadCommand.class
  55. BIN
      project/spider/target/classes/com/spider/command/SaveCommand.class
  56. BIN
      project/spider/target/classes/com/spider/command/StartCommand.class
  57. BIN
      project/spider/target/classes/com/spider/command/StatusCommand.class
  58. BIN
      project/spider/target/classes/com/spider/command/StopCommand.class
  59. BIN
      project/spider/target/classes/com/spider/controller/ControllerFactory.class
  60. BIN
      project/spider/target/classes/com/spider/controller/ControllerInitializer.class
  61. BIN
      project/spider/target/classes/com/spider/controller/SpiderController.class
  62. BIN
      project/spider/target/classes/com/spider/core/CommandExecutor.class
  63. BIN
      project/spider/target/classes/com/spider/core/SpiderRunner.class
  64. BIN
      project/spider/target/classes/com/spider/exception/DataException.class
  65. BIN
      project/spider/target/classes/com/spider/exception/NetworkException.class
  66. BIN
      project/spider/target/classes/com/spider/exception/ParseException.class
  67. BIN
      project/spider/target/classes/com/spider/exception/SpiderException.class
  68. BIN
      project/spider/target/classes/com/spider/model/Book.class
  69. BIN
      project/spider/target/classes/com/spider/model/DataItem.class
  70. BIN
      project/spider/target/classes/com/spider/model/HotSearch.class
  71. BIN
      project/spider/target/classes/com/spider/model/Movie.class
  72. BIN
      project/spider/target/classes/com/spider/model/SpiderConfig.class
  73. BIN
      project/spider/target/classes/com/spider/repository/ArticleRepository.class
  74. BIN
      project/spider/target/classes/com/spider/service/AbstractSpider.class
  75. BIN
      project/spider/target/classes/com/spider/service/BaiduHotSearchSpider.class
  76. BIN
      project/spider/target/classes/com/spider/service/DataStorageService.class
  77. BIN
      project/spider/target/classes/com/spider/service/DoubanBookSpider.class
  78. BIN
      project/spider/target/classes/com/spider/service/DoubanMovieSpider.class
  79. BIN
      project/spider/target/classes/com/spider/service/Spider.class
  80. BIN
      project/spider/target/classes/com/spider/test/DebugDoubanBooks.class
  81. BIN
      project/spider/target/classes/com/spider/utils/HttpClientUtil.class
  82. BIN
      project/spider/target/classes/com/spider/utils/RetryUtils.class
  83. BIN
      project/spider/target/classes/com/spider/view/ConsoleView.class
  84. BIN
      project/spider/target/classes/com/spider/view/ViewFactory.class
  85. 89
      project/spider/target/classes/logback.xml
  86. 3
      project/spider/target/maven-archiver/pom.properties
  87. 36
      project/spider/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
  88. 36
      project/spider/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
  89. 0
      project/spider/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst
  90. 0
      project/spider/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst
  91. BIN
      project/spider/target/original-spider-1.0.0.jar
  92. BIN
      project/spider/target/spider-1.0.0-shaded.jar
  93. BIN
      project/spider/target/spider-1.0.0.jar

BIN
project/202506050130-王立霞-期末实验报告.docx

Binary file not shown.

51
project/spider/data/books.csv

@ -0,0 +1,51 @@
书名,评分,作者,简介,评价数
安定此心:我当精神科医生的12000天,8.6,姜涛,社会纪实,2837
挽救计划,9.1,[美] 安迪·威尔,小说,1249
咸的玩笑,8.5,刘震云,文学,13689
真事隐:康熙废储与正史虚构,8.7,孙立天,历史,2313
大厂小民:我在互联网公司的1480天,8.5,张小满,社会纪实,748
天色已晚,8.3,[爱尔兰]克莱尔·吉根,小说,1554
她和她的决心,8.3,东来,小说,770
凯罗斯,8.5,[德] 燕妮·埃彭贝克,小说,1225
幸福蒙太奇,7.7,马凌云,小说,414
螃蟹的邀请,8.7,[日] panpanya,绘本,486
抄写员巴托比,8.3,[美] 赫尔曼·梅尔维尔,小说,409
我收养了一个朋友,7.7,[韩]银曙澜,社会纪实,768
哈萨比斯:谷歌AI之脑,9.1,塞巴斯蒂安·马拉比,科学,567
像女孩那样丢球,9.1,[美] 艾丽斯·玛丽恩·杨,历史,401
刚刚离开的世界,8.3,[韩] 金草叶,小说,468
故纸浮生.1-2,9.3,(日)儿岛青,绘本,2594
在世与认知,8.4,陈嘉映,0 / 平装,406
呼啸山庄,7.9,[英] 艾米莉·勃朗特,小说,1015
我们如何理解这个世界:与齐格蒙特·鲍曼对谈,9.0,[英] 齐格蒙特·鲍曼 / [英] 基思·特斯特,0 / 精装,307
刮风下雨,8.8,李静睿,小说,273
为什么要孩子?,0.0,未知作者,,0
谁让历史流行起来,0.0,未知作者,历史,0
察金,0.0,未知作者,,0
翠鸟谋杀案,0.0,未知作者,,0
病鹤,0.0,未知作者,,0
闪闪发光的日子,0.0,未知作者,,0
转型时代的阅读与感怀,0.0,未知作者,,0
科学与我们,0.0,未知作者,科学,0
妄想是我们共有的秘密,0.0,未知作者,,0
王尔德童话,0.0,未知作者,,0
安徒生童话故事集,0.0,未知作者,,0
雷雨,0.0,未知作者,,0
父权制与资本主义,0.0,未知作者,,0
沉默的大多数,0.0,未知作者,,0
贾想 I,0.0,未知作者,,0
麦琪的礼物,0.0,未知作者,,0
新名字的故事,0.0,未知作者,,0
窗边的小豆豆,0.0,未知作者,,0
安定此心:我当精神科医生的12000天,8.6,姜涛,社会纪实,2837
挽救计划,9.1,[美] 安迪·威尔,小说,1249
咸的玩笑,8.5,刘震云,文学,13689
真事隐:康熙废储与正史虚构,8.7,孙立天,历史,2313
大厂小民:我在互联网公司的1480天,8.5,张小满,社会纪实,748
天色已晚,8.3,[爱尔兰]克莱尔·吉根,小说,1554
她和她的决心,8.3,东来,小说,770
凯罗斯,8.5,[德] 燕妮·埃彭贝克,小说,1225
幸福蒙太奇,7.7,马凌云,小说,414
螃蟹的邀请,8.7,[日] panpanya,绘本,486
抄写员巴托比,8.3,[美] 赫尔曼·梅尔维尔,小说,409
我收养了一个朋友,7.7,[韩]银曙澜,社会纪实,768
1 书名 评分 作者 简介 评价数
2 安定此心:我当精神科医生的12000天 8.6 姜涛 社会纪实 2837
3 挽救计划 9.1 [美] 安迪·威尔 小说 1249
4 咸的玩笑 8.5 刘震云 文学 13689
5 真事隐:康熙废储与正史虚构 8.7 孙立天 历史 2313
6 大厂小民:我在互联网公司的1480天 8.5 张小满 社会纪实 748
7 天色已晚 8.3 [爱尔兰]克莱尔·吉根 小说 1554
8 她和她的决心 8.3 东来 小说 770
9 凯罗斯 8.5 [德] 燕妮·埃彭贝克 小说 1225
10 幸福蒙太奇 7.7 马凌云 小说 414
11 螃蟹的邀请 8.7 [日] panpanya 绘本 486
12 抄写员巴托比 8.3 [美] 赫尔曼·梅尔维尔 小说 409
13 我收养了一个朋友 7.7 [韩]银曙澜 社会纪实 768
14 哈萨比斯:谷歌AI之脑 9.1 塞巴斯蒂安·马拉比 科学 567
15 像女孩那样丢球 9.1 [美] 艾丽斯·玛丽恩·杨 历史 401
16 刚刚离开的世界 8.3 [韩] 金草叶 小说 468
17 故纸浮生.1-2 9.3 (日)儿岛青 绘本 2594
18 在世与认知 8.4 陈嘉映 0 / 平装 406
19 呼啸山庄 7.9 [英] 艾米莉·勃朗特 小说 1015
20 我们如何理解这个世界:与齐格蒙特·鲍曼对谈 9.0 [英] 齐格蒙特·鲍曼 / [英] 基思·特斯特 0 / 精装 307
21 刮风下雨 8.8 李静睿 小说 273
22 为什么要孩子? 0.0 未知作者 0
23 谁让历史流行起来 0.0 未知作者 历史 0
24 察金 0.0 未知作者 0
25 翠鸟谋杀案 0.0 未知作者 0
26 病鹤 0.0 未知作者 0
27 闪闪发光的日子 0.0 未知作者 0
28 转型时代的阅读与感怀 0.0 未知作者 0
29 科学与我们 0.0 未知作者 科学 0
30 妄想是我们共有的秘密 0.0 未知作者 0
31 王尔德童话 0.0 未知作者 0
32 安徒生童话故事集 0.0 未知作者 0
33 雷雨 0.0 未知作者 0
34 父权制与资本主义 0.0 未知作者 0
35 沉默的大多数 0.0 未知作者 0
36 贾想 I 0.0 未知作者 0
37 麦琪的礼物 0.0 未知作者 0
38 新名字的故事 0.0 未知作者 0
39 窗边的小豆豆 0.0 未知作者 0
40 安定此心:我当精神科医生的12000天 8.6 姜涛 社会纪实 2837
41 挽救计划 9.1 [美] 安迪·威尔 小说 1249
42 咸的玩笑 8.5 刘震云 文学 13689
43 真事隐:康熙废储与正史虚构 8.7 孙立天 历史 2313
44 大厂小民:我在互联网公司的1480天 8.5 张小满 社会纪实 748
45 天色已晚 8.3 [爱尔兰]克莱尔·吉根 小说 1554
46 她和她的决心 8.3 东来 小说 770
47 凯罗斯 8.5 [德] 燕妮·埃彭贝克 小说 1225
48 幸福蒙太奇 7.7 马凌云 小说 414
49 螃蟹的邀请 8.7 [日] panpanya 绘本 486
50 抄写员巴托比 8.3 [美] 赫尔曼·梅尔维尔 小说 409
51 我收养了一个朋友 7.7 [韩]银曙澜 社会纪实 768

51
project/spider/data/hotsearch.csv

@ -0,0 +1,51 @@
排名,关键词
1,“自古英雄出少年”
2,车辆越来越大 车位要不要扩容
3,天涯社区重启首日崩了
4,中国科技期刊影响力提升 底气何来
5,鞠萍姐姐今日退休
6,还记得沙包、羊拐、铁皮青蛙吗
7,妈妈摆摊带女儿冲刺高考:均患罕见病
8,你有多久没让心里的小孩出来玩了
9,让你变丑的不是“碳水” 而是这些
10,入户调查今日启动 请积极配合
11,男子称为KTV工作女友讨说法被打
12,网民编造“红绿灯倒计时取消”被罚
13,天涯社区回归
14,女子在家洗澡醉酒男闯入欲行不轨
15,奚梦瑶何猷君今日大婚 晚宴照片曝光
16,成本几毛钱 央视曝光劣质驱蚊产品
17,美国博主来中国看街头电车破防了
18,宝妈每天用84拖地致肺炎
19,天天吃鸡蛋vs很少吃鸡蛋 谁更健康
20,孙颖莎樊振东全红婵们等比例长大
21,《给阿嬷的情书》删减内幕曝光
22,贵州大学回应“萝卜岗”招聘质疑
23,中国驻菲大使:中国人不是好欺负的
24,曝闲鱼将用户手机内照片“自动上架”
25,我的童年毕业了
26,这届小朋友真宝藏
27,哈尔滨为何遭遇沙尘暴突袭
28,海警位中国台湾岛以东海域执法巡查
29,歌手白举纲宣布得女
30,菲防长香会大放厥词当天 解放军亮剑
31,防晒衣乱象调查:300元可买合格报告
32,传销组织者3年内不得担任企业高管
33,14至16周岁未成年人犯罪人数下降
34,00后骑手笑着吃外卖走红:平时不舍得
35,高温来了 家中药品要不要放进冰箱
36,战火中的儿童节:活着已是拼尽全力
37,日韩股市均创历史新高
38,王鹤棣直播致谢粉丝:谢谢支持
39,A股“股王”大战 普通人能赚到钱吗
40,20年青梅竹马演唱会上求婚 邓超祝福
41,樊振东和队友模仿C罗庆祝
42,儿童节 祝大朋友小朋友节日快乐
43,董亦军尹春燕贺娇龙等拟获表彰
44,U19国足1比0战胜沙特U21
45,“今天 我们不做大人”
46,樊振东回应率俱乐部首夺“三冠王”
47,伊朗总统府再度否认总统辞职传闻
48,瑜伽爱好者在长城体验太极融合瑜伽
49,假如回到童年 你希望度过怎样的时光
50,2男子故意制造车辆追尾事故骗保获刑
1 排名 关键词
2 1 “自古英雄出少年”
3 2 车辆越来越大 车位要不要扩容
4 3 天涯社区重启首日崩了
5 4 中国科技期刊影响力提升 底气何来
6 5 鞠萍姐姐今日退休
7 6 还记得沙包、羊拐、铁皮青蛙吗
8 7 妈妈摆摊带女儿冲刺高考:均患罕见病
9 8 你有多久没让心里的小孩出来玩了
10 9 让你变丑的不是“碳水” 而是这些
11 10 入户调查今日启动 请积极配合
12 11 男子称为KTV工作女友讨说法被打
13 12 网民编造“红绿灯倒计时取消”被罚
14 13 天涯社区回归
15 14 女子在家洗澡醉酒男闯入欲行不轨
16 15 奚梦瑶何猷君今日大婚 晚宴照片曝光
17 16 成本几毛钱 央视曝光劣质驱蚊产品
18 17 美国博主来中国看街头电车破防了
19 18 宝妈每天用84拖地致肺炎
20 19 天天吃鸡蛋vs很少吃鸡蛋 谁更健康
21 20 孙颖莎樊振东全红婵们等比例长大
22 21 《给阿嬷的情书》删减内幕曝光
23 22 贵州大学回应“萝卜岗”招聘质疑
24 23 中国驻菲大使:中国人不是好欺负的
25 24 曝闲鱼将用户手机内照片“自动上架”
26 25 我的童年毕业了
27 26 这届小朋友真宝藏
28 27 哈尔滨为何遭遇沙尘暴突袭
29 28 海警位中国台湾岛以东海域执法巡查
30 29 歌手白举纲宣布得女
31 30 菲防长香会大放厥词当天 解放军亮剑
32 31 防晒衣乱象调查:300元可买合格报告
33 32 传销组织者3年内不得担任企业高管
34 33 14至16周岁未成年人犯罪人数下降
35 34 00后骑手笑着吃外卖走红:平时不舍得
36 35 高温来了 家中药品要不要放进冰箱
37 36 战火中的儿童节:活着已是拼尽全力
38 37 日韩股市均创历史新高
39 38 王鹤棣直播致谢粉丝:谢谢支持
40 39 A股“股王”大战 普通人能赚到钱吗
41 40 20年青梅竹马演唱会上求婚 邓超祝福
42 41 樊振东和队友模仿C罗庆祝
43 42 儿童节 祝大朋友小朋友节日快乐
44 43 董亦军尹春燕贺娇龙等拟获表彰
45 44 U19国足1比0战胜沙特U21
46 45 “今天 我们不做大人”
47 46 樊振东回应率俱乐部首夺“三冠王”
48 47 伊朗总统府再度否认总统辞职传闻
49 48 瑜伽爱好者在长城体验太极融合瑜伽
50 49 假如回到童年 你希望度过怎样的时光
51 50 2男子故意制造车辆追尾事故骗保获刑

251
project/spider/data/movies.csv

@ -0,0 +1,251 @@
电影名,评分,导演
肖申克的救赎,9.7,弗兰克·德拉邦特 Frank Darabont
霸王别姬,9.6,陈凯歌 Kaige Chen
泰坦尼克号,9.5,詹姆斯·卡梅隆 James Cameron
阿甘正传,9.5,罗伯特·泽米吉斯 Robert Zemeckis
千与千寻,9.4,宫崎骏 Hayao Miyazaki
美丽人生,9.5,罗伯托·贝尼尼 Roberto Benigni
星际穿越,9.4,克里斯托弗·诺兰 Christopher Nolan
这个杀手不太冷,9.4,吕克·贝松 Luc Besson
盗梦空间,9.4,克里斯托弗·诺兰 Christopher Nolan
楚门的世界,9.4,彼得·威尔 Peter Weir
辛德勒的名单,9.5,史蒂文·斯皮尔伯格 Steven Spielberg
忠犬八公的故事,9.4,莱塞·霍尔斯道姆 Lasse Hallström
海上钢琴师,9.3,朱塞佩·托纳多雷 Giuseppe Tornatore
疯狂动物城,9.3,拜伦·霍华德 Byron Howard
三傻大闹宝莱坞,9.2,拉库马·希拉尼 Rajkumar Hirani
机器人总动员,9.3,安德鲁·斯坦顿 Andrew Stanton
放牛班的春天,9.3,克里斯托夫·巴拉蒂 Christophe Barratier
无间道,9.3,刘伟强
控方证人,9.6,比利·怀尔德 Billy Wilder
寻梦环游记,9.1,李·昂克里奇 Lee Unkrich
大话西游之大圣娶亲,9.2,刘镇伟 Jeffrey Lau
熔炉,9.3,黄东赫 Dong-hyuk Hwang
触不可及,9.3,奥利维·那卡什 Olivier Nakache
教父,9.3,弗朗西斯·福特·科波拉 Francis Ford Coppola
末代皇帝,9.3,贝纳尔多·贝托鲁奇 Bernardo Bertolucci
哈利·波特与魔法石,9.2,Chris Columbus
当幸福来敲门,9.1,加布里尔·穆奇诺 Gabriele Muccino
龙猫,9.2,宫崎骏 Hayao Miyazaki
活着,9.3,张艺谋 Yimou Zhang
怦然心动,9.1,罗伯·莱纳 Rob Reiner
蝙蝠侠:黑暗骑士,9.2,克里斯托弗·诺兰 Christopher Nolan
指环王3:王者无敌,9.3,彼得·杰克逊 Peter Jackson
我不是药神,9.0,文牧野 Muye Wen
乱世佳人,9.3,维克多·弗莱明 Victor Fleming
让子弹飞,9.0,姜文 Wen Jiang
飞屋环游记,9.1,彼特·道格特 Pete Docter
哈尔的移动城堡,9.1,宫崎骏 Hayao Miyazaki
十二怒汉,9.4,西德尼·吕美特 Sidney Lumet
海蒂和爷爷,9.3,阿兰·葛斯彭纳 Alain Gsponer
素媛,9.3,李濬益 Jun-ik Lee
猫鼠游戏,9.1,史蒂文·斯皮尔伯格 Steven Spielberg
天空之城,9.2,宫崎骏 Hayao Miyazaki
鬼子来了,9.3,姜文 Wen Jiang
摔跤吧!爸爸,9.0,涅提·蒂瓦里 Nitesh Tiwari
少年派的奇幻漂流,9.1,李安 Ang Lee
钢琴家,9.3,罗曼·波兰斯基 Roman Polanski
死亡诗社,9.2,彼得·威尔 Peter Weir
指环王2:双塔奇兵,9.2,彼得·杰克逊 Peter Jackson
大话西游之月光宝盒,9.0,刘镇伟 Jeffrey Lau
绿皮书,8.9,彼得·法雷里 Peter Farrelly
何以为家,9.1,娜丁·拉巴基 Nadine Labaki
闻香识女人,9.1,马丁·布莱斯 Martin Brest
大闹天宫,9.4,万籁鸣 Laiming Wan
黑客帝国,9.1,安迪·沃卓斯基 Andy Wachowski
指环王1:护戒使者,9.1,彼得·杰克逊 Peter Jackson
罗马假日,9.1,威廉·惠勒 William Wyler
教父2,9.3,弗朗西斯·福特·科波拉 Francis Ford Coppola
狮子王,9.1,Roger Allers
天堂电影院,9.2,朱塞佩·托纳多雷 Giuseppe Tornatore
饮食男女,9.2,李安 Ang Lee
辩护人,9.2,杨宇硕 Woo-seok Yang
本杰明·巴顿奇事,9.0,大卫·芬奇 David Fincher
搏击俱乐部,9.0,大卫·芬奇 David Fincher
美丽心灵,9.1,朗·霍华德 Ron Howard
穿条纹睡衣的男孩,9.2,马克·赫尔曼 Mark Herman
哈利·波特与死亡圣器(下),9.0,大卫·叶茨 David Yates
情书,8.9,岩井俊二 Shunji Iwai
两杆大烟枪,9.1,盖·里奇 Guy Ritchie
窃听风暴,9.2,
功夫,8.9,周星驰 Stephen Chow
音乐之声,9.1,罗伯特·怀斯 Robert Wise
哈利·波特与阿兹卡班的囚徒,9.0,阿方索·卡隆 Alfonso Cuarón
阿凡达,8.8,詹姆斯·卡梅隆 James Cameron
西西里的美丽传说,8.9,朱塞佩·托纳多雷 Giuseppe Tornatore
看不见的客人,8.8,奥里奥尔·保罗 Oriol Paulo
拯救大兵瑞恩,9.1,史蒂文·斯皮尔伯格 Steven Spielberg
沉默的羔羊,8.9,乔纳森·戴米 Jonathan Demme
小鞋子,9.2,马基德·马基迪 Majid Majidi
蝴蝶效应,8.9,埃里克·布雷斯 Eric Bress
布达佩斯大饭店,8.9,韦斯·安德森 Wes Anderson
飞越疯人院,9.1,米洛斯·福尔曼 Miloš Forman
还有明天,9.3,宝拉·柯特莱西 Paola Cortellesi
禁闭岛,8.9,Martin Scorsese
心灵捕手,9.0,格斯·范·桑特 Gus Van Sant
致命魔术,8.9,克里斯托弗·诺兰 Christopher Nolan
低俗小说,8.9,昆汀·塔伦蒂诺 Quentin Tarantino
哈利·波特与密室,8.9,Chris Columbus
超脱,9.0,托尼·凯耶 Tony Kaye
一一,9.1,杨德昌 Edward Yang
喜剧之王,8.8,周星驰 Stephen Chow
杀人回忆,8.9,奉俊昊 Joon-ho Bong
致命ID,8.9,詹姆斯·曼高德 James Mangold
摩登时代,9.3,查理·卓别林 Charles Chaplin
春光乍泄,9.0,王家卫 Kar Wai Wong
加勒比海盗,8.8,戈尔·维宾斯基 Gore Verbinski
海豚湾,9.3,路易·西霍尤斯 Louie Psihoyos
美国往事,9.1,赛尔乔·莱翁内 Sergio Leone
红辣椒,9.0,今敏 Satoshi Kon
七宗罪,8.8,大卫·芬奇 David Fincher
唐伯虎点秋香,8.8,李力持 Lik-Chi Lee
狩猎,9.1,托马斯·温特伯格 Thomas Vinterberg
幽灵公主,8.9,宫崎骏 Hayao Miyazaki
寄生虫,8.8,奉俊昊 Joon-ho Bong
甜蜜蜜,8.9,陈可辛 Peter Chan
天书奇谭,9.2,王树忱 Shuchen Wang
蝙蝠侠:黑暗骑士崛起,8.9,克里斯托弗·诺兰 Christopher Nolan
超能陆战队,8.8,唐·霍尔 Don Hall
7号房的礼物,8.9,李焕庆 Hwan-kyeong Lee
茶馆,9.5,谢添 Tian Xie
第六感,8.9,M·奈特·沙马兰 M. Night Shyamalan
爱在黎明破晓前,8.8,理查德·林克莱特 Richard Linklater
爱在日落黄昏时,8.9,理查德·林克莱特 Richard Linklater
头脑特工队,8.8,
被嫌弃的松子的一生,8.8,中岛哲也 Tetsuya Nakashima
哈利·波特与火焰杯,8.8,迈克·内威尔 Mike Newell
未麻的部屋,9.1,今敏 Satoshi Kon
重庆森林,8.8,王家卫 Kar Wai Wong
借东西的小人阿莉埃蒂,8.9,米林宏昌 Hiromasa Yonebayashi
菊次郎的夏天,8.9,北野武 Takeshi Kitano
入殓师,8.9,泷田洋二郎 Yôjirô Takita
剪刀手爱德华,8.7,蒂姆·波顿 Tim Burton
断背山,8.8,李安 Ang Lee
勇敢的心,8.9,梅尔·吉布森 Mel Gibson
时空恋旅人,8.8,理查德·柯蒂斯 Richard Curtis
驯龙高手,8.8,迪恩·德布洛斯 Dean DeBlois
傲慢与偏见,8.7,乔·怀特 Joe Wright
无人知晓,9.1,是枝裕和 Hirokazu Koreeda
消失的爱人,8.7,大卫·芬奇 David Fincher
倩女幽魂,8.8,程小东 Siu-Tung Ching
新世界,8.9,朴勋政 Hoon-jung Park
花样年华,8.8,王家卫 Kar Wai Wong
玩具总动员3,8.9,李·昂克里奇 Lee Unkrich
一个叫欧维的男人决定去死,8.9,汉内斯·赫尔姆 Hannes Holm
色,戒,8.7,李安 Ang Lee
完美的世界,9.1,克林特·伊斯特伍德 Clint Eastwood
怪兽电力公司,8.8,彼特·道格特 Pete Docter
教父3,9.0,弗朗西斯·福特·科波拉 Francis Ford Coppola
阳光灿烂的日子,8.8,姜文 Wen Jiang
小森林 夏秋篇,9.0,森淳一 Junichi Mori
天使爱美丽,8.7,让-皮埃尔·热内 Jean-Pierre Jeunet
侧耳倾听,8.9,近藤喜文 Yoshifumi Kondo
哪吒闹海,9.2,王树忱 Shuchen Wang
九品芝麻官,8.8,王晶 Jing Wong
被解救的姜戈,8.8,昆汀·塔伦蒂诺 Quentin Tarantino
请以你的名字呼唤我,8.8,卢卡·瓜达尼诺 Luca Guadagnino
幸福终点站,8.8,史蒂文·斯皮尔伯格 Steven Spielberg
釜山行,8.6,延尚昊 Sang-ho Yeon
神偷奶爸,8.7,皮艾尔·柯芬 Pierre Coffin
小森林 冬春篇,9.0,森淳一 Junichi Mori
喜宴,9.0,李安 Ang Lee
萤火之森,8.8,大森贵弘 Takahiro Omori
告白,8.8,中岛哲也 Tetsuya Nakashima
七武士,9.3,黑泽明 Akira Kurosawa
玛丽和麦克斯,9.0,亚当·艾略特 Adam Elliot
头号玩家,8.6,史蒂文·斯皮尔伯格 Steven Spielberg
模仿游戏,8.8,莫滕·泰杜姆 Morten Tyldum
惊魂记,9.0,阿尔弗雷德·希区柯克 Alfred Hitchcock
机器人之梦,9.1,巴勃罗·贝格尔 Pablo Berger
大鱼,8.8,蒂姆·波顿 Tim Burton
心灵奇旅,8.7,彼特·道格特 Pete Docter
背靠背,脸对脸,9.5,黄建新 Jianxin Huang
你的名字。,8.5,新海诚 Makoto Shinkai
血战钢锯岭,8.7,梅尔·吉布森 Mel Gibson
射雕英雄传之东成西就,8.7,刘镇伟 Jeffrey Lau
我是山姆,9.0,杰茜·尼尔森 Jessie Nelson
阳光姐妹淘,8.8,姜炯哲 Hyeong-Cheol Kang
末路狂花,9.0,雷德利·斯科特 Ridley Scott
恐怖直播,8.7,金秉祐 Byeong-woo Kim
黑客帝国3:矩阵革命,8.8,
高山下的花环,9.5,谢晋 Jin Xie
小丑,8.7,托德·菲利普斯 Todd Phillips
谍影重重3,8.9,保罗·格林格拉斯 Paul Greengrass
三块广告牌,8.7,马丁·麦克唐纳 Martin McDonagh
电锯惊魂,8.7,詹姆斯·温 James Wan
无间道2,8.8,刘伟强 Andrew Lau
达拉斯买家俱乐部,8.8,让-马克·瓦雷 Jean-Marc Vallée
疯狂原始人,8.7,科克·德·米科 Kirk De Micco
绿里奇迹,8.9,弗兰克·德拉邦特 Frank Darabont
爱在午夜降临前,8.9,理查德·林克莱特 Richard Linklater
疯狂的石头,8.6,宁浩 Hao Ning
雨中曲,9.1,斯坦利·多南 Stanley Donen
2001太空漫游,8.9,斯坦利·库布里克 Stanley Kubrick
海街日记,8.8,是枝裕和 Hirokazu Koreeda
上帝之城,9.0,
风之谷,8.9,宫崎骏 Hayao Miyazaki
心迷宫,8.7,忻钰坤 Yukun Xin
英雄本色,8.6,吴宇森 John Woo
记忆碎片,8.7,克里斯托弗·诺兰 Christopher Nolan
纵横四海,8.8,吴宇森 John Woo
无敌破坏王,8.7,瑞奇·莫尔 Rich Moore
卢旺达饭店,8.9,特瑞·乔治 Terry George
恐怖游轮,8.5,克里斯托弗·史密斯 Christopher Smith
牯岭街少年杀人事件,8.9,杨德昌 Edward Yang
东京教父,9.0,今敏 Satoshi Kon
小偷家族,8.7,是枝裕和 Hirokazu Koreeda
魔女宅急便,8.7,宫崎骏 Hayao Miyazaki
芙蓉镇,9.3,谢晋 Jin Xie
冰川时代,8.7,卡洛斯·沙尔丹哈 Carlos Saldanha
忠犬八公物语,9.2,神山征二郎 Seijirô Kôyama
岁月神偷,8.7,罗启锐 Alex Law
遗愿清单,8.7,罗伯·莱纳 Rob Reiner
荒蛮故事,8.7,达米安·斯兹弗隆 Damián Szifron
大佛普拉斯,8.7,黄信尧 Hsin-yao Huang
源代码,8.6,邓肯·琼斯 Duncan Jones
花束般的恋爱,8.6,土井裕泰 Nobuhiro Doi
白日梦想家,8.6,本·斯蒂勒 Ben Stiller
爱乐之城,8.4,达米恩·查泽雷 Damien Chazelle
疯狂的麦克斯4:狂暴之路,8.7,乔治·米勒 George Miller
可可西里,8.9,陆川 Chuan Lu
你看起来好像很好吃,8.9,藤森雅也 Masaya Fujimori
贫民窟的百万富翁,8.6,丹尼·鲍尔 Danny Boyle
波西米亚狂想曲,8.6,布莱恩·辛格 Bryan Singer
城市之光,9.3,查理·卓别林 Charles Chaplin
爆裂鼓手,8.6,达米恩·查泽雷 Damien Chazelle
哈利·波特与死亡圣器(上),8.6,大卫·叶茨 David Yates
大红灯笼高高挂,8.8,张艺谋 Yimou Zhang
无耻混蛋,8.7,昆汀·塔伦蒂诺 Quentin Tarantino
青蛇,8.6,徐克 Hark Tsui
终结者2:审判日,8.8,詹姆斯·卡梅隆 James Cameron
东邪西毒,8.6,王家卫 Kar Wai Wong
黑天鹅,8.6,达伦·阿罗诺夫斯基 Darren Aronofsky
新龙门客栈,8.7,李惠民 Raymond Lee
初恋这件小事,8.5,
千钧一发,8.8,安德鲁·尼科尔 Andrew Niccol
人工智能,8.7,史蒂文·斯皮尔伯格 Steven Spielberg
崖上的波妞,8.6,宫崎骏 Hayao Miyazaki
虎口脱险,8.9,杰拉尔·乌里 Gérard Oury
雨人,8.7,巴瑞·莱文森 Barry Levinson
哈利·波特与凤凰社,8.6,大卫·叶茨 David Yates
彗星来的那一夜,8.6,詹姆斯·沃德·布柯特 James Ward Byrkit
罗生门,8.8,黑泽明 Akira Kurosawa
海边的曼彻斯特,8.6,肯尼斯·罗纳根 Kenneth Lonergan
恋恋笔记本,8.5,尼克·卡索维茨 Nick Cassavetes
火星救援,8.5,雷德利·斯科特 Ridley Scott
真爱至上,8.5,理查德·柯蒂斯 Richard Curtis
黑客帝国2:重装上阵,8.7,
冰雪奇缘,8.5,克里斯·巴克 Chris Buck
步履不停,8.8,是枝裕和 Hirokazu Koreeda
千年女优,8.8,今敏 Satoshi Kon
奇迹男孩,8.6,斯蒂芬·卓博斯基 Stephen Chbosky
谍影重重2,8.7,保罗·格林格拉斯 Paul Greengrass
蜘蛛侠:平行宇宙,8.6,鲍勃·佩尔西凯蒂 Bob Persichetti
战争之王,8.7,安德鲁·尼科尔 Andrew Niccol
攻壳机动队,9.0,押井守 Mamoru Oshii
血钻,8.7,爱德华·兹威克 Edward Zwick
小姐,8.5,朴赞郁 Chan-wook Park
血观音,8.6,杨雅喆 Ya-che Yang
隐藏人物,8.9,特奥多尔·梅尔菲 Theodore Melfi
魂断蓝桥,8.8,茂文·勒鲁瓦 Mervyn LeRoy
房间,8.7,伦尼·阿伯拉罕森 Lenny Abrahamson
1 电影名 评分 导演
2 肖申克的救赎 9.7 弗兰克·德拉邦特 Frank Darabont
3 霸王别姬 9.6 陈凯歌 Kaige Chen
4 泰坦尼克号 9.5 詹姆斯·卡梅隆 James Cameron
5 阿甘正传 9.5 罗伯特·泽米吉斯 Robert Zemeckis
6 千与千寻 9.4 宫崎骏 Hayao Miyazaki
7 美丽人生 9.5 罗伯托·贝尼尼 Roberto Benigni
8 星际穿越 9.4 克里斯托弗·诺兰 Christopher Nolan
9 这个杀手不太冷 9.4 吕克·贝松 Luc Besson
10 盗梦空间 9.4 克里斯托弗·诺兰 Christopher Nolan
11 楚门的世界 9.4 彼得·威尔 Peter Weir
12 辛德勒的名单 9.5 史蒂文·斯皮尔伯格 Steven Spielberg
13 忠犬八公的故事 9.4 莱塞·霍尔斯道姆 Lasse Hallström
14 海上钢琴师 9.3 朱塞佩·托纳多雷 Giuseppe Tornatore
15 疯狂动物城 9.3 拜伦·霍华德 Byron Howard
16 三傻大闹宝莱坞 9.2 拉库马·希拉尼 Rajkumar Hirani
17 机器人总动员 9.3 安德鲁·斯坦顿 Andrew Stanton
18 放牛班的春天 9.3 克里斯托夫·巴拉蒂 Christophe Barratier
19 无间道 9.3 刘伟强
20 控方证人 9.6 比利·怀尔德 Billy Wilder
21 寻梦环游记 9.1 李·昂克里奇 Lee Unkrich
22 大话西游之大圣娶亲 9.2 刘镇伟 Jeffrey Lau
23 熔炉 9.3 黄东赫 Dong-hyuk Hwang
24 触不可及 9.3 奥利维·那卡什 Olivier Nakache
25 教父 9.3 弗朗西斯·福特·科波拉 Francis Ford Coppola
26 末代皇帝 9.3 贝纳尔多·贝托鲁奇 Bernardo Bertolucci
27 哈利·波特与魔法石 9.2 Chris Columbus
28 当幸福来敲门 9.1 加布里尔·穆奇诺 Gabriele Muccino
29 龙猫 9.2 宫崎骏 Hayao Miyazaki
30 活着 9.3 张艺谋 Yimou Zhang
31 怦然心动 9.1 罗伯·莱纳 Rob Reiner
32 蝙蝠侠:黑暗骑士 9.2 克里斯托弗·诺兰 Christopher Nolan
33 指环王3:王者无敌 9.3 彼得·杰克逊 Peter Jackson
34 我不是药神 9.0 文牧野 Muye Wen
35 乱世佳人 9.3 维克多·弗莱明 Victor Fleming
36 让子弹飞 9.0 姜文 Wen Jiang
37 飞屋环游记 9.1 彼特·道格特 Pete Docter
38 哈尔的移动城堡 9.1 宫崎骏 Hayao Miyazaki
39 十二怒汉 9.4 西德尼·吕美特 Sidney Lumet
40 海蒂和爷爷 9.3 阿兰·葛斯彭纳 Alain Gsponer
41 素媛 9.3 李濬益 Jun-ik Lee
42 猫鼠游戏 9.1 史蒂文·斯皮尔伯格 Steven Spielberg
43 天空之城 9.2 宫崎骏 Hayao Miyazaki
44 鬼子来了 9.3 姜文 Wen Jiang
45 摔跤吧!爸爸 9.0 涅提·蒂瓦里 Nitesh Tiwari
46 少年派的奇幻漂流 9.1 李安 Ang Lee
47 钢琴家 9.3 罗曼·波兰斯基 Roman Polanski
48 死亡诗社 9.2 彼得·威尔 Peter Weir
49 指环王2:双塔奇兵 9.2 彼得·杰克逊 Peter Jackson
50 大话西游之月光宝盒 9.0 刘镇伟 Jeffrey Lau
51 绿皮书 8.9 彼得·法雷里 Peter Farrelly
52 何以为家 9.1 娜丁·拉巴基 Nadine Labaki
53 闻香识女人 9.1 马丁·布莱斯 Martin Brest
54 大闹天宫 9.4 万籁鸣 Laiming Wan
55 黑客帝国 9.1 安迪·沃卓斯基 Andy Wachowski
56 指环王1:护戒使者 9.1 彼得·杰克逊 Peter Jackson
57 罗马假日 9.1 威廉·惠勒 William Wyler
58 教父2 9.3 弗朗西斯·福特·科波拉 Francis Ford Coppola
59 狮子王 9.1 Roger Allers
60 天堂电影院 9.2 朱塞佩·托纳多雷 Giuseppe Tornatore
61 饮食男女 9.2 李安 Ang Lee
62 辩护人 9.2 杨宇硕 Woo-seok Yang
63 本杰明·巴顿奇事 9.0 大卫·芬奇 David Fincher
64 搏击俱乐部 9.0 大卫·芬奇 David Fincher
65 美丽心灵 9.1 朗·霍华德 Ron Howard
66 穿条纹睡衣的男孩 9.2 马克·赫尔曼 Mark Herman
67 哈利·波特与死亡圣器(下) 9.0 大卫·叶茨 David Yates
68 情书 8.9 岩井俊二 Shunji Iwai
69 两杆大烟枪 9.1 盖·里奇 Guy Ritchie
70 窃听风暴 9.2
71 功夫 8.9 周星驰 Stephen Chow
72 音乐之声 9.1 罗伯特·怀斯 Robert Wise
73 哈利·波特与阿兹卡班的囚徒 9.0 阿方索·卡隆 Alfonso Cuarón
74 阿凡达 8.8 詹姆斯·卡梅隆 James Cameron
75 西西里的美丽传说 8.9 朱塞佩·托纳多雷 Giuseppe Tornatore
76 看不见的客人 8.8 奥里奥尔·保罗 Oriol Paulo
77 拯救大兵瑞恩 9.1 史蒂文·斯皮尔伯格 Steven Spielberg
78 沉默的羔羊 8.9 乔纳森·戴米 Jonathan Demme
79 小鞋子 9.2 马基德·马基迪 Majid Majidi
80 蝴蝶效应 8.9 埃里克·布雷斯 Eric Bress
81 布达佩斯大饭店 8.9 韦斯·安德森 Wes Anderson
82 飞越疯人院 9.1 米洛斯·福尔曼 Miloš Forman
83 还有明天 9.3 宝拉·柯特莱西 Paola Cortellesi
84 禁闭岛 8.9 Martin Scorsese
85 心灵捕手 9.0 格斯·范·桑特 Gus Van Sant
86 致命魔术 8.9 克里斯托弗·诺兰 Christopher Nolan
87 低俗小说 8.9 昆汀·塔伦蒂诺 Quentin Tarantino
88 哈利·波特与密室 8.9 Chris Columbus
89 超脱 9.0 托尼·凯耶 Tony Kaye
90 一一 9.1 杨德昌 Edward Yang
91 喜剧之王 8.8 周星驰 Stephen Chow
92 杀人回忆 8.9 奉俊昊 Joon-ho Bong
93 致命ID 8.9 詹姆斯·曼高德 James Mangold
94 摩登时代 9.3 查理·卓别林 Charles Chaplin
95 春光乍泄 9.0 王家卫 Kar Wai Wong
96 加勒比海盗 8.8 戈尔·维宾斯基 Gore Verbinski
97 海豚湾 9.3 路易·西霍尤斯 Louie Psihoyos
98 美国往事 9.1 赛尔乔·莱翁内 Sergio Leone
99 红辣椒 9.0 今敏 Satoshi Kon
100 七宗罪 8.8 大卫·芬奇 David Fincher
101 唐伯虎点秋香 8.8 李力持 Lik-Chi Lee
102 狩猎 9.1 托马斯·温特伯格 Thomas Vinterberg
103 幽灵公主 8.9 宫崎骏 Hayao Miyazaki
104 寄生虫 8.8 奉俊昊 Joon-ho Bong
105 甜蜜蜜 8.9 陈可辛 Peter Chan
106 天书奇谭 9.2 王树忱 Shuchen Wang
107 蝙蝠侠:黑暗骑士崛起 8.9 克里斯托弗·诺兰 Christopher Nolan
108 超能陆战队 8.8 唐·霍尔 Don Hall
109 7号房的礼物 8.9 李焕庆 Hwan-kyeong Lee
110 茶馆 9.5 谢添 Tian Xie
111 第六感 8.9 M·奈特·沙马兰 M. Night Shyamalan
112 爱在黎明破晓前 8.8 理查德·林克莱特 Richard Linklater
113 爱在日落黄昏时 8.9 理查德·林克莱特 Richard Linklater
114 头脑特工队 8.8
115 被嫌弃的松子的一生 8.8 中岛哲也 Tetsuya Nakashima
116 哈利·波特与火焰杯 8.8 迈克·内威尔 Mike Newell
117 未麻的部屋 9.1 今敏 Satoshi Kon
118 重庆森林 8.8 王家卫 Kar Wai Wong
119 借东西的小人阿莉埃蒂 8.9 米林宏昌 Hiromasa Yonebayashi
120 菊次郎的夏天 8.9 北野武 Takeshi Kitano
121 入殓师 8.9 泷田洋二郎 Yôjirô Takita
122 剪刀手爱德华 8.7 蒂姆·波顿 Tim Burton
123 断背山 8.8 李安 Ang Lee
124 勇敢的心 8.9 梅尔·吉布森 Mel Gibson
125 时空恋旅人 8.8 理查德·柯蒂斯 Richard Curtis
126 驯龙高手 8.8 迪恩·德布洛斯 Dean DeBlois
127 傲慢与偏见 8.7 乔·怀特 Joe Wright
128 无人知晓 9.1 是枝裕和 Hirokazu Koreeda
129 消失的爱人 8.7 大卫·芬奇 David Fincher
130 倩女幽魂 8.8 程小东 Siu-Tung Ching
131 新世界 8.9 朴勋政 Hoon-jung Park
132 花样年华 8.8 王家卫 Kar Wai Wong
133 玩具总动员3 8.9 李·昂克里奇 Lee Unkrich
134 一个叫欧维的男人决定去死 8.9 汉内斯·赫尔姆 Hannes Holm
135 色,戒 8.7 李安 Ang Lee
136 完美的世界 9.1 克林特·伊斯特伍德 Clint Eastwood
137 怪兽电力公司 8.8 彼特·道格特 Pete Docter
138 教父3 9.0 弗朗西斯·福特·科波拉 Francis Ford Coppola
139 阳光灿烂的日子 8.8 姜文 Wen Jiang
140 小森林 夏秋篇 9.0 森淳一 Junichi Mori
141 天使爱美丽 8.7 让-皮埃尔·热内 Jean-Pierre Jeunet
142 侧耳倾听 8.9 近藤喜文 Yoshifumi Kondo
143 哪吒闹海 9.2 王树忱 Shuchen Wang
144 九品芝麻官 8.8 王晶 Jing Wong
145 被解救的姜戈 8.8 昆汀·塔伦蒂诺 Quentin Tarantino
146 请以你的名字呼唤我 8.8 卢卡·瓜达尼诺 Luca Guadagnino
147 幸福终点站 8.8 史蒂文·斯皮尔伯格 Steven Spielberg
148 釜山行 8.6 延尚昊 Sang-ho Yeon
149 神偷奶爸 8.7 皮艾尔·柯芬 Pierre Coffin
150 小森林 冬春篇 9.0 森淳一 Junichi Mori
151 喜宴 9.0 李安 Ang Lee
152 萤火之森 8.8 大森贵弘 Takahiro Omori
153 告白 8.8 中岛哲也 Tetsuya Nakashima
154 七武士 9.3 黑泽明 Akira Kurosawa
155 玛丽和麦克斯 9.0 亚当·艾略特 Adam Elliot
156 头号玩家 8.6 史蒂文·斯皮尔伯格 Steven Spielberg
157 模仿游戏 8.8 莫滕·泰杜姆 Morten Tyldum
158 惊魂记 9.0 阿尔弗雷德·希区柯克 Alfred Hitchcock
159 机器人之梦 9.1 巴勃罗·贝格尔 Pablo Berger
160 大鱼 8.8 蒂姆·波顿 Tim Burton
161 心灵奇旅 8.7 彼特·道格特 Pete Docter
162 背靠背,脸对脸 9.5 黄建新 Jianxin Huang
163 你的名字。 8.5 新海诚 Makoto Shinkai
164 血战钢锯岭 8.7 梅尔·吉布森 Mel Gibson
165 射雕英雄传之东成西就 8.7 刘镇伟 Jeffrey Lau
166 我是山姆 9.0 杰茜·尼尔森 Jessie Nelson
167 阳光姐妹淘 8.8 姜炯哲 Hyeong-Cheol Kang
168 末路狂花 9.0 雷德利·斯科特 Ridley Scott
169 恐怖直播 8.7 金秉祐 Byeong-woo Kim
170 黑客帝国3:矩阵革命 8.8
171 高山下的花环 9.5 谢晋 Jin Xie
172 小丑 8.7 托德·菲利普斯 Todd Phillips
173 谍影重重3 8.9 保罗·格林格拉斯 Paul Greengrass
174 三块广告牌 8.7 马丁·麦克唐纳 Martin McDonagh
175 电锯惊魂 8.7 詹姆斯·温 James Wan
176 无间道2 8.8 刘伟强 Andrew Lau
177 达拉斯买家俱乐部 8.8 让-马克·瓦雷 Jean-Marc Vallée
178 疯狂原始人 8.7 科克·德·米科 Kirk De Micco
179 绿里奇迹 8.9 弗兰克·德拉邦特 Frank Darabont
180 爱在午夜降临前 8.9 理查德·林克莱特 Richard Linklater
181 疯狂的石头 8.6 宁浩 Hao Ning
182 雨中曲 9.1 斯坦利·多南 Stanley Donen
183 2001太空漫游 8.9 斯坦利·库布里克 Stanley Kubrick
184 海街日记 8.8 是枝裕和 Hirokazu Koreeda
185 上帝之城 9.0
186 风之谷 8.9 宫崎骏 Hayao Miyazaki
187 心迷宫 8.7 忻钰坤 Yukun Xin
188 英雄本色 8.6 吴宇森 John Woo
189 记忆碎片 8.7 克里斯托弗·诺兰 Christopher Nolan
190 纵横四海 8.8 吴宇森 John Woo
191 无敌破坏王 8.7 瑞奇·莫尔 Rich Moore
192 卢旺达饭店 8.9 特瑞·乔治 Terry George
193 恐怖游轮 8.5 克里斯托弗·史密斯 Christopher Smith
194 牯岭街少年杀人事件 8.9 杨德昌 Edward Yang
195 东京教父 9.0 今敏 Satoshi Kon
196 小偷家族 8.7 是枝裕和 Hirokazu Koreeda
197 魔女宅急便 8.7 宫崎骏 Hayao Miyazaki
198 芙蓉镇 9.3 谢晋 Jin Xie
199 冰川时代 8.7 卡洛斯·沙尔丹哈 Carlos Saldanha
200 忠犬八公物语 9.2 神山征二郎 Seijirô Kôyama
201 岁月神偷 8.7 罗启锐 Alex Law
202 遗愿清单 8.7 罗伯·莱纳 Rob Reiner
203 荒蛮故事 8.7 达米安·斯兹弗隆 Damián Szifron
204 大佛普拉斯 8.7 黄信尧 Hsin-yao Huang
205 源代码 8.6 邓肯·琼斯 Duncan Jones
206 花束般的恋爱 8.6 土井裕泰 Nobuhiro Doi
207 白日梦想家 8.6 本·斯蒂勒 Ben Stiller
208 爱乐之城 8.4 达米恩·查泽雷 Damien Chazelle
209 疯狂的麦克斯4:狂暴之路 8.7 乔治·米勒 George Miller
210 可可西里 8.9 陆川 Chuan Lu
211 你看起来好像很好吃 8.9 藤森雅也 Masaya Fujimori
212 贫民窟的百万富翁 8.6 丹尼·鲍尔 Danny Boyle
213 波西米亚狂想曲 8.6 布莱恩·辛格 Bryan Singer
214 城市之光 9.3 查理·卓别林 Charles Chaplin
215 爆裂鼓手 8.6 达米恩·查泽雷 Damien Chazelle
216 哈利·波特与死亡圣器(上) 8.6 大卫·叶茨 David Yates
217 大红灯笼高高挂 8.8 张艺谋 Yimou Zhang
218 无耻混蛋 8.7 昆汀·塔伦蒂诺 Quentin Tarantino
219 青蛇 8.6 徐克 Hark Tsui
220 终结者2:审判日 8.8 詹姆斯·卡梅隆 James Cameron
221 东邪西毒 8.6 王家卫 Kar Wai Wong
222 黑天鹅 8.6 达伦·阿罗诺夫斯基 Darren Aronofsky
223 新龙门客栈 8.7 李惠民 Raymond Lee
224 初恋这件小事 8.5
225 千钧一发 8.8 安德鲁·尼科尔 Andrew Niccol
226 人工智能 8.7 史蒂文·斯皮尔伯格 Steven Spielberg
227 崖上的波妞 8.6 宫崎骏 Hayao Miyazaki
228 虎口脱险 8.9 杰拉尔·乌里 Gérard Oury
229 雨人 8.7 巴瑞·莱文森 Barry Levinson
230 哈利·波特与凤凰社 8.6 大卫·叶茨 David Yates
231 彗星来的那一夜 8.6 詹姆斯·沃德·布柯特 James Ward Byrkit
232 罗生门 8.8 黑泽明 Akira Kurosawa
233 海边的曼彻斯特 8.6 肯尼斯·罗纳根 Kenneth Lonergan
234 恋恋笔记本 8.5 尼克·卡索维茨 Nick Cassavetes
235 火星救援 8.5 雷德利·斯科特 Ridley Scott
236 真爱至上 8.5 理查德·柯蒂斯 Richard Curtis
237 黑客帝国2:重装上阵 8.7
238 冰雪奇缘 8.5 克里斯·巴克 Chris Buck
239 步履不停 8.8 是枝裕和 Hirokazu Koreeda
240 千年女优 8.8 今敏 Satoshi Kon
241 奇迹男孩 8.6 斯蒂芬·卓博斯基 Stephen Chbosky
242 谍影重重2 8.7 保罗·格林格拉斯 Paul Greengrass
243 蜘蛛侠:平行宇宙 8.6 鲍勃·佩尔西凯蒂 Bob Persichetti
244 战争之王 8.7 安德鲁·尼科尔 Andrew Niccol
245 攻壳机动队 9.0 押井守 Mamoru Oshii
246 血钻 8.7 爱德华·兹威克 Edward Zwick
247 小姐 8.5 朴赞郁 Chan-wook Park
248 血观音 8.6 杨雅喆 Ya-che Yang
249 隐藏人物 8.9 特奥多尔·梅尔菲 Theodore Melfi
250 魂断蓝桥 8.8 茂文·勒鲁瓦 Mervyn LeRoy
251 房间 8.7 伦尼·阿伯拉罕森 Lenny Abrahamson

56
project/spider/dependency-reduced-pom.xml

@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.spider</groupId>
<artifactId>spider</artifactId>
<name>Spider Project</name>
<version>1.0.0</version>
<description>A Java Web Spider Framework</description>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.spider.core.SpiderRunner</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer>
<mainClass>com.spider.core.SpiderRunner</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<properties>
<maven.compiler.target>11</maven.compiler.target>
<maven.compiler.source>11</maven.compiler.source>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
</project>

14731
project/spider/logs/spider-2026-05-31.0.log

File diff suppressed because it is too large

1828
project/spider/logs/spider-crawl-2026-05-31.0.log

File diff suppressed because it is too large

114
project/spider/logs/spider-crawl.log

@ -0,0 +1,114 @@
2026-06-01 12:11:34.447 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书...
2026-06-01 12:11:34.450 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1
2026-06-01 12:11:37.452 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签
2026-06-01 12:11:37.466 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2
2026-06-01 12:11:39.733 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签
2026-06-01 12:11:39.738 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书
2026-06-01 12:11:39.760 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv
2026-06-01 12:11:39.761 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250...
2026-06-01 12:11:39.761 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250
2026-06-01 12:11:42.200 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25
2026-06-01 12:11:44.360 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50
2026-06-01 12:11:46.508 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75
2026-06-01 12:11:48.740 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100
2026-06-01 12:11:50.890 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125
2026-06-01 12:11:53.041 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150
2026-06-01 12:11:55.290 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175
2026-06-01 12:11:57.549 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200
2026-06-01 12:11:59.800 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225
2026-06-01 12:12:02.148 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影
2026-06-01 12:12:02.224 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv
2026-06-01 12:12:02.225 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条...
2026-06-01 12:12:02.225 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime
2026-06-01 12:12:04.523 [main] INFO com.spider.service.BaiduHotSearchSpider - 获取到HTML长度: 195924 字节
2026-06-01 12:12:04.523 [main] INFO com.spider.service.BaiduHotSearchSpider - HTML内容前2000字符:
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html;charset=utf-8">
<meta http-equiv="X-UA-Compatible" content="IE=edge,chrome=1">
<meta content="always" name="referrer">
<meta name="theme-color" content="#2932e1">
<link rel="shortcut icon" href="//www.baidu.com/favicon.ico" type="image/x-icon" />
<link rel="icon" sizes="any" mask href="//www.baidu.com/img/baidu_85beaf5496f291521eb75ba38eacbd87.svg">
<link rel="dns-prefetch" href="//fyb-pc-static.cdn.bcebos.com"/>
<meta name="keywords" content="百度热搜,百度热搜榜,百度搜索排行榜,搜索排行榜,百度热门搜索,今日热搜,今日热点,排行榜,热搜榜,热词榜,热门话题,网络热点,实时热点,热门事件,热点">
<meta name="description" content="百度热搜以数亿用户海量的真实数据为基础,通过专业的数据挖掘方法,计算关键词的热搜指数,旨在建立权威、全面、热门、时效的各类关键词排行榜,引领热词阅读时代。">
<title>百度热搜</title>
<style data-vue-ssr-id="22cfed39:0">
.c-gap-top-small {
margin-top: 3px;
}
.c-gap-top {
margin-top: 7px;
}
.c-gap-top-large {
margin-top: 11px;
}
.c-gap-top-mini {
margin-top: 2px;
}
.c-gap-top-xsmall {
margin-top: 4px;
}
.c-gap-top-middle {
margin-top: 10px;
}
.c-gap-bottom-small {
margin-bottom: 3px;
}
.c-gap-bottom {
margin-bottom: 7px;
}
.c-gap-bottom-large {
margin-bottom: 11px;
}
.c-gap-bottom-mini {
margin-bottom: 2px;
}
.c-gap-bottom-xsmall {
margin-bottom: 4px;
}
.c-gap-bottom-middle {
margin-bottom: 10px;
}
.c-gap-left {
margin-left: 12px;
}
.c-gap-left-small {
margin-left: 8px;
}
.c-gap-left-xsmall {
margin-left: 4px;
}
.c-gap-left-mini {
margin-left: 2px;
}
.c-gap-left-large {
margin-left: 16px;
}
.c-gap-left-middle {
margin-left: 10px;
}
.c-gap-right {
margin-right: 12px;
}
.c-gap-right-small {
margin-right: 8px;
}
.c-gap-right-xsmall {
margin-right: 4px;
}
.c-gap-right-mini {
margin-right: 2px;
}
.c-gap-right-large {
margin-right: 16px;
}
.c-gap-right-middle {
margin-right: 10
2026-06-01 12:12:04.529 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜
2026-06-01 12:12:04.532 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv
2026-06-01 12:12:04.533 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv
2026-06-01 12:12:04.533 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv
2026-06-01 12:12:04.534 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv

817
project/spider/logs/spider-error.log

@ -0,0 +1,817 @@
2026-05-31 18:41:43.730 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94)
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:69)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:44:20.334 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:44:20.334 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:44:20.335 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:44:20.779 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:44:20.781 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94)
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:148)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:44:20.782 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:00.572 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:00.573 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:00.573 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:01.081 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:01.082 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:01.082 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:01.661 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:01.663 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:55.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:55.050 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:55.050 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:55.558 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:55.558 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:55.558 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:45:55.954 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:45:55.956 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:47:33.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:47:33.050 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:47:33.050 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:47:33.557 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:47:33.559 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:47:33.560 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:47:34.257 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:47:34.259 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:49:01.109 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:49:01.111 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:49:01.111 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:49:01.622 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:49:01.622 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:49:01.622 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:49:02.016 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:49:02.018 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:50:16.271 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:50:16.273 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:50:16.273 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:50:16.784 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:50:16.785 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:50:16.785 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 18:50:17.167 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 18:50:17.168 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:12:20.722 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:12:20.723 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:12:20.723 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:12:21.232 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:12:21.232 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:12:21.232 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:12:21.816 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:12:21.818 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:13:44.179 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:13:44.182 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:13:44.182 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:13:44.690 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:13:44.691 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:13:44.692 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:13:45.193 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:13:45.196 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:14:13.362 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:14:13.364 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:14:13.364 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:14:13.873 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:14:13.873 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:14:13.873 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:14:14.272 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:14:14.275 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。)
at java.base/java.io.FileOutputStream.open0(Native Method)
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235)
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123)
at java.base/java.io.FileWriter.<init>(FileWriter.java:66)
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128)
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84)
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:16:48.090 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:16:48.091 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:16:48.092 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:16:48.602 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:16:48.603 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:16:48.604 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:32.122 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:17:32.123 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:32.125 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:32.632 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:17:32.633 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:32.634 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:59.360 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:17:59.361 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:59.362 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:59.874 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 19:17:59.875 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 19:17:59.876 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:01.343 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:01.346 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:01.346 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:01.853 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:01.854 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:01.854 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:30.630 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:30.633 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:30.634 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:31.244 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:31.245 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:31.246 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:48.550 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:48.551 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:48.551 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:49.062 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:02:49.064 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:02:49.065 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:26:42.771 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:26:42.773 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:26:42.773 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:26:43.284 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:26:43.284 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:26:43.284 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:26:43.740 [main] ERROR com.spider.service.BaiduHotSearchSpider - 爬取百度热搜时出错
com.spider.exception.ParseException: 无法找到热搜数据元素
at com.spider.service.BaiduHotSearchSpider.crawlHotSearch(BaiduHotSearchSpider.java:62)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:126)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:26:43.741 [main] ERROR com.spider.controller.SpiderController - 解析异常 - Source: 百度热搜, Element: hotItems, Message: 无法找到热搜数据元素
2026-05-31 22:26:43.741 [main] ERROR com.spider.view.ConsoleView - 错误: 解析错误: 无法找到热搜数据元素
2026-05-31 22:27:39.399 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:27:39.401 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:27:39.402 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:27:40.120 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:27:40.121 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:27:40.121 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:27:40.573 [main] ERROR com.spider.service.BaiduHotSearchSpider - 爬取百度热搜时出错
com.spider.exception.ParseException: 无法找到热搜数据元素
at com.spider.service.BaiduHotSearchSpider.crawlHotSearch(BaiduHotSearchSpider.java:78)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:126)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:27:40.573 [main] ERROR com.spider.controller.SpiderController - 解析异常 - Source: 百度热搜, Element: hotItems, Message: 无法找到热搜数据元素
2026-05-31 22:27:40.573 [main] ERROR com.spider.view.ConsoleView - 错误: 解析错误: 无法找到热搜数据元素
2026-05-31 22:28:31.214 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:28:31.217 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:28:31.217 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:28:31.725 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:53)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:28:31.726 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:28:31.728 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:28:32.178 [main] ERROR com.spider.service.BaiduHotSearchSpider - 爬取百度热搜时出错
com.spider.exception.ParseException: 无法找到热搜数据元素
at com.spider.service.BaiduHotSearchSpider.crawlHotSearch(BaiduHotSearchSpider.java:85)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:126)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:28:32.178 [main] ERROR com.spider.controller.SpiderController - 解析异常 - Source: 百度热搜, Element: hotItems, Message: 无法找到热搜数据元素
2026-05-31 22:28:32.178 [main] ERROR com.spider.view.ConsoleView - 错误: 解析错误: 无法找到热搜数据元素
2026-05-31 22:29:13.301 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:29:13.303 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:29:13.303 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:29:13.811 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:29:13.812 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:29:13.812 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:30:07.165 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:30:07.168 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:30:07.168 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:30:07.777 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:30:07.778 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:30:07.779 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:02.051 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:31:02.053 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:02.054 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:02.664 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:31:02.665 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:02.665 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:55.707 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:31:55.709 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:55.709 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:56.221 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:50)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:31:56.224 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:31:56.224 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:20.291 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:39)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:63)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:37)
... 4 common frames omitted
2026-05-31 22:33:20.293 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:20.293 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:21.519 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:39)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:63)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:37)
... 4 common frames omitted
2026-05-31 22:33:21.520 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:21.520 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:51.729 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:39)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:63)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:37)
... 4 common frames omitted
2026-05-31 22:33:51.731 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:51.731 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:52.955 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:39)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:63)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:37)
... 4 common frames omitted
2026-05-31 22:33:52.956 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:33:52.957 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:43:59.474 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:47)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:81)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:45)
... 4 common frames omitted
2026-05-31 22:43:59.477 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:43:59.477 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:44:03.775 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:47)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
at com.spider.utils.HttpClientUtil.fetchHtmlWithHttpClient(HttpClientUtil.java:81)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:45)
... 4 common frames omitted
2026-05-31 22:44:03.775 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:44:03.775 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 所有获取方式都失败: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间
2026-05-31 22:44:07.322 [main] ERROR com.spider.service.BaiduHotSearchSpider - 爬取百度热搜时出错
com.spider.exception.ParseException: 无法找到热搜数据元素
at com.spider.service.BaiduHotSearchSpider.crawlHotSearch(BaiduHotSearchSpider.java:81)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:126)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:44:07.323 [main] ERROR com.spider.controller.SpiderController - 解析异常 - Source: 百度热搜, Element: hotItems, Message: 无法找到热搜数据元素
2026-05-31 22:44:07.324 [main] ERROR com.spider.view.ConsoleView - 错误: 解析错误: 无法找到热搜数据元素
2026-05-31 22:45:21.397 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:46)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: org.jsoup.HttpStatusException: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:890)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
at com.spider.utils.HttpClientUtil.fetchHtmlWithJsoup(HttpClientUtil.java:104)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:44)
... 4 common frames omitted
2026-05-31 22:45:21.399 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
2026-05-31 22:45:21.399 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
2026-05-31 22:45:24.672 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:46)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: org.jsoup.HttpStatusException: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:890)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
at com.spider.utils.HttpClientUtil.fetchHtmlWithJsoup(HttpClientUtil.java:104)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:44)
... 4 common frames omitted
2026-05-31 22:45:24.673 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
2026-05-31 22:45:24.673 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
2026-05-31 22:45:27.175 [main] ERROR com.spider.service.BaiduHotSearchSpider - 爬取百度热搜时出错
com.spider.exception.ParseException: 无法找到热搜数据元素
at com.spider.service.BaiduHotSearchSpider.crawlHotSearch(BaiduHotSearchSpider.java:81)
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:126)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
2026-05-31 22:45:27.175 [main] ERROR com.spider.controller.SpiderController - 解析异常 - Source: 百度热搜, Element: hotItems, Message: 无法找到热搜数据元素
2026-05-31 22:45:27.175 [main] ERROR com.spider.view.ConsoleView - 错误: 解析错误: 无法找到热搜数据元素
2026-05-31 22:46:35.433 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:46)
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55)
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: org.jsoup.HttpStatusException: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:890)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
at com.spider.utils.HttpClientUtil.fetchHtmlWithJsoup(HttpClientUtil.java:99)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:44)
... 4 common frames omitted
2026-05-31 22:46:35.436 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
2026-05-31 22:46:35.437 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fbook.douban.com%2Fchart%3Fsub_type%3D1]
2026-05-31 22:46:38.708 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:46)
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51)
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96)
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41)
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20)
Caused by: org.jsoup.HttpStatusException: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:890)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:887)
at org.jsoup.helper.HttpConnection$Response.execute(HttpConnection.java:829)
at org.jsoup.helper.HttpConnection.execute(HttpConnection.java:366)
at org.jsoup.helper.HttpConnection.get(HttpConnection.java:353)
at com.spider.utils.HttpClientUtil.fetchHtmlWithJsoup(HttpClientUtil.java:99)
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:44)
... 4 common frames omitted
2026-05-31 22:46:38.709 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]
2026-05-31 22:46:38.710 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 所有获取方式都失败: HTTP error fetching URL. Status=418, URL=[https://sec.douban.com/b?r=https%3A%2F%2Fmovie.douban.com%2Ftop250]

1170
project/spider/logs/spider.log

File diff suppressed because it is too large

99
project/spider/pom.xml

@ -0,0 +1,99 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Maven coordinates of the spider application; packaged as a plain jar. -->
<modelVersion>4.0.0</modelVersion>
<groupId>com.spider</groupId>
<artifactId>spider</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<name>Spider Project</name>
<description>A Java Web Spider Framework</description>
<!-- Sources are UTF-8 and the project is compiled for Java 11. -->
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.source>11</maven.compiler.source>
<maven.compiler.target>11</maven.compiler.target>
</properties>
<!-- Third-party libraries: jsoup, Apache HttpClient 5, SLF4J API with the Logback binding, and Jackson databind. -->
<dependencies>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.15.3</version>
</dependency>
<dependency>
<groupId>org.apache.httpcomponents.client5</groupId>
<artifactId>httpclient5</artifactId>
<version>5.2.1</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.7</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.4.11</version>
</dependency>
<dependency>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
<version>2.15.2</version>
</dependency>
</dependencies>
<build>
<plugins>
<!-- Compiler level is stated here as well as in <properties>; keep the two in sync when changing it. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>11</source>
<target>11</target>
</configuration>
</plugin>
<!-- Writes com.spider.core.SpiderRunner as the manifest main class of the regular jar. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.spider.core.SpiderRunner</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<!-- Runs the shade goal during the package phase and sets the same main class in the shaded jar's manifest. -->
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.0</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.spider.core.SpiderRunner</mainClass>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>

7
project/spider/src/main/java/com/spider/command/Command.java

@ -0,0 +1,7 @@
package com.spider.command;
public interface Command {
String getName();
String getDescription();
void execute(String[] args);
}

69
project/spider/src/main/java/com/spider/command/ConfigCommand.java

@ -0,0 +1,69 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
import com.spider.view.ConsoleView;
import com.spider.view.ViewFactory;
/**
 * Console command {@code config}: shows the current spider configuration,
 * updates a single key, or lists the keys that can be set.
 *
 * <p>Supported forms: {@code config show}, {@code config set <key> <value>},
 * {@code config list}. The sub-command is matched case-insensitively, in line
 * with the {@code crawl}, {@code list} and {@code save} commands.
 */
public class ConfigCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ConfigCommand.class);

    private final ConsoleView view;

    /** Creates the command with a console view obtained from {@link ViewFactory}. */
    public ConfigCommand() {
        this.view = ViewFactory.createConsoleView();
    }

    @Override
    public String getName() {
        return "config";
    }

    @Override
    public String getDescription() {
        return "配置爬虫参数";
    }

    /**
     * Dispatches on the sub-command in {@code args[1]}.
     *
     * @param args tokenized input; {@code args[1]} is the action and, for
     *             {@code set}, {@code args[2]} / {@code args[3]} are key and value
     */
    @Override
    public void execute(String[] args) {
        if (args.length < 2) {
            showHelp();
            return;
        }
        // Locale.ROOT keeps the folding independent of the JVM's default locale
        // (e.g. "LIST" would not become "list" under a Turkish locale otherwise).
        String action = args[1].toLowerCase(java.util.Locale.ROOT);
        switch (action) {
            case "show":
                ControllerFactory.getSpiderController().showConfig();
                break;
            case "set":
                if (args.length < 4) {
                    logger.error("用法: config set <key> <value>");
                    showHelp();
                } else {
                    ControllerFactory.getSpiderController().updateConfig(args[2], args[3]);
                }
                break;
            case "list":
                listConfig();
                break;
            default:
                // Echo what the user actually typed, not the folded form.
                logger.error("未知操作: {}", args[1]);
                showHelp();
        }
    }

    /** Logs the configuration keys that {@code config set} understands. */
    private void listConfig() {
        logger.info("可配置的参数:");
        logger.info(" - thread.count : 线程数");
        logger.info(" - timeout : 超时时间(ms)");
        logger.info(" - retry.count : 重试次数");
        logger.info(" - user.agent : User-Agent");
    }

    /** Delegates to the console view's help output. */
    private void showHelp() {
        view.showHelp();
    }
}

94
project/spider/src/main/java/com/spider/command/CrawlCommand.java

@ -0,0 +1,94 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
import com.spider.utils.RetryUtils;
/**
 * Console command {@code crawl}: runs one of the data-source crawlers
 * ({@code books}, {@code movies}, {@code hotsearch}) or all of them in turn.
 *
 * <p>Each crawl is handed to {@link RetryUtils#executeWithRetry}; a failure
 * that escapes the retry helper is logged and does not abort the command.
 */
public class CrawlCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);

    /** Item count used when no count is given or the given one is not an integer. */
    private static final int DEFAULT_LIMIT = 50;

    @Override
    public String getName() {
        return "crawl";
    }

    @Override
    public String getDescription() {
        return "爬取指定数据源";
    }

    /**
     * Dispatches on the crawl target in {@code args[1]} (case-insensitive).
     *
     * @param args tokenized input; {@code args[2]}, when present, is the item
     *             count for {@code books} and {@code hotsearch}
     */
    @Override
    public void execute(String[] args) {
        if (args.length < 2) {
            showHelp();
            return;
        }
        // Locale.ROOT: with the default locale, "MOVIES" lower-cases to "movıes"
        // under Turkish/Azeri settings and would fall through to "unknown target".
        String target = args[1].toLowerCase(java.util.Locale.ROOT);
        switch (target) {
            case "books":
                int bookLimit = args.length > 2 ? parseLimit(args[2]) : DEFAULT_LIMIT;
                logger.info("执行 crawl books 命令,数量: {}", bookLimit);
                crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanBooks(bookLimit));
                break;
            case "movies":
                logger.info("执行 crawl movies 命令");
                crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanMovies());
                break;
            case "hotsearch":
                int hotLimit = args.length > 2 ? parseLimit(args[2]) : DEFAULT_LIMIT;
                logger.info("执行 crawl hotsearch 命令,数量: {}", hotLimit);
                crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlBaiduHotSearch(hotLimit));
                break;
            case "all":
                logger.info("执行 crawl all 命令,开始爬取所有数据源...");
                crawlAllDataWithRetry();
                break;
            default:
                logger.warn("未知爬取目标: {}", target);
                showHelp();
        }
    }

    /**
     * Runs {@code task} through the retry helper and logs the outcome.
     *
     * @param task the crawl to execute
     */
    private void crawlWithRetry(Runnable task) {
        try {
            // NOTE(review): 3 and 2000 are presumably the attempt count and the
            // delay in milliseconds; confirm against RetryUtils.
            RetryUtils.executeWithRetry(task, 3, 2000);
            logger.info("爬取任务完成");
        } catch (Exception e) {
            // Pass the throwable as the trailing argument so the stack trace is kept.
            logger.error("爬取任务失败: {}", e.getMessage(), e);
        }
    }

    /** Crawls books, movies and hot searches one after another, each with retry. */
    private void crawlAllDataWithRetry() {
        logger.info("=== 开始爬取所有数据 ===");
        crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanBooks(DEFAULT_LIMIT));
        crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanMovies());
        crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlBaiduHotSearch(DEFAULT_LIMIT));
        logger.info("=== 所有数据爬取完成 ===");
    }

    /** Logs the usage summary for this command. */
    private void showHelp() {
        logger.info("crawl 命令用法:");
        logger.info(" crawl books [数量] - 爬取豆瓣读书 TopN (默认50)");
        logger.info(" crawl movies - 爬取豆瓣电影 Top250");
        logger.info(" crawl hotsearch [数量]- 爬取百度热搜 TopN (默认50)");
        logger.info(" crawl all - 爬取所有数据源");
    }

    /**
     * Parses an item count, falling back to {@link #DEFAULT_LIMIT} when the
     * text is not an integer. The fallback is logged so a typo is not silently
     * turned into a different request.
     *
     * @param str user-supplied count
     * @return the parsed value, or {@link #DEFAULT_LIMIT} if {@code str} is malformed
     */
    private int parseLimit(String str) {
        try {
            return Integer.parseInt(str);
        } catch (NumberFormatException e) {
            logger.warn("Invalid count '{}', using default {}", str, DEFAULT_LIMIT);
            return DEFAULT_LIMIT;
        }
    }
}

81
project/spider/src/main/java/com/spider/command/HelpCommand.java

@ -0,0 +1,81 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Console command {@code help}: writes the full command reference and a set
 * of usage examples to the log, one line per log statement.
 */
public class HelpCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);

    /** Help text, emitted in order; each element becomes one INFO log entry. */
    private static final String[] HELP_LINES = {
        "\n┌─ Spider 多功能爬虫框架 帮助 ──────────────────────────┐",
        "│ │",
        "│ 【数据爬取命令】 │",
        "│ crawl books [N] 爬取豆瓣读书 Top N (默认50) │",
        "│ crawl movies 爬取豆瓣电影 Top250 │",
        "│ crawl hotsearch [N] 爬取百度热搜 Top N (默认50) │",
        "│ crawl all 爬取所有数据源 │",
        "│ │",
        "│ 【数据保存命令】 │",
        "│ save books [fname] 保存书籍数据到文件 │",
        "│ save movies [fname] 保存电影数据到文件 │",
        "│ save hotsearch [fn] 保存热搜数据到文件 │",
        "│ save all 保存所有已爬取的数据 │",
        "│ save files 列出已保存的文件 │",
        "│ │",
        "│ 【数据加载命令】 │",
        "│ load books <fname> 从文件加载书籍数据 │",
        "│ load movies <fname> 从文件加载电影数据 │",
        "│ load hotsearch <fn> 从文件加载热搜数据 │",
        "│ │",
        "│ 【数据列表命令】 │",
        "│ list books 显示已爬取的书籍 │",
        "│ list movies 显示已爬取的电影 │",
        "│ list hotsearch 显示已爬取的热搜 │",
        "│ list all 显示所有已爬取的数据 │",
        "│ │",
        "│ 【控制命令】 │",
        "│ start [url] 启动爬虫任务 │",
        "│ stop 停止爬虫任务 │",
        "│ status 查看爬虫运行状态 │",
        "│ │",
        "│ 【配置命令】 │",
        "│ config show 显示当前配置 │",
        "│ config set <k> <v> 设置配置项 │",
        "│ │",
        "│ 【其他】 │",
        "│ help 显示帮助信息 │",
        "│ exit 退出程序 │",
        "│ │",
        "└────────────────────────────────────────────────────────────┘",
        "\n使用示例:",
        " 爬取数据:",
        " crawl books 50 # 爬取豆瓣读书Top50",
        " crawl movies # 爬取豆瓣电影Top250",
        " crawl hotsearch 50 # 爬取百度热搜Top50",
        " crawl all # 爬取所有数据",
        "",
        " 保存和加载:",
        " save all # 保存所有数据到文件",
        " save books my.json # 保存书籍到指定文件",
        " save files # 查看已保存的文件",
        " load books my.json # 加载书籍数据",
        "",
        " 查看数据:",
        " list books # 查看已爬取的书籍",
        " list movies # 查看已爬取的电影",
        "",
        " 配置:",
        " config set thread.count 10 # 设置线程数",
    };

    @Override
    public String getName() {
        return "help";
    }

    @Override
    public String getDescription() {
        return "显示帮助信息";
    }

    /**
     * Logs every help line at INFO level.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        for (String helpLine : HELP_LINES) {
            logger.info(helpLine);
        }
    }
}

104
project/spider/src/main/java/com/spider/command/ListCommand.java

@ -0,0 +1,104 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
/**
 * Console command {@code list}: prints the books, movies and hot searches
 * currently held by the spider controller.
 */
public class ListCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);

    @Override
    public String getName() {
        return "list";
    }

    @Override
    public String getDescription() {
        return "列出爬取的数据";
    }

    /**
     * Dispatches on the data type in {@code args[1]} (case-insensitive).
     *
     * @param args tokenized input; {@code args[1]} is one of
     *             {@code books}, {@code movies}, {@code hotsearch}, {@code all}
     */
    @Override
    public void execute(String[] args) {
        if (args.length < 2) {
            showHelp();
            return;
        }
        // Locale.ROOT: the no-arg toLowerCase() maps 'I' to dotless 'ı' under a
        // Turkish/Azeri default locale, so "MOVIES" would never match "movies".
        String type = args[1].toLowerCase(java.util.Locale.ROOT);
        switch (type) {
            case "books":
                listBooks();
                break;
            case "movies":
                listMovies();
                break;
            case "hotsearch":
                listHotSearch();
                break;
            case "all":
                listAll();
                break;
            default:
                logger.error("未知数据类型: {}", type);
                showHelp();
        }
    }

    /** Logs each book with a 1-based position, or a hint when none are available. */
    private void listBooks() {
        var books = ControllerFactory.getSpiderController().getBooks();
        if (books == null || books.isEmpty()) {
            logger.info("暂无书籍数据,请先执行 crawl books 命令");
            return;
        }
        logger.info("=== 已爬取的书籍 (共 {} 本) ===", books.size());
        for (int i = 0; i < books.size(); i++) {
            var book = books.get(i);
            logger.info("{}. 《{}》 评分:{} 作者:{}",
                    i + 1, book.getTitle(), book.getRating(), book.getAuthor());
        }
    }

    /** Logs each movie with its own rank, or a hint when none are available. */
    private void listMovies() {
        var movies = ControllerFactory.getSpiderController().getMovies();
        if (movies == null || movies.isEmpty()) {
            logger.info("暂无电影数据,请先执行 crawl movies 命令");
            return;
        }
        logger.info("=== 已爬取的电影 (共 {} 部) ===", movies.size());
        for (var movie : movies) {
            logger.info("Top{}. 《{}》 评分:{} 导演:{}",
                    movie.getRank(), movie.getTitle(), movie.getRating(), movie.getDirector());
        }
    }

    /** Logs each hot-search entry with its rank, or a hint when none are available. */
    private void listHotSearch() {
        var hotSearches = ControllerFactory.getSpiderController().getHotSearches();
        if (hotSearches == null || hotSearches.isEmpty()) {
            logger.info("暂无热搜数据,请先执行 crawl hotsearch 命令");
            return;
        }
        logger.info("=== 已爬取的热搜 (共 {} 条) ===", hotSearches.size());
        for (var hotSearch : hotSearches) {
            logger.info("{}. {}",
                    hotSearch.getRank(), hotSearch.getKeyword());
        }
    }

    /** Lists books, movies and hot searches, separated by blank log lines. */
    private void listAll() {
        listBooks();
        logger.info("");
        listMovies();
        logger.info("");
        listHotSearch();
    }

    /** Logs the usage summary for this command. */
    private void showHelp() {
        logger.info("list 命令用法:");
        logger.info(" list books - 列出已爬取的书籍");
        logger.info(" list movies - 列出已爬取的电影");
        logger.info(" list hotsearch - 列出已爬取的热搜");
        logger.info(" list all - 列出所有已爬取的数据");
    }
}

56
project/spider/src/main/java/com/spider/command/LoadCommand.java

@ -0,0 +1,56 @@
package com.spider.command;
import java.io.File;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.view.ConsoleView;
import com.spider.view.ViewFactory;
/**
 * Console command {@code load}: reports the CSV files found in the
 * {@code data} directory, with their sizes.
 *
 * <p>The command does not read file contents; it only lists them through the
 * console view.
 */
public class LoadCommand implements Command {
    private final ConsoleView view;

    /** Creates the command with a console view obtained from {@link ViewFactory}. */
    public LoadCommand() {
        this.view = ViewFactory.createConsoleView();
    }

    @Override
    public String getName() {
        return "load";
    }

    @Override
    public String getDescription() {
        return "查看已保存的CSV数据文件";
    }

    /**
     * Lists the regular files ending in {@code .csv} directly under
     * {@code data}, sorted by path so the output is stable between runs.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        view.showInfo("数据已改为CSV格式自动保存,每次爬取会自动覆盖。\n");
        view.showInfo("已保存的CSV文件位于 data 目录下:\n");
        File dataDir = new File("data");
        if (!dataDir.exists()) {
            view.showInfo("data 目录不存在,请先运行爬虫。");
            return;
        }
        // Skip directories that merely happen to be named "*.csv".
        File[] csvFiles = dataDir.listFiles(
                (dir, name) -> name.endsWith(".csv") && new File(dir, name).isFile());
        // listFiles returns null when "data" is not a directory or cannot be read.
        if (csvFiles == null || csvFiles.length == 0) {
            view.showInfo("没有找到CSV文件,请先运行爬虫。");
            return;
        }
        // File.listFiles gives no ordering guarantee; sort for a deterministic listing.
        java.util.Arrays.sort(csvFiles);
        view.showInfo("┌─ 已保存的CSV数据文件 ─────────────────┐");
        for (File file : csvFiles) {
            view.showInfo("│ " + file.getName() + " (" + formatSize(file.length()) + ")");
        }
        view.showInfo("└─────────────────────────────────────────┘");
    }

    /**
     * Renders a byte count as whole B, KB or MB (integer division, rounded down).
     *
     * @param bytes file size in bytes
     * @return the size with its unit, e.g. {@code "512 B"} or {@code "3 KB"}
     */
    private static String formatSize(long bytes) {
        if (bytes < 1024) {
            return bytes + " B";
        }
        if (bytes < 1024 * 1024) {
            return (bytes / 1024) + " KB";
        }
        return (bytes / 1024 / 1024) + " MB";
    }
}

66
project/spider/src/main/java/com/spider/command/SaveCommand.java

@ -0,0 +1,66 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
/**
 * Console command {@code save}: asks the spider controller to persist the
 * crawled books, movies or hot searches, or to list files already saved.
 */
public class SaveCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(SaveCommand.class);

    @Override
    public String getName() {
        return "save";
    }

    @Override
    public String getDescription() {
        return "保存爬取的数据到文件";
    }

    /**
     * Dispatches on the save target in {@code args[1]} (case-insensitive).
     *
     * @param args tokenized input; {@code args[2]}, when present, is passed as
     *             the file name for {@code books}, {@code movies} and
     *             {@code hotsearch} (otherwise {@code null} is passed)
     */
    @Override
    public void execute(String[] args) {
        if (args.length < 2) {
            showHelp();
            return;
        }
        // Locale.ROOT: with a Turkish/Azeri default locale the no-arg
        // toLowerCase() turns "FILES" into "fıles", which matches no case below.
        String target = args[1].toLowerCase(java.util.Locale.ROOT);
        String filename = args.length > 2 ? args[2] : null;
        switch (target) {
            case "books":
                ControllerFactory.getSpiderController().saveBooks(filename);
                break;
            case "movies":
                ControllerFactory.getSpiderController().saveMovies(filename);
                break;
            case "hotsearch":
                ControllerFactory.getSpiderController().saveHotSearch(filename);
                break;
            case "all":
                ControllerFactory.getSpiderController().saveAllData();
                break;
            case "files":
                ControllerFactory.getSpiderController().listSavedFiles();
                break;
            default:
                logger.error("未知保存目标: {}", target);
                showHelp();
        }
    }

    /** Logs the usage summary and examples for this command. */
    private void showHelp() {
        logger.info("save 命令用法:");
        logger.info(" save books [filename] - 保存书籍数据到文件");
        logger.info(" save movies [filename] - 保存电影数据到文件");
        logger.info(" save hotsearch [fname] - 保存热搜数据到文件");
        logger.info(" save all - 保存所有数据");
        logger.info(" save files - 列出已保存的文件");
        logger.info("");
        logger.info("示例:");
        logger.info(" save books my_books.json");
        logger.info(" save movies");
        logger.info(" save all");
    }
}

31
project/spider/src/main/java/com/spider/command/StartCommand.java

@ -0,0 +1,31 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
/**
 * Console command {@code start}: starts the spider on a URL taken from the
 * arguments, or on a built-in default when none is supplied.
 */
public class StartCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(StartCommand.class);

    /** URL used when the caller does not provide one. */
    private static final String DEFAULT_URL = "https://example.com";

    @Override
    public String getName() {
        return "start";
    }

    @Override
    public String getDescription() {
        return "启动爬虫任务";
    }

    /**
     * Logs the chosen URL and hands it to the spider controller.
     *
     * @param args tokenized input; {@code args[1]}, when present, is the start URL
     */
    @Override
    public void execute(String[] args) {
        final String startUrl = args.length > 1 ? args[1] : DEFAULT_URL;
        logger.info("执行 start 命令,URL: {}", startUrl);
        ControllerFactory.getSpiderController().startSpider(startUrl);
    }
}

26
project/spider/src/main/java/com/spider/command/StatusCommand.java

@ -0,0 +1,26 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
/**
 * Console command {@code status}: asks the spider controller to display its
 * current status.
 */
public class StatusCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(StatusCommand.class);

    private static final String NAME = "status";
    private static final String DESCRIPTION = "查看爬虫状态";

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Logs the invocation, then delegates to the controller's status display.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        logger.info("执行 status 命令");
        SpiderController controller = ControllerFactory.getSpiderController();
        controller.showStatus();
    }
}

26
project/spider/src/main/java/com/spider/command/StopCommand.java

@ -0,0 +1,26 @@
package com.spider.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
/**
 * Console command {@code stop}: asks the spider controller to stop the
 * running spider task.
 */
public class StopCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(StopCommand.class);

    private static final String NAME = "stop";
    private static final String DESCRIPTION = "停止爬虫任务";

    @Override
    public String getName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Logs the invocation, then delegates to the controller's stop routine.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        logger.info("执行 stop 命令");
        ControllerFactory.getSpiderController().stopSpider();
    }
}

25
project/spider/src/main/java/com/spider/controller/ControllerFactory.java

@ -0,0 +1,25 @@
package com.spider.controller;
import com.spider.command.Command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Holds the single {@link SpiderController} shared by all commands and
 * creates it on first use.
 *
 * <p>No synchronization is performed here.
 */
public class ControllerFactory {
    private static final Logger logger = LoggerFactory.getLogger(ControllerFactory.class);

    /** The shared controller; {@code null} until first initialized. */
    private static SpiderController spiderController;

    /**
     * Builds the shared controller through {@link ControllerInitializer} if it
     * does not exist yet; does nothing on later calls.
     */
    public static void initController() {
        if (spiderController != null) {
            return;
        }
        spiderController = new ControllerInitializer().createController();
        logger.info("控制器初始化完成");
    }

    /**
     * Returns the shared controller, creating it first when necessary.
     *
     * @return the shared {@link SpiderController}
     */
    public static SpiderController getSpiderController() {
        // initController() is a no-op once the controller exists.
        initController();
        return spiderController;
    }
}

19
project/spider/src/main/java/com/spider/controller/ControllerInitializer.java

@ -0,0 +1,19 @@
package com.spider.controller;
import com.spider.model.SpiderConfig;
import com.spider.view.ConsoleView;
import com.spider.view.ViewFactory;
/**
 * Builds a {@link SpiderController} wired with the default configuration and
 * a console view.
 */
public class ControllerInitializer {
    private static final int DEFAULT_THREAD_COUNT = 5;
    private static final int DEFAULT_TIMEOUT = 30000;
    private static final int DEFAULT_RETRY_COUNT = 3;
    private static final String DEFAULT_USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)";

    /**
     * Creates a controller from the default settings and a fresh console view.
     *
     * @return a new {@link SpiderController}
     */
    public SpiderController createController() {
        SpiderConfig defaults = buildDefaultConfig();
        ConsoleView console = ViewFactory.createConsoleView();
        return new SpiderController(defaults, console);
    }

    /** Returns a new configuration populated with this class's default values. */
    private SpiderConfig buildDefaultConfig() {
        SpiderConfig settings = new SpiderConfig();
        settings.setThreadCount(DEFAULT_THREAD_COUNT);
        settings.setTimeout(DEFAULT_TIMEOUT);
        settings.setRetryCount(DEFAULT_RETRY_COUNT);
        settings.setUserAgent(DEFAULT_USER_AGENT);
        return settings;
    }
}

330
project/spider/src/main/java/com/spider/controller/SpiderController.java

@ -0,0 +1,330 @@
package com.spider.controller;
import java.io.File;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.NetworkException;
import com.spider.exception.ParseException;
import com.spider.model.Book;
import com.spider.model.HotSearch;
import com.spider.model.Movie;
import com.spider.model.SpiderConfig;
import com.spider.service.BaiduHotSearchSpider;
import com.spider.service.DataStorageService;
import com.spider.service.DoubanBookSpider;
import com.spider.service.DoubanMovieSpider;
import com.spider.view.ConsoleView;
/**
 * Coordinates the spiders, the storage service and the console view.
 *
 * <p>The controller keeps the most recently crawled (or externally supplied) books,
 * movies and hot-search entries in memory and exposes operations to crawl, save,
 * list and inspect them. A single running flag prevents two crawl/start operations
 * from overlapping.
 */
public class SpiderController {

    private static final Logger logger = LoggerFactory.getLogger(SpiderController.class);

    /** Message shown when an operation is rejected because another one is in progress. */
    private static final String ALREADY_RUNNING = "爬虫已经在运行中";

    /** A crawl step that is allowed to throw any exception. */
    @FunctionalInterface
    private interface CrawlTask {
        void run() throws Exception;
    }

    private final SpiderConfig config;
    private final ConsoleView view;
    private final AtomicBoolean isRunning;
    private final AtomicInteger pagesCrawled;
    private final AtomicInteger totalRequests;
    private final AtomicInteger failedRequests;
    private final DoubanBookSpider doubanBookSpider;
    private final DoubanMovieSpider doubanMovieSpider;
    private final BaiduHotSearchSpider baiduHotSearchSpider;
    private final DataStorageService storageService;
    private List<Book> books;
    private List<Movie> movies;
    private List<HotSearch> hotSearches;

    /**
     * Creates a controller with fresh spiders and a fresh storage service.
     *
     * @param config configuration read and updated by this controller
     * @param view   view used for all user-facing output
     */
    public SpiderController(SpiderConfig config, ConsoleView view) {
        this.config = config;
        this.view = view;
        this.isRunning = new AtomicBoolean(false);
        this.pagesCrawled = new AtomicInteger(0);
        this.totalRequests = new AtomicInteger(0);
        this.failedRequests = new AtomicInteger(0);
        this.doubanBookSpider = new DoubanBookSpider();
        this.doubanMovieSpider = new DoubanMovieSpider();
        this.baiduHotSearchSpider = new BaiduHotSearchSpider();
        this.storageService = new DataStorageService();
    }

    /**
     * Crawls the Douban book list, shows the result and saves it.
     *
     * @param limit number of books requested from the spider
     */
    public void crawlDoubanBooks(int limit) {
        runCrawl("=== 开始爬取豆瓣读书 Top" + limit + " ===", "爬取豆瓣读书失败", () -> {
            books = doubanBookSpider.crawlHotBooks(limit);
            view.showBooks(books);
            storageService.saveBooks(books);
            logger.info("书籍数据已自动保存");
        });
    }

    /** Crawls the Douban movie Top250 list, shows the result and saves it. */
    public void crawlDoubanMovies() {
        runCrawl("=== 开始爬取豆瓣电影 Top250 ===", "爬取豆瓣电影失败", () -> {
            movies = doubanMovieSpider.crawlTop250();
            view.showMovies(movies);
            storageService.saveMovies(movies);
            logger.info("电影数据已自动保存");
        });
    }

    /**
     * Crawls the Baidu hot-search board, shows the result and saves it.
     *
     * @param limit number of entries requested from the spider
     */
    public void crawlBaiduHotSearch(int limit) {
        runCrawl("=== 开始爬取百度热搜 Top" + limit + " ===", "爬取百度热搜失败", () -> {
            hotSearches = baiduHotSearchSpider.crawlHotSearch(limit);
            view.showHotSearch(hotSearches);
            storageService.saveHotSearch(hotSearches);
            logger.info("热搜数据已自动保存");
        });
    }

    /**
     * Runs one crawl task while holding the running flag.
     *
     * <p>The flag is claimed atomically so two callers cannot both pass the check,
     * and it is always released in {@code finally}. Failures are reported to the
     * log and the view; nothing is rethrown.
     *
     * @param banner            info line shown before the task starts
     * @param failureLogMessage log message used for unexpected exceptions
     * @param task              the crawl steps to execute
     */
    private void runCrawl(String banner, String failureLogMessage, CrawlTask task) {
        if (!isRunning.compareAndSet(false, true)) {
            view.showError(ALREADY_RUNNING);
            return;
        }
        try {
            view.showInfo(banner);
            task.run();
        } catch (NetworkException e) {
            logger.error("网络异常 - {}", e.getMessage());
            view.showError("网络错误: " + e.getMessage());
        } catch (ParseException e) {
            logger.error("解析异常 - Source: {}, Element: {}, Message: {}",
                    e.getSource(), e.getElement(), e.getMessage());
            view.showError("解析错误: " + e.getMessage());
        } catch (Exception e) {
            logger.error(failureLogMessage, e);
            view.showError("爬取失败: " + e.getMessage());
        } finally {
            isRunning.set(false);
        }
    }

    /**
     * Saves all three in-memory data sets and shows the paths that were written.
     * Data sets for which the storage service returns {@code null} are omitted.
     */
    public void saveAllData() {
        StringBuilder result = new StringBuilder();
        String booksPath = storageService.saveBooks(books);
        String moviesPath = storageService.saveMovies(movies);
        String hotSearchPath = storageService.saveHotSearch(hotSearches);
        if (booksPath != null) {
            result.append("书籍: ").append(booksPath).append("\n");
        }
        if (moviesPath != null) {
            result.append("电影: ").append(moviesPath).append("\n");
        }
        if (hotSearchPath != null) {
            result.append("热搜: ").append(hotSearchPath);
        }
        view.showInfo(result.toString());
    }

    /**
     * Saves the in-memory books.
     *
     * @param filename currently ignored; the storage service is called without it
     */
    public void saveBooks(String filename) {
        String path = storageService.saveBooks(books);
        if (path != null) {
            view.showInfo("书籍数据已保存到: " + path);
        }
    }

    /**
     * Saves the in-memory movies.
     *
     * @param filename currently ignored; the storage service is called without it
     */
    public void saveMovies(String filename) {
        String path = storageService.saveMovies(movies);
        if (path != null) {
            view.showInfo("电影数据已保存到: " + path);
        }
    }

    /**
     * Saves the in-memory hot-search entries.
     *
     * @param filename currently ignored; the storage service is called without it
     */
    public void saveHotSearch(String filename) {
        String path = storageService.saveHotSearch(hotSearches);
        if (path != null) {
            view.showInfo("热搜数据已保存到: " + path);
        }
    }

    /** Lists the {@code .csv} files found in the {@code data} directory with their sizes. */
    public void listSavedFiles() {
        File dataDir = new File("data");
        if (!dataDir.exists()) {
            view.showInfo("数据目录不存在");
            return;
        }
        File[] files = dataDir.listFiles((dir, name) -> name.endsWith(".csv"));
        if (files == null || files.length == 0) {
            view.showInfo("没有已保存的数据文件");
            return;
        }
        StringBuilder sb = new StringBuilder("\n已保存的数据文件:\n");
        for (File file : files) {
            sb.append(" ").append(file.getName())
                    .append(" (").append(formatSize(file.length())).append(")\n");
        }
        view.showInfo(sb.toString());
    }

    /** Formats a byte count as whole B, KB or MB (integer division, no rounding). */
    private static String formatSize(long size) {
        if (size < 1024) {
            return size + " B";
        }
        if (size < 1024 * 1024) {
            return (size / 1024) + " KB";
        }
        return (size / 1024 / 1024) + " MB";
    }

    /**
     * Starts the generic spider task for the given URL and resets the counters.
     *
     * <p>The running flag is left set after this method returns; it is cleared by
     * {@link #stopSpider()}.
     *
     * @param url target URL passed to the crawl step
     */
    public void startSpider(String url) {
        if (!isRunning.compareAndSet(false, true)) {
            view.showError(ALREADY_RUNNING);
            return;
        }
        pagesCrawled.set(0);
        totalRequests.set(0);
        failedRequests.set(0);
        view.showInfo("=== 爬虫启动 ===");
        logger.info("目标URL: {}", url);
        logger.info("线程数: {}", config.getThreadCount());
        logger.info("超时时间: {}ms", config.getTimeout());
        logger.info("开始抓取...");
        performCrawling(url);
        view.showInfo("=== 爬虫任务完成 ===");
    }

    /**
     * Logs the crawl steps for {@code url}, bumps the request/page counters and
     * reports a fixed item count of 10 to the view. No network call is made here.
     */
    private void performCrawling(String url) {
        logger.info("连接服务器: {}", url);
        logger.info("发送HTTP请求...");
        totalRequests.incrementAndGet();
        pagesCrawled.incrementAndGet();
        logger.info("接收响应: 200 OK");
        logger.info("解析HTML内容...");
        logger.info("提取数据...");
        logger.info("保存到数据库...");
        logger.info("页面已处理: {}", pagesCrawled.get());
        view.showCrawlingResult(url, 10);
    }

    /** Clears the running flag, or reports an error when nothing is running. */
    public void stopSpider() {
        if (!isRunning.compareAndSet(true, false)) {
            view.showError("爬虫未在运行");
            return;
        }
        view.showInfo("正在停止爬虫...");
        logger.info("保存当前进度...");
        logger.info("关闭网络连接...");
        logger.info("释放资源...");
        view.showInfo("=== 爬虫已停止 ===");
    }

    /**
     * Shows pages crawled, running state, request success rate (percent) and the
     * heap memory currently in use (MB).
     */
    public void showStatus() {
        int total = totalRequests.get();
        int failed = failedRequests.get();
        double successRate = 0.0;
        if (total > 0) {
            successRate = (double) (total - failed) / total * 100;
        }
        // Used heap = allocated heap minus the free part of it; totalMemory() alone
        // is the heap size, not the usage.
        Runtime runtime = Runtime.getRuntime();
        long usedBytes = runtime.totalMemory() - runtime.freeMemory();
        int memoryUsage = (int) (usedBytes / 1024 / 1024);
        view.showStatus(pagesCrawled.get(), isRunning.get(), successRate, memoryUsage);
    }

    /**
     * Updates one configuration entry.
     *
     * @param key   one of {@code thread.count}, {@code timeout}, {@code retry.count},
     *              {@code user.agent}; any other key is reported as unknown
     * @param value new value; must parse as an integer for the numeric keys
     */
    public void updateConfig(String key, String value) {
        try {
            switch (key) {
                case "thread.count":
                    config.setThreadCount(Integer.parseInt(value));
                    break;
                case "timeout":
                    config.setTimeout(Integer.parseInt(value));
                    break;
                case "retry.count":
                    config.setRetryCount(Integer.parseInt(value));
                    break;
                case "user.agent":
                    config.setUserAgent(value);
                    break;
                default:
                    view.showError("未知配置项: " + key);
                    return;
            }
            view.showConfig(key, value);
        } catch (NumberFormatException e) {
            view.showError("无效的数值: " + value);
        }
    }

    /** Writes the current configuration values to the log between two view banner lines. */
    public void showConfig() {
        view.showInfo("=== 当前配置 ===");
        logger.info("线程数: {}", config.getThreadCount());
        logger.info("超时时间: {}ms", config.getTimeout());
        logger.info("重试次数: {}", config.getRetryCount());
        logger.info("User-Agent: {}", config.getUserAgent());
        logger.info("起始URL: {}", config.getStartUrl());
        view.showInfo("================");
    }

    /** @return whether the running flag is currently set */
    public boolean isRunning() {
        return isRunning.get();
    }

    /** @return the in-memory books, or {@code null} if none were crawled or set */
    public List<Book> getBooks() {
        return books;
    }

    /** @return the in-memory movies, or {@code null} if none were crawled or set */
    public List<Movie> getMovies() {
        return movies;
    }

    /** @return the in-memory hot-search entries, or {@code null} if none were crawled or set */
    public List<HotSearch> getHotSearches() {
        return hotSearches;
    }

    /** @param books list that replaces the in-memory books */
    public void setBooks(List<Book> books) {
        this.books = books;
    }

    /** @param movies list that replaces the in-memory movies */
    public void setMovies(List<Movie> movies) {
        this.movies = movies;
    }

    /** @param hotSearches list that replaces the in-memory hot-search entries */
    public void setHotSearches(List<HotSearch> hotSearches) {
        this.hotSearches = hotSearches;
    }
}

115
project/spider/src/main/java/com/spider/core/CommandExecutor.java

@ -0,0 +1,115 @@
package com.spider.core;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.command.Command;
import com.spider.command.ConfigCommand;
import com.spider.command.CrawlCommand;
import com.spider.command.HelpCommand;
import com.spider.command.ListCommand;
import com.spider.command.LoadCommand;
import com.spider.command.SaveCommand;
import com.spider.command.StartCommand;
import com.spider.command.StatusCommand;
import com.spider.command.StopCommand;
/**
 * Parses a line of console input and dispatches it to the matching {@link Command}.
 *
 * <p>Commands are looked up by their lower-cased first token. {@code exit} and
 * {@code quit} terminate the JVM directly.
 */
public class CommandExecutor {

    private static final Logger logger = LoggerFactory.getLogger(CommandExecutor.class);

    /** Registered commands keyed by command name. */
    private final Map<String, Command> commands;

    /** Creates an executor with all built-in commands registered. */
    public CommandExecutor() {
        commands = new HashMap<>();
        registerCommands();
    }

    /** Registers every built-in command under its keyword. */
    private void registerCommands() {
        commands.put("help", new HelpCommand());
        commands.put("list", new ListCommand());
        commands.put("crawl", new CrawlCommand());
        commands.put("save", new SaveCommand());
        commands.put("load", new LoadCommand());
        commands.put("start", new StartCommand());
        commands.put("stop", new StopCommand());
        commands.put("status", new StatusCommand());
        commands.put("config", new ConfigCommand());
        logger.info("已注册 {} 个命令", commands.size());
    }

    /**
     * Executes one line of user input.
     *
     * <p>Blank or {@code null} input is ignored. The line is split on whitespace and
     * the whole token array (including the command name at index 0) is passed to the
     * command. Exceptions thrown by a command are logged and not rethrown. An unknown
     * command logs an error and prints the help text.
     *
     * @param input raw line typed by the user; may be {@code null}
     */
    public void execute(String input) {
        if (input == null) {
            return;
        }
        String trimmed = input.trim();
        if (trimmed.isEmpty()) {
            return;
        }
        String[] parts = trimmed.split("\\s+");
        // Locale.ROOT keeps the lookup key stable regardless of the default locale
        // (e.g. "EXIT" lower-cases to a dotless i under a Turkish locale).
        String commandName = parts[0].toLowerCase(java.util.Locale.ROOT);
        if (commandName.equals("exit") || commandName.equals("quit")) {
            logger.info("感谢使用,再见!");
            System.exit(0);
        }
        Command command = commands.get(commandName);
        if (command != null) {
            try {
                command.execute(parts);
            } catch (Exception e) {
                logger.error("执行命令时出错: {}", commandName, e);
            }
        } else {
            logger.error("未知命令: {}", commandName);
            showHelp();
        }
    }

    /** Writes the boxed overview of all available commands to the log, one line per call. */
    public void showHelp() {
        logger.info("\n┌─ 可用命令 ───────────────────────────────────────────┐");
        logger.info("│ │");
        logger.info("│ 【数据爬取】 │");
        logger.info("│ crawl books [N] 爬取豆瓣读书 Top N (默认50) │");
        logger.info("│ crawl movies 爬取豆瓣电影 Top250 │");
        logger.info("│ crawl hotsearch [N] 爬取百度热搜 Top N (默认50) │");
        logger.info("│ crawl all 爬取所有数据源 │");
        logger.info("│ │");
        logger.info("│ 【数据保存】 │");
        logger.info("│ save books [fname] 保存书籍数据到文件 │");
        logger.info("│ save movies [fname] 保存电影数据到文件 │");
        logger.info("│ save hotsearch [fn] 保存热搜数据到文件 │");
        logger.info("│ save all 保存所有已爬取的数据 │");
        logger.info("│ save files 列出已保存的文件 │");
        logger.info("│ │");
        logger.info("│ 【数据加载】 │");
        logger.info("│ load books <fname> 从文件加载书籍数据 │");
        logger.info("│ load movies <fname> 从文件加载电影数据 │");
        logger.info("│ load hotsearch <fn> 从文件加载热搜数据 │");
        logger.info("│ │");
        logger.info("│ 【数据列表】 │");
        logger.info("│ list books 显示已爬取的书籍 │");
        logger.info("│ list movies 显示已爬取的电影 │");
        logger.info("│ list hotsearch 显示已爬取的热搜 │");
        logger.info("│ list all 显示所有已爬取的数据 │");
        logger.info("│ │");
        logger.info("│ 【控制命令】 │");
        logger.info("│ start [url] 启动爬虫任务 │");
        logger.info("│ stop 停止爬虫任务 │");
        logger.info("│ status 查看爬虫运行状态 │");
        logger.info("│ │");
        logger.info("│ 【配置命令】 │");
        logger.info("│ config show 显示当前配置 │");
        logger.info("│ config set <k> <v> 设置配置项 │");
        logger.info("│ │");
        logger.info("│ 【其他】 │");
        logger.info("│ help 显示帮助信息 │");
        logger.info("│ exit 退出程序 │");
        logger.info("│ │");
        logger.info("└──────────────────────────────────────────────────────────┘");
    }

    /**
     * Returns the live command registry (not a copy).
     *
     * @return map from command name to command
     */
    public Map<String, Command> getCommands() {
        return commands;
    }
}

86
project/spider/src/main/java/com/spider/core/SpiderRunner.java

@ -0,0 +1,86 @@
package com.spider.core;
import java.util.Scanner;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.controller.ControllerFactory;
import com.spider.view.ConsoleView;
import com.spider.view.ViewFactory;
/**
 * Application entry point.
 *
 * <p>With {@code oneclick} as the first argument the program crawls every data
 * source once and exits; otherwise it starts an interactive command prompt.
 */
public class SpiderRunner {

    private static final Logger logger = LoggerFactory.getLogger(SpiderRunner.class);

    /** Executor used by the interactive prompt; created when that mode starts. */
    private static CommandExecutor executor;

    /**
     * Selects the run mode from the command line.
     *
     * @param args {@code args[0]} equal to {@code oneclick} (case-insensitive) selects
     *             one-click mode; anything else selects interactive mode
     */
    public static void main(String[] args) {
        ConsoleView view = ViewFactory.createConsoleView();
        if (args.length > 0 && "oneclick".equalsIgnoreCase(args[0])) {
            runOneClickMode(view);
        } else {
            runInteractiveMode(view);
        }
    }

    /**
     * Crawls books, movies and hot searches in sequence, saves everything, then
     * terminates the JVM with exit code 0.
     */
    private static void runOneClickMode(ConsoleView view) {
        logger.info("╔══════════════════════════════════════════╗");
        logger.info("║ Spider 多功能爬虫框架 v2.0 ║");
        logger.info("║ 一键爬取模式 ║");
        logger.info("╚══════════════════════════════════════════╝");
        ControllerFactory.initController();
        try {
            view.showInfo("=== 开始一键爬取所有数据 ===\n");
            view.showInfo(">>> 第1步:爬取豆瓣读书 Top50...");
            ControllerFactory.getSpiderController().crawlDoubanBooks(50);
            view.showInfo("\n>>> 第2步:爬取豆瓣电影 Top250...");
            ControllerFactory.getSpiderController().crawlDoubanMovies();
            view.showInfo("\n>>> 第3步:爬取百度热搜 Top50...");
            ControllerFactory.getSpiderController().crawlBaiduHotSearch(50);
            view.showInfo("\n=== 数据爬取完成,开始保存... ===");
            ControllerFactory.getSpiderController().saveAllData();
            view.showInfo("\n=== 数据保存完成 ===");
            view.showInfo("=== 一键爬取执行完毕! ===\n");
        } catch (Exception e) {
            logger.error("一键爬取出错: ", e);
            view.showError("执行失败: " + e.getMessage());
        }
        System.exit(0);
    }

    /**
     * Runs the read-evaluate loop until the input stream ends or a command exits
     * the JVM.
     *
     * <p>End of input (Ctrl+D, closed pipe) is detected with
     * {@link Scanner#hasNextLine()} before reading. Without that check
     * {@code nextLine()} throws on every iteration and the surrounding catch-all
     * turns it into an endless error-logging loop.
     */
    private static void runInteractiveMode(ConsoleView view) {
        view.showWelcome();
        ControllerFactory.initController();
        executor = new CommandExecutor();
        executor.showHelp();
        Scanner scanner = new Scanner(System.in);
        while (true) {
            System.out.print("\nspider> ");
            if (!scanner.hasNextLine()) {
                logger.info("输入流已结束,退出程序");
                break;
            }
            String input = scanner.nextLine();
            if (input.trim().isEmpty()) {
                continue;
            }
            try {
                executor.execute(input);
            } catch (Exception e) {
                // Keep the prompt alive when a single command fails.
                logger.error("发生错误: ", e);
            }
        }
    }
}

41
project/spider/src/main/java/com/spider/exception/DataException.java

@ -0,0 +1,41 @@
package com.spider.exception;
/**
 * Exception for data-related failures. Always carries the error code
 * {@code DATA_ERROR} and optionally the kind of data involved.
 */
public class DataException extends SpiderException {

    /** Error code passed to the superclass by every constructor. */
    private static final String ERROR_CODE = "DATA_ERROR";

    /** Kind of data the failure relates to; {@code null} when not supplied. */
    private final String dataType;

    /**
     * @param message description of the failure
     */
    public DataException(String message) {
        super(ERROR_CODE, message);
        this.dataType = null;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public DataException(String message, Throwable cause) {
        super(ERROR_CODE, message, cause);
        this.dataType = null;
    }

    /**
     * @param dataType kind of data the failure relates to
     * @param message  description of the failure
     */
    public DataException(String dataType, String message) {
        super(ERROR_CODE, message);
        this.dataType = dataType;
    }

    /**
     * @param dataType kind of data the failure relates to
     * @param message  description of the failure
     * @param cause    underlying exception
     */
    public DataException(String dataType, String message, Throwable cause) {
        super(ERROR_CODE, message, cause);
        this.dataType = dataType;
    }

    /** @return the data type, or {@code null} when none was supplied */
    public String getDataType() {
        return dataType;
    }

    /**
     * Renders the error code, the data type (only when present) and the message.
     */
    @Override
    public String toString() {
        String typePart = (dataType == null) ? "" : ", dataType='" + dataType + "'";
        return "DataException{"
                + "errorCode='" + getErrorCode() + "'"
                + typePart
                + ", message='" + getMessage() + "'"
                + "}";
    }
}

23
project/spider/src/main/java/com/spider/exception/NetworkException.java

@ -0,0 +1,23 @@
package com.spider.exception;
/**
 * Exception for network failures. Always carries the error code {@code NETWORK_ERROR}.
 *
 * <p>Overload note: a call with two {@code String} arguments binds to
 * {@link #NetworkException(String, String)}, whose first argument is the URL and
 * is embedded in the message.
 */
public class NetworkException extends SpiderException {

    /** Error code passed to the superclass by every constructor. */
    private static final String ERROR_CODE = "NETWORK_ERROR";

    /**
     * @param message description of the failure
     */
    public NetworkException(String message) {
        super(ERROR_CODE, message);
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public NetworkException(String message, Throwable cause) {
        super(ERROR_CODE, message, cause);
    }

    /**
     * @param url     URL that was being accessed; prefixed to the message
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public NetworkException(String url, String message, Throwable cause) {
        super(ERROR_CODE, withUrl(url, message), cause);
    }

    /**
     * @param url        URL that was being accessed; prefixed to the message
     * @param statusCode HTTP status code included in the message
     */
    public NetworkException(String url, int statusCode) {
        super(ERROR_CODE, withUrl(url, "HTTP状态码: " + statusCode));
    }

    /**
     * @param url     URL that was being accessed; prefixed to the message
     * @param message description of the failure
     */
    public NetworkException(String url, String message) {
        super(ERROR_CODE, withUrl(url, message));
    }

    /** Builds the {@code "URL: <url>, <detail>"} message used by the URL-aware constructors. */
    private static String withUrl(String url, String detail) {
        return "URL: " + url + ", " + detail;
    }
}

53
project/spider/src/main/java/com/spider/exception/ParseException.java

@ -0,0 +1,53 @@
package com.spider.exception;
/**
 * Exception for parsing failures. Always carries the error code {@code PARSE_ERROR}
 * and optionally the source and element being parsed.
 */
public class ParseException extends SpiderException {

    /** Error code passed to the superclass by every constructor. */
    private static final String ERROR_CODE = "PARSE_ERROR";

    /** Source being parsed; {@code null} when not supplied. */
    private final String source;

    /** Element being parsed; {@code null} when not supplied. */
    private final String element;

    /**
     * @param message description of the failure
     */
    public ParseException(String message) {
        super(ERROR_CODE, message);
        this.source = null;
        this.element = null;
    }

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ParseException(String message, Throwable cause) {
        super(ERROR_CODE, message, cause);
        this.source = null;
        this.element = null;
    }

    /**
     * @param source  source being parsed
     * @param element element being parsed
     * @param message description of the failure
     */
    public ParseException(String source, String element, String message) {
        super(ERROR_CODE, message);
        this.source = source;
        this.element = element;
    }

    /**
     * @param source  source being parsed
     * @param element element being parsed
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public ParseException(String source, String element, String message, Throwable cause) {
        super(ERROR_CODE, message, cause);
        this.source = source;
        this.element = element;
    }

    /** @return the source, or {@code null} when none was supplied */
    public String getSource() {
        return source;
    }

    /** @return the element, or {@code null} when none was supplied */
    public String getElement() {
        return element;
    }

    /**
     * Renders the error code, then source and element (each only when present),
     * then the message.
     */
    @Override
    public String toString() {
        String sourcePart = (source == null) ? "" : ", source='" + source + "'";
        String elementPart = (element == null) ? "" : ", element='" + element + "'";
        return "ParseException{"
                + "errorCode='" + getErrorCode() + "'"
                + sourcePart
                + elementPart
                + ", message='" + getMessage() + "'"
                + "}";
    }
}

37
project/spider/src/main/java/com/spider/exception/SpiderException.java

@ -0,0 +1,37 @@
package com.spider.exception;
/**
 * Base unchecked exception of the spider application. Every instance carries an
 * error code in addition to the usual message and cause.
 */
public class SpiderException extends RuntimeException {

    /** Error code used when the caller does not supply one. */
    private static final String DEFAULT_ERROR_CODE = "SPIDER_ERROR";

    private final String errorCode;

    /**
     * Creates an exception with the default error code.
     *
     * @param message description of the failure
     */
    public SpiderException(String message) {
        this(DEFAULT_ERROR_CODE, message);
    }

    /**
     * Creates an exception with the default error code and a cause.
     *
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public SpiderException(String message, Throwable cause) {
        this(DEFAULT_ERROR_CODE, message, cause);
    }

    /**
     * @param errorCode code identifying the failure category
     * @param message   description of the failure
     */
    public SpiderException(String errorCode, String message) {
        super(message);
        this.errorCode = errorCode;
    }

    /**
     * @param errorCode code identifying the failure category
     * @param message   description of the failure
     * @param cause     underlying exception
     */
    public SpiderException(String errorCode, String message, Throwable cause) {
        super(message, cause);
        this.errorCode = errorCode;
    }

    /** @return the error code of this exception */
    public String getErrorCode() {
        return errorCode;
    }

    /** Renders the error code and the message. */
    @Override
    public String toString() {
        return String.format("SpiderException{errorCode='%s', message='%s'}",
                errorCode, getMessage());
    }
}

89
project/spider/src/main/java/com/spider/model/Book.java

@ -0,0 +1,89 @@
package com.spider.model;
import java.util.Objects;
/**
 * A book entry with title, rating, author, summary and comment count.
 *
 * <p>Two books are considered equal when their titles are equal; the other
 * fields do not take part in {@link #equals(Object)} or {@link #hashCode()}.
 */
public class Book implements DataItem {

    private String title;
    private double rating;
    private String author;
    private String summary;
    private int commentCount;

    /** Creates an empty book; populate it through the setters. */
    public Book() {
    }

    /**
     * @param title        book title
     * @param rating       rating value
     * @param author       author text
     * @param summary      short description
     * @param commentCount number of comments/ratings
     */
    public Book(String title, double rating, String author, String summary, int commentCount) {
        this.title = title;
        this.rating = rating;
        this.author = author;
        this.summary = summary;
        this.commentCount = commentCount;
    }

    @Override
    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    @Override
    public double getRating() {
        return rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    @Override
    public String getSummary() {
        return summary;
    }

    public void setSummary(String summary) {
        this.summary = summary;
    }

    public int getCommentCount() {
        return commentCount;
    }

    public void setCommentCount(int commentCount) {
        this.commentCount = commentCount;
    }

    /** Multi-line, human-readable rendering of all fields. */
    @Override
    public String toString() {
        return "《" + title + "》" +
                "\n 评分: " + rating +
                "\n 作者: " + author +
                "\n 评价数: " + commentCount +
                "\n 简介: " + summary;
    }

    /** Equality is based on the title only. */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Book book = (Book) o;
        return Objects.equals(title, book.title);
    }

    /** Consistent with {@link #equals(Object)}: derived from the title only. */
    @Override
    public int hashCode() {
        return Objects.hash(title);
    }

    /** @return the fixed source label for books */
    @Override
    public String getSource() {
        return "豆瓣读书";
    }
}

13
project/spider/src/main/java/com/spider/model/DataItem.java

@ -0,0 +1,13 @@
package com.spider.model;
/**
 * Common read-only view of a crawled item.
 */
public interface DataItem {

    /** @return the item's title */
    String getTitle();

    /** @return the item's rating */
    double getRating();

    /** @return the item's summary text */
    String getSummary();

    /** @return a label naming where the item came from */
    String getSource();

    /**
     * Builds a one-line description of the form
     * {@code [source] title (评分: rating)}, with the rating shown to one decimal place.
     *
     * @return the formatted display string
     */
    default String getDisplayString() {
        final String origin = getSource();
        final String heading = getTitle();
        final double score = getRating();
        return String.format("[%s] %s (评分: %.1f)", origin, heading, score);
    }
}

70
project/spider/src/main/java/com/spider/model/HotSearch.java

@ -0,0 +1,70 @@
package com.spider.model;
import java.util.Objects;
/**
 * A hot-search entry: a rank and the keyword at that rank.
 *
 * <p>As a {@link DataItem} the keyword serves as the title; rating and summary
 * are fixed placeholders ({@code 0.0} and the empty string).
 */
public class HotSearch implements DataItem {

    private int rank;
    private String keyword;

    /** Creates an empty entry; populate it through the setters. */
    public HotSearch() {
    }

    /**
     * @param rank    position of the entry
     * @param keyword search keyword
     */
    public HotSearch(int rank, String keyword) {
        this.rank = rank;
        this.keyword = keyword;
    }

    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    public String getKeyword() {
        return keyword;
    }

    public void setKeyword(String keyword) {
        this.keyword = keyword;
    }

    /** Renders the entry as {@code "<rank>. <keyword>"}. */
    @Override
    public String toString() {
        return rank + ". " + keyword;
    }

    /** Two entries are equal when both rank and keyword match. */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o == null || o.getClass() != getClass()) {
            return false;
        }
        HotSearch other = (HotSearch) o;
        if (rank != other.rank) {
            return false;
        }
        return Objects.equals(keyword, other.keyword);
    }

    @Override
    public int hashCode() {
        return Objects.hash(rank, keyword);
    }

    /** @return the keyword, which doubles as the title */
    @Override
    public String getTitle() {
        return keyword;
    }

    /** @return always {@code 0.0} */
    @Override
    public double getRating() {
        return 0.0;
    }

    /** @return the fixed source label for hot-search entries */
    @Override
    public String getSource() {
        return "百度热搜";
    }

    /** @return always the empty string */
    @Override
    public String getSummary() {
        return "";
    }
}

82
project/spider/src/main/java/com/spider/model/Movie.java

@ -0,0 +1,82 @@
package com.spider.model;
import java.util.Objects;
/**
 * A movie entry with title, rating, director and rank.
 *
 * <p>Two movies are considered equal when their titles are equal; the other
 * fields do not take part in {@link #equals(Object)} or {@link #hashCode()}.
 */
public class Movie implements DataItem {

    private String title;
    private double rating;
    private String director;
    private int rank;

    /** Creates an empty movie; populate it through the setters. */
    public Movie() {
    }

    /**
     * @param title    movie title
     * @param rating   rating value
     * @param director director text
     * @param rank     position in the list
     */
    public Movie(String title, double rating, String director, int rank) {
        this.title = title;
        this.rating = rating;
        this.director = director;
        this.rank = rank;
    }

    @Override
    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    @Override
    public double getRating() {
        return rating;
    }

    public void setRating(double rating) {
        this.rating = rating;
    }

    public String getDirector() {
        return director;
    }

    public void setDirector(String director) {
        this.director = director;
    }

    public int getRank() {
        return rank;
    }

    public void setRank(int rank) {
        this.rank = rank;
    }

    /** @return always the empty string; movies carry no summary */
    @Override
    public String getSummary() {
        return "";
    }

    /** Multi-line, human-readable rendering of rank, title, rating and director. */
    @Override
    public String toString() {
        return "Top" + rank + " 《" + title + "》" +
                "\n 评分: " + rating +
                "\n 导演: " + director;
    }

    /** Equality is based on the title only. */
    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        Movie movie = (Movie) o;
        return Objects.equals(title, movie.title);
    }

    /** Consistent with {@link #equals(Object)}: derived from the title only. */
    @Override
    public int hashCode() {
        return Objects.hash(title);
    }

    /** @return the fixed source label for movies */
    @Override
    public String getSource() {
        return "豆瓣电影";
    }
}

60
project/spider/src/main/java/com/spider/model/SpiderConfig.java

@ -0,0 +1,60 @@
package com.spider.model;
/**
 * Mutable settings holder for the spider: thread count, timeout, retry count,
 * user agent and start URL. Setters store values without validation.
 */
public class SpiderConfig {

    /** Number of threads; defaults to 5. */
    private int threadCount = 5;

    /** Timeout value; defaults to 30000. */
    private int timeout = 30000;

    /** Number of retries; defaults to 3. */
    private int retryCount = 3;

    /** User-Agent string; defaults to {@code "Mozilla/5.0"}. */
    private String userAgent = "Mozilla/5.0";

    /** Start URL; {@code null} until set. */
    private String startUrl;

    public int getThreadCount() {
        return this.threadCount;
    }

    public void setThreadCount(int threadCount) {
        this.threadCount = threadCount;
    }

    public int getTimeout() {
        return this.timeout;
    }

    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    public int getRetryCount() {
        return this.retryCount;
    }

    public void setRetryCount(int retryCount) {
        this.retryCount = retryCount;
    }

    public String getUserAgent() {
        return this.userAgent;
    }

    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    public String getStartUrl() {
        return this.startUrl;
    }

    public void setStartUrl(String startUrl) {
        this.startUrl = startUrl;
    }

    /** Renders every setting as {@code SpiderConfig{name=value, ...}}. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SpiderConfig{");
        text.append("threadCount=").append(threadCount);
        text.append(", timeout=").append(timeout);
        text.append(", retryCount=").append(retryCount);
        text.append(", userAgent='").append(userAgent).append('\'');
        text.append(", startUrl='").append(startUrl).append('\'');
        text.append('}');
        return text.toString();
    }
}

170
project/spider/src/main/java/com/spider/repository/ArticleRepository.java

@ -0,0 +1,170 @@
package com.spider.repository;
import java.util.ArrayList;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.model.Book;
import com.spider.model.HotSearch;
import com.spider.model.Movie;
/**
 * In-memory store for books, movies and hot-search entries.
 *
 * <p>Single-item adds validate their argument and silently skip duplicates (as
 * defined by each type's {@code equals}). Bulk adds are best-effort: an invalid
 * element is logged and skipped, and the logged total counts only elements that
 * were actually stored. Getters return copies of the internal lists.
 */
public class ArticleRepository {

    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);

    private final List<Book> books;
    private final List<Movie> movies;
    private final List<HotSearch> hotSearches;

    /** Creates an empty repository. */
    public ArticleRepository() {
        this.books = new ArrayList<>();
        this.movies = new ArrayList<>();
        this.hotSearches = new ArrayList<>();
    }

    /**
     * Adds a book unless an equal one is already stored.
     *
     * @param book the book to add
     * @throws IllegalArgumentException if {@code book} is {@code null} or its title is
     *                                  {@code null} or blank
     */
    public void addBook(Book book) {
        if (book == null) {
            logger.warn("添加书籍失败:书籍对象为空");
            throw new IllegalArgumentException("书籍对象不能为空");
        }
        if (book.getTitle() == null || book.getTitle().trim().isEmpty()) {
            logger.warn("添加书籍失败:书名不能为空");
            throw new IllegalArgumentException("书名不能为空");
        }
        if (books.contains(book)) {
            logger.debug("书籍已存在,跳过添加: {}", book.getTitle());
            return;
        }
        books.add(book);
        logger.debug("成功添加书籍: {}", book.getTitle());
    }

    /**
     * Adds every valid, not-yet-stored book from the list.
     *
     * @param bookList books to add
     * @throws IllegalArgumentException if the list is {@code null} or empty
     */
    public void addBooks(List<Book> bookList) {
        if (bookList == null || bookList.isEmpty()) {
            logger.warn("添加书籍列表失败:列表为空");
            throw new IllegalArgumentException("书籍列表不能为空");
        }
        int sizeBefore = books.size();
        for (Book book : bookList) {
            try {
                addBook(book);
            } catch (RuntimeException e) {
                logger.warn("添加书籍失败: {}", e.getMessage());
            }
        }
        // Size delta excludes duplicates that addBook skipped without throwing.
        logger.info("批量添加书籍完成,成功添加 {} 本", books.size() - sizeBefore);
    }

    /**
     * Adds a movie unless an equal one is already stored.
     *
     * @param movie the movie to add
     * @throws IllegalArgumentException if {@code movie} is {@code null} or its title is
     *                                  {@code null} or blank
     */
    public void addMovie(Movie movie) {
        if (movie == null) {
            logger.warn("添加电影失败:电影对象为空");
            throw new IllegalArgumentException("电影对象不能为空");
        }
        if (movie.getTitle() == null || movie.getTitle().trim().isEmpty()) {
            logger.warn("添加电影失败:电影名不能为空");
            throw new IllegalArgumentException("电影名不能为空");
        }
        if (movies.contains(movie)) {
            logger.debug("电影已存在,跳过添加: {}", movie.getTitle());
            return;
        }
        movies.add(movie);
        logger.debug("成功添加电影: {}", movie.getTitle());
    }

    /**
     * Adds every valid, not-yet-stored movie from the list.
     *
     * @param movieList movies to add
     * @throws IllegalArgumentException if the list is {@code null} or empty
     */
    public void addMovies(List<Movie> movieList) {
        if (movieList == null || movieList.isEmpty()) {
            logger.warn("添加电影列表失败:列表为空");
            throw new IllegalArgumentException("电影列表不能为空");
        }
        int sizeBefore = movies.size();
        for (Movie movie : movieList) {
            try {
                addMovie(movie);
            } catch (RuntimeException e) {
                logger.warn("添加电影失败: {}", e.getMessage());
            }
        }
        // Size delta excludes duplicates that addMovie skipped without throwing.
        logger.info("批量添加电影完成,成功添加 {} 部", movies.size() - sizeBefore);
    }

    /**
     * Adds a hot-search entry unless an equal one is already stored.
     *
     * @param hotSearch the entry to add
     * @throws IllegalArgumentException if {@code hotSearch} is {@code null} or its keyword
     *                                  is {@code null} or blank
     */
    public void addHotSearch(HotSearch hotSearch) {
        if (hotSearch == null) {
            logger.warn("添加热搜失败:热搜对象为空");
            throw new IllegalArgumentException("热搜对象不能为空");
        }
        if (hotSearch.getKeyword() == null || hotSearch.getKeyword().trim().isEmpty()) {
            logger.warn("添加热搜失败:关键词不能为空");
            throw new IllegalArgumentException("关键词不能为空");
        }
        if (hotSearches.contains(hotSearch)) {
            logger.debug("热搜已存在,跳过添加: {}", hotSearch.getKeyword());
            return;
        }
        hotSearches.add(hotSearch);
        logger.debug("成功添加热搜: {}", hotSearch.getKeyword());
    }

    /**
     * Adds every valid, not-yet-stored hot-search entry from the list.
     *
     * @param hotSearchList entries to add
     * @throws IllegalArgumentException if the list is {@code null} or empty
     */
    public void addHotSearches(List<HotSearch> hotSearchList) {
        if (hotSearchList == null || hotSearchList.isEmpty()) {
            logger.warn("添加热搜列表失败:列表为空");
            throw new IllegalArgumentException("热搜列表不能为空");
        }
        int sizeBefore = hotSearches.size();
        for (HotSearch hotSearch : hotSearchList) {
            try {
                addHotSearch(hotSearch);
            } catch (RuntimeException e) {
                logger.warn("添加热搜失败: {}", e.getMessage());
            }
        }
        // Size delta excludes duplicates that addHotSearch skipped without throwing.
        logger.info("批量添加热搜完成,成功添加 {} 条", hotSearches.size() - sizeBefore);
    }

    /** @return a new list containing the stored books */
    public List<Book> getBooks() {
        return new ArrayList<>(books);
    }

    /** @return a new list containing the stored movies */
    public List<Movie> getMovies() {
        return new ArrayList<>(movies);
    }

    /** @return a new list containing the stored hot-search entries */
    public List<HotSearch> getHotSearches() {
        return new ArrayList<>(hotSearches);
    }

    /** @return number of stored books */
    public int getBookCount() {
        return books.size();
    }

    /** @return number of stored movies */
    public int getMovieCount() {
        return movies.size();
    }

    /** @return number of stored hot-search entries */
    public int getHotSearchCount() {
        return hotSearches.size();
    }

    /** Removes everything from all three lists. */
    public void clearAll() {
        books.clear();
        movies.clear();
        hotSearches.clear();
        logger.info("仓库已清空");
    }
}

49
project/spider/src/main/java/com/spider/service/AbstractSpider.java

@ -0,0 +1,49 @@
package com.spider.service;
import java.util.function.Supplier;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.NetworkException;
import com.spider.exception.ParseException;
import com.spider.utils.HttpClientUtil;
import com.spider.utils.RetryUtils;
/**
 * Base class for spiders: provides page fetching with retry, a generic retry
 * helper and a null check for crawled data.
 *
 * @param <T> type of item the spider produces
 */
public abstract class AbstractSpider<T> implements Spider<T> {

    /** Retry count passed to {@code RetryUtils} by {@link #fetchDocument(String)}. */
    private static final int FETCH_MAX_RETRIES = 3;

    /** Logger named after the concrete subclass. */
    protected final Logger logger = LoggerFactory.getLogger(getClass());

    /**
     * Downloads {@code url} and parses it into a Jsoup document, retrying through
     * {@code RetryUtils.executeWithRetry}.
     *
     * @param url address of the page to fetch
     * @return the parsed document
     * @throws NetworkException if the page is empty or the request fails; the
     *                          message is prefixed with the requested URL
     */
    protected Document fetchDocument(String url) {
        return RetryUtils.executeWithRetry(() -> {
            try {
                String html = HttpClientUtil.fetchHtml(url);
                if (html == null || html.isEmpty()) {
                    // First argument of this overload is the URL, not an error code.
                    throw new NetworkException(url, "获取页面内容为空");
                }
                return Jsoup.parse(html);
            } catch (NetworkException e) {
                throw e;
            } catch (Exception e) {
                throw new NetworkException(url, "网络请求失败: " + e.getMessage(), e);
            }
        }, FETCH_MAX_RETRIES);
    }

    /**
     * Runs {@code operation} through {@code RetryUtils.executeWithRetry}.
     *
     * @param operation  the work to perform
     * @param maxRetries retry count forwarded to {@code RetryUtils}
     * @param <R>        result type
     * @return the operation's result
     */
    protected <R> R executeWithRetry(Supplier<R> operation, int maxRetries) {
        return RetryUtils.executeWithRetry(operation, maxRetries);
    }

    /**
     * Rejects {@code null} data.
     *
     * @param data crawled item to check
     * @throws ParseException if {@code data} is {@code null}
     */
    protected void validateData(T data) throws ParseException {
        if (data == null) {
            throw new ParseException(getSourceName(), "validateData", "爬取数据为空");
        }
    }

    @Override
    public abstract String getSourceName();

    @Override
    public abstract int getDefaultLimit();
}

144
project/spider/src/main/java/com/spider/service/BaiduHotSearchSpider.java

@ -0,0 +1,144 @@
package com.spider.service;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.ParseException;
import com.spider.model.HotSearch;
import com.spider.utils.HttpClientUtil;
/**
 * Spider for the Baidu real-time hot-search board.
 *
 * <p>The page is fetched once via {@code HttpClientUtil}; title elements are located
 * with a primary CSS selector and a chain of fallbacks. The most recent result is
 * kept in the instance and can be exported as JSON.
 */
public class BaiduHotSearchSpider extends AbstractSpider<HotSearch> implements Spider<HotSearch> {

    private static final Logger logger = LoggerFactory.getLogger(BaiduHotSearchSpider.class);
    private static final String HOT_SEARCH_URL = "https://top.baidu.com/board?tab=realtime";
    private static final int DEFAULT_LIMIT = 50;

    /** Selector tried first. */
    private static final String PRIMARY_SELECTOR = "div.c-single-text-ellipsis";

    /** Selectors tried in order when the primary one matches nothing. */
    private static final String[] FALLBACK_SELECTORS = {
        ".list-item .topic-title",
        "div.hot-list div.item-title",
        ".topic-item .text",
        "[class*=title]",
        "div[class*=item] a"
    };

    /** Result of the most recent crawl; cleared at the start of each crawl. */
    private List<HotSearch> hotSearches;

    public BaiduHotSearchSpider() {
        super();
        this.hotSearches = new ArrayList<>();
    }

    @Override
    public String getSourceName() {
        return "百度热搜";
    }

    @Override
    public int getDefaultLimit() {
        return DEFAULT_LIMIT;
    }

    /**
     * Crawls the hot-search board.
     *
     * <p>Elements with empty text are skipped and do not count toward
     * {@code limit}; ranks are assigned consecutively starting at 1 to the entries
     * that are kept.
     *
     * @param limit maximum number of entries to return
     * @return a copy of the collected entries; empty when the page could not be
     *         fetched
     * @throws ParseException if no selector matches any element
     */
    public List<HotSearch> crawlHotSearch(int limit) {
        hotSearches.clear();
        logger.info("开始爬取百度实时热搜榜前 {} 条...", limit);
        try {
            logger.info("正在抓取: {}", HOT_SEARCH_URL);
            String html = HttpClientUtil.fetchHtml(HOT_SEARCH_URL);
            if (html == null || html.isEmpty()) {
                logger.error("获取热搜页面失败");
                // Return a copy here as well so callers never hold the internal list.
                return new ArrayList<>(hotSearches);
            }
            logger.info("获取到HTML长度: {} 字节", html.length());
            // Raw page dump is diagnostic detail only; keep it out of the INFO log.
            logger.debug("HTML内容前2000字符: {}", html.substring(0, Math.min(2000, html.length())));
            Document doc = Jsoup.parse(html);
            Elements hotItems = selectHotItems(doc);
            if (hotItems.isEmpty()) {
                throw new ParseException("百度热搜", "hotItems", "无法找到热搜数据元素");
            }
            for (Element titleElement : hotItems) {
                if (hotSearches.size() >= limit) {
                    break;
                }
                String keyword = titleElement.text().trim();
                if (keyword.isEmpty()) {
                    continue;
                }
                HotSearch hotSearch = new HotSearch();
                hotSearch.setRank(hotSearches.size() + 1);
                hotSearch.setKeyword(keyword);
                hotSearches.add(hotSearch);
                logger.debug("已抓取热搜 #{}: {}", hotSearch.getRank(), hotSearch.getKeyword());
            }
            logger.info("百度热搜爬取完成,共获取 {} 条热搜", hotSearches.size());
        } catch (Exception e) {
            logger.error("爬取百度热搜时出错", e);
            throw e;
        }
        return new ArrayList<>(hotSearches);
    }

    /**
     * Selects the title elements, trying the primary selector first and then each
     * fallback in order until one matches.
     *
     * @return the first non-empty match, or an empty result when nothing matches
     */
    private Elements selectHotItems(Document doc) {
        Elements items = doc.select(PRIMARY_SELECTOR);
        if (!items.isEmpty()) {
            return items;
        }
        logger.warn("未能解析到热搜数据,尝试备用选择器...");
        for (String selector : FALLBACK_SELECTORS) {
            items = doc.select(selector);
            if (!items.isEmpty()) {
                return items;
            }
        }
        return items;
    }

    /** @return the internal list holding the most recent crawl result (not a copy) */
    public List<HotSearch> getHotSearches() {
        return hotSearches;
    }

    /**
     * Serializes the most recent crawl result as a JSON array of objects with
     * {@code rank} and {@code keyword} members.
     *
     * @return the JSON text
     */
    public String exportToJson() {
        StringBuilder json = new StringBuilder();
        json.append("[\n");
        for (HotSearch hotSearch : hotSearches) {
            json.append(" {\n");
            json.append(" \"rank\": ").append(hotSearch.getRank()).append(",\n");
            json.append(" \"keyword\": \"").append(escapeJson(hotSearch.getKeyword())).append("\"\n");
            json.append(" },\n");
        }
        if (!hotSearches.isEmpty()) {
            // Drop the trailing ",\n" left by the last element.
            json.setLength(json.length() - 2);
        }
        json.append("\n]");
        return json.toString();
    }

    /**
     * Escapes a value for use inside a JSON string literal: backslash, double quote
     * and every control character below U+0020.
     *
     * @param str text to escape; {@code null} yields the empty string
     */
    private String escapeJson(String str) {
        if (str == null) {
            return "";
        }
        StringBuilder out = new StringBuilder(str.length() + 8);
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            switch (c) {
                case '\\':
                    out.append("\\\\");
                    break;
                case '"':
                    out.append("\\\"");
                    break;
                case '\n':
                    out.append("\\n");
                    break;
                case '\r':
                    out.append("\\r");
                    break;
                case '\t':
                    out.append("\\t");
                    break;
                default:
                    if (c < 0x20) {
                        // Remaining control characters must use the \\u00XX form.
                        out.append("\\u00").append(c < 0x10 ? "0" : "").append(Integer.toHexString(c));
                    } else {
                        out.append(c);
                    }
            }
        }
        return out.toString();
    }
}

155
project/spider/src/main/java/com/spider/service/DataStorageService.java

@ -0,0 +1,155 @@
package com.spider.service;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.model.Book;
import com.spider.model.HotSearch;
import com.spider.model.Movie;
/**
 * Writes crawled books, movies and hot-search entries to CSV files inside the
 * {@code data} directory (a relative path).
 *
 * <p>Every file is encoded as UTF-8, starts with a byte-order mark and a header
 * row, and is overwritten on each save. The public {@code save*} methods return
 * the path that was written, or {@code null} when there was nothing to save or
 * the write failed (the failure is logged, not thrown).
 */
public class DataStorageService {
    private static final Logger logger = LoggerFactory.getLogger(DataStorageService.class);
    private static final String DATA_DIR = "data";
    private static final String BOOKS_FILE = "books.csv";
    private static final String MOVIES_FILE = "movies.csv";
    private static final String HOTSEARCH_FILE = "hotsearch.csv";

    /** Creates the service and makes sure the data directory exists. */
    public DataStorageService() {
        createDataDirectory();
    }

    /** Creates the data directory when it is missing; a failure is only logged. */
    private void createDataDirectory() {
        Path dataDir = Paths.get(DATA_DIR);
        if (Files.exists(dataDir)) {
            return;
        }
        try {
            Files.createDirectories(dataDir);
            logger.info("创建数据目录: {}", DATA_DIR);
        } catch (IOException ioe) {
            logger.error("创建数据目录失败", ioe);
        }
    }

    /**
     * Saves the given books to {@code data/books.csv}.
     *
     * @param books the books to write
     * @return the written path, or {@code null} if the list is null/empty or writing failed
     */
    public String saveBooks(List<Book> books) {
        if (books == null || books.isEmpty()) {
            logger.warn("没有书籍数据可保存");
            return null;
        }
        final String target = getFilePath(BOOKS_FILE);
        try {
            saveBooksToCsv(books, target);
        } catch (IOException ioe) {
            logger.error("保存书籍数据失败", ioe);
            return null;
        }
        logger.info("书籍数据已保存到: {}", target);
        return target;
    }

    /**
     * Saves the given movies to {@code data/movies.csv}.
     *
     * @param movies the movies to write
     * @return the written path, or {@code null} if the list is null/empty or writing failed
     */
    public String saveMovies(List<Movie> movies) {
        if (movies == null || movies.isEmpty()) {
            logger.warn("没有电影数据可保存");
            return null;
        }
        final String target = getFilePath(MOVIES_FILE);
        try {
            saveMoviesToCsv(movies, target);
        } catch (IOException ioe) {
            logger.error("保存电影数据失败", ioe);
            return null;
        }
        logger.info("电影数据已保存到: {}", target);
        return target;
    }

    /**
     * Saves the given hot-search entries to {@code data/hotsearch.csv}.
     *
     * @param hotSearches the entries to write
     * @return the written path, or {@code null} if the list is null/empty or writing failed
     */
    public String saveHotSearch(List<HotSearch> hotSearches) {
        if (hotSearches == null || hotSearches.isEmpty()) {
            logger.warn("没有热搜数据可保存");
            return null;
        }
        final String target = getFilePath(HOTSEARCH_FILE);
        try {
            saveHotSearchToCsv(hotSearches, target);
        } catch (IOException ioe) {
            logger.error("保存热搜数据失败", ioe);
            return null;
        }
        logger.info("热搜数据已保存到: {}", target);
        return target;
    }

    /** Writes one row per book: title, rating, author, summary, comment count. */
    private void saveBooksToCsv(List<Book> books, String fullPath) throws IOException {
        try (OutputStreamWriter out = new OutputStreamWriter(
                new FileOutputStream(fullPath), "UTF-8")) {
            writePreamble(out, "书名,评分,作者,简介,评价数\n");
            for (Book entry : books) {
                String row = String.join(",",
                    escapeCsv(entry.getTitle()),
                    String.valueOf(entry.getRating()),
                    escapeCsv(entry.getAuthor()),
                    escapeCsv(entry.getSummary()),
                    String.valueOf(entry.getCommentCount()));
                out.write(row);
                out.write("\n");
            }
        }
    }

    /** Writes one row per movie: title, rating, director. */
    private void saveMoviesToCsv(List<Movie> movies, String fullPath) throws IOException {
        try (OutputStreamWriter out = new OutputStreamWriter(
                new FileOutputStream(fullPath), "UTF-8")) {
            writePreamble(out, "电影名,评分,导演\n");
            for (Movie entry : movies) {
                String row = String.join(",",
                    escapeCsv(entry.getTitle()),
                    String.valueOf(entry.getRating()),
                    escapeCsv(entry.getDirector()));
                out.write(row);
                out.write("\n");
            }
        }
    }

    /** Writes one row per hot-search entry: rank, keyword. */
    private void saveHotSearchToCsv(List<HotSearch> hotSearches, String fullPath) throws IOException {
        try (OutputStreamWriter out = new OutputStreamWriter(
                new FileOutputStream(fullPath), "UTF-8")) {
            writePreamble(out, "排名,关键词\n");
            for (HotSearch entry : hotSearches) {
                String row = String.join(",",
                    String.valueOf(entry.getRank()),
                    escapeCsv(entry.getKeyword()));
                out.write(row);
                out.write("\n");
            }
        }
    }

    /** Writes the byte-order mark followed by the header line. */
    private void writePreamble(OutputStreamWriter out, String headerLine) throws IOException {
        out.write("\uFEFF");
        out.write(headerLine);
    }

    /**
     * Quotes a CSV field when it contains a comma, a double quote or a line break;
     * embedded double quotes are doubled. {@code null} becomes an empty field.
     */
    private String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        boolean needsQuoting = false;
        for (char special : new char[] {',', '"', '\n', '\r'}) {
            if (value.indexOf(special) >= 0) {
                needsQuoting = true;
                break;
            }
        }
        if (!needsQuoting) {
            return value;
        }
        return "\"" + value.replace("\"", "\"\"") + "\"";
    }

    /** Joins the data directory and the file name with the platform separator. */
    private String getFilePath(String filename) {
        return DATA_DIR + File.separator + filename;
    }
}

312
project/spider/src/main/java/com/spider/service/DoubanBookSpider.java

@ -0,0 +1,312 @@
package com.spider.service;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.model.Book;
import com.spider.utils.HttpClientUtil;
public class DoubanBookSpider extends AbstractSpider<Book> implements Spider<Book> {
private static final Logger logger = LoggerFactory.getLogger(DoubanBookSpider.class);
private static final String HOT_BOOKS_URL = "https://book.douban.com/chart?sub_type=1";
private static final int DEFAULT_LIMIT = 50;
private static final Pattern RATING_PATTERN = Pattern.compile("([\\d.]+)\\s*\\(\\s*([\\d.,万]+)\\s*人评价\\s*\\)");
private static final Pattern RATING_SIMPLE = Pattern.compile("(\\d+\\.\\d+)\\s*\\(\\s*\\d+\\s*人评价\\s*\\)");
private List<Book> books;
/**
 * Creates a spider with an empty result list; the superclass constructor is
 * invoked first.
 */
public DoubanBookSpider() {
super();
this.books = new ArrayList<>();
}
/** Returns the display name of this data source (the literal Chinese name for Douban Books). */
@Override
public String getSourceName() {
return "豆瓣读书";
}
/** Returns the default number of books to crawl ({@code DEFAULT_LIMIT}, 50). */
@Override
public int getDefaultLimit() {
return DEFAULT_LIMIT;
}
/**
 * Crawls the Douban book chart and collects up to {@code limit} books.
 *
 * <p>At most five pages are requested. Each page is scanned for {@code h2}
 * headings that contain a subject link; if no book at all has been found that
 * way, a broader set of item selectors is tried on the same page. A subject
 * link is only turned into a book once, so nested matches or pages that repeat
 * earlier content do not produce duplicates, and paging stops as soon as a page
 * contributes nothing new.
 *
 * <p>The internal result list is cleared first and a copy of it is returned.
 *
 * @param limit maximum number of books to collect; nothing is fetched when it is not positive
 * @return a new list holding the collected books
 * @throws RuntimeException any failure raised while fetching or parsing is logged and rethrown
 */
public List<Book> crawlHotBooks(int limit) {
    final int maxPages = 5;
    books.clear();
    logger.info("开始爬取豆瓣读书热度最高的 {} 本书...", limit);
    // Subject links already converted into a Book during this crawl.
    java.util.Set<String> seenLinks = new java.util.HashSet<>();
    try {
        for (int page = 1; page <= maxPages && books.size() < limit; page++) {
            String url = page == 1 ? HOT_BOOKS_URL : HOT_BOOKS_URL + "&page=" + page;
            logger.info("正在抓取第 {} 页: {}", page, url);
            String html = HttpClientUtil.fetchHtml(url);
            if (html == null || html.isEmpty()) {
                logger.warn("第 {} 页获取为空", page);
                break;
            }
            Document doc = Jsoup.parse(html);
            int countBeforePage = books.size();

            Elements headings = doc.select("h2");
            logger.info("找到 {} 个 h2 标签", headings.size());
            for (Element h2 : headings) {
                if (books.size() >= limit) {
                    break;
                }
                Element parent = h2.parent();
                Book book = buildBook(h2, parent == null ? null : parent.text(), seenLinks);
                if (book == null) {
                    continue;
                }
                books.add(book);
                logger.debug("已抓取 {}: {} | 作者: {} | 评分: {} | 评价数: {}",
                    books.size(), book.getTitle(), book.getAuthor(), book.getRating(), book.getCommentCount());
            }

            if (books.isEmpty()) {
                logger.warn("h2选择器没有找到书籍,尝试其他方法...");
                Elements subjectItems = doc.select(".subject-item, .book-item, li, div[class*=item]");
                logger.info("找到 {} 个可能的项目元素", subjectItems.size());
                for (Element item : subjectItems) {
                    if (books.size() >= limit) {
                        break;
                    }
                    Book book = buildBook(item, item.text(), seenLinks);
                    if (book != null) {
                        books.add(book);
                    }
                }
            }

            if (books.size() == countBeforePage) {
                // Nothing new on this page: further requests would only repeat it.
                logger.info("第 {} 页没有新的书籍,停止翻页", page);
                break;
            }
        }
        logger.info("豆瓣读书爬取完成,共获取 {} 本书", books.size());
    } catch (Exception e) {
        logger.error("爬取豆瓣读书时出错", e);
        throw e;
    }
    return new ArrayList<>(books);
}

/**
 * Builds a book from the first subject link inside {@code container}.
 *
 * @param container  element expected to contain an {@code a[href*='/subject/']} link
 * @param detailText surrounding text used to extract author, rating, comment count
 *                   and summary; when {@code null} only the title is set
 * @param seenLinks  subject links already used; the link of a created book is added
 * @return the book, or {@code null} when there is no usable link, the title looks
 *         like site chrome, or the link was already used
 */
private Book buildBook(Element container, String detailText, java.util.Set<String> seenLinks) {
    Element link = container.selectFirst("a[href*='/subject/']");
    if (link == null) {
        return null;
    }
    String title = link.text().trim();
    if (title.isEmpty() || title.length() < 2 || title.contains("豆瓣") || title.contains("登录")) {
        return null;
    }
    if (!seenLinks.add(link.attr("href"))) {
        return null;
    }
    Book book = new Book();
    book.setTitle(title);
    if (detailText != null) {
        book.setAuthor(extractAuthor(detailText, title));
        book.setRating(extractRatingValue(detailText));
        book.setCommentCount(extractCommentCount(detailText));
        book.setSummary(extractSummary(detailText));
    }
    return book;
}
/** Fallback: whatever precedes a {@code "/ <four digits>"} sequence is treated as the author. */
private static final Pattern AUTHOR_BEFORE_YEAR = Pattern.compile("(.*?)\\s*/\\s*\\d{4}");

/**
 * Extracts the author from the text surrounding a book title.
 *
 * <p>First every line that looks like a slash-separated byline (contains
 * {@code /}, no rating text, no price fragment, does not start with a digit) is
 * inspected and its first field is used. If that yields nothing, the text in
 * front of a {@code "/ yyyy"} sequence is used. Role markers and the title
 * itself are stripped from the candidate.
 *
 * @param text  the surrounding text, may be {@code null}
 * @param title the book title to remove from the candidate
 * @return the author, or {@code "未知作者"} when none can be determined; never empty
 */
private String extractAuthor(String text, String title) {
    if (text == null || text.isEmpty()) {
        return "未知作者";
    }
    for (String rawLine : text.split("\n")) {
        String line = rawLine.trim();
        boolean looksLikeByline = line.contains("/")
            && !line.contains("评价")
            && !line.contains("元/")
            && !line.matches("\\d.*");
        if (!looksLikeByline) {
            continue;
        }
        String[] fields = line.split("/");
        if (fields.length == 0) {
            continue;
        }
        String author = cleanAuthor(fields[0], title);
        if (!author.isEmpty() && author.length() < 40 && !author.matches("\\d.*")) {
            return author;
        }
    }
    Matcher matcher = AUTHOR_BEFORE_YEAR.matcher(text);
    if (matcher.find()) {
        String author = cleanAuthor(matcher.group(1), title);
        // An empty capture must not be reported as an author.
        if (!author.isEmpty() && author.length() < 50 && !author.matches("\\d.*")) {
            return author;
        }
    }
    return "未知作者";
}

/** Trims the candidate and removes role markers and the book title from it. */
private String cleanAuthor(String raw, String title) {
    String cleaned = raw.trim()
        .replace("著", "").replace("译", "").replace("选", "").replace("主编", "");
    if (title != null) {
        cleaned = cleaned.replace(title, "");
    }
    return cleaned.trim();
}
/**
 * Finds the rating number in the given text.
 *
 * <p>Three patterns are tried in order, from the most specific
 * ("rating (count人评价)") to a bare "digits.digits(" form; the first capture
 * group of the first pattern that matches is returned.
 *
 * @param text the text to search, may be {@code null}
 * @return the rating as written in the text, or {@code null} when none is found
 */
private String extractRating(String text) {
    if (text == null) {
        return null;
    }
    Pattern[] candidates = {
        RATING_PATTERN,
        RATING_SIMPLE,
        Pattern.compile("(\\d+\\.\\d+)\\(")
    };
    for (Pattern candidate : candidates) {
        Matcher found = candidate.matcher(text);
        if (found.find()) {
            return found.group(1);
        }
    }
    return null;
}
/**
 * Returns the rating found in the text as a number.
 *
 * @param text the text to search
 * @return the parsed rating, or {@code 0.0} when no rating is found or it is not a valid number
 */
private double extractRatingValue(String text) {
    final String raw = extractRating(text);
    if (raw == null) {
        return 0.0;
    }
    try {
        return Double.parseDouble(raw);
    } catch (NumberFormatException notANumber) {
        return 0.0;
    }
}
/**
 * Extracts the number of ratings ("N人评价") from the given text.
 *
 * <p>The combined rating pattern is tried first (its second group is the
 * count); two looser parenthesised forms follow. The captured text is converted
 * by {@code parseCount}.
 *
 * @param text the text to search, may be {@code null}
 * @return the count, or {@code 0} when the text is {@code null} or nothing matches
 */
private int extractCommentCount(String text) {
    if (text == null) {
        return 0;
    }
    Matcher combined = RATING_PATTERN.matcher(text);
    if (combined.find()) {
        return parseCount(combined.group(2));
    }
    String[] fallbackRegexes = {
        "\\(\\s*([\\d.,万]+)\\s*人评价\\s*\\)",
        "\\((\\d+)\\s*人评价\\)"
    };
    for (String regex : fallbackRegexes) {
        Matcher found = Pattern.compile(regex).matcher(text);
        if (found.find()) {
            return parseCount(found.group(1));
        }
    }
    return 0;
}
/**
 * Converts a count such as {@code "1,234"} or {@code "1.13万"} into an int.
 *
 * <p>Commas and spaces are removed. A value containing {@code 万} is multiplied
 * by 10000 and rounded to the nearest integer; rounding (rather than
 * truncating) matters because the binary product can fall just below the exact
 * value, e.g. {@code 1.13 * 10000} evaluates to {@code 11299.999999999998}.
 *
 * @param countStr the textual count, may be {@code null}
 * @return the parsed count, or {@code 0} for null, empty or unparsable input
 */
private int parseCount(String countStr) {
    if (countStr == null || countStr.isEmpty()) {
        return 0;
    }
    String normalized = countStr.replace(",", "").replace(" ", "");
    try {
        if (normalized.contains("万")) {
            double tenThousands = Double.parseDouble(normalized.replace("万", ""));
            return (int) Math.rint(tenThousands * 10000);
        }
        return Integer.parseInt(normalized);
    } catch (NumberFormatException e) {
        return 0;
    }
}
/**
 * Derives a short summary from the text surrounding a book.
 *
 * <p>The text is split at every "digit.digit" occurrence and the second piece
 * is cleaned of rating counts, chart notes, bracketed and parenthesised parts
 * and price fragments. A cleaned piece longer than five characters is returned,
 * cut to 100 characters plus an ellipsis when longer. Otherwise the first known
 * category tag found anywhere in the text is returned.
 *
 * @param text the surrounding text, may be {@code null}
 * @return the summary or category tag, or an empty string when neither is available
 */
private String extractSummary(String text) {
    if (text == null || text.isEmpty()) {
        return "";
    }
    String[] pieces = text.split("\\d\\.\\d");
    if (pieces.length > 1) {
        // Applied in this order; each row is {regex, replacement}.
        String[][] cleanups = {
            {"\\d+\\s*人评价.*", ""},
            {"连续上榜[\\d个月]*", ""},
            {"\\[.*?\\]", ""},
            {"\\(.*?\\)", ""},
            {"元.*?精装", "精装"},
            {"元.*?平装", "平装"},
            {"元", ""}
        };
        String candidate = pieces[1].trim();
        for (String[] cleanup : cleanups) {
            candidate = candidate.replaceAll(cleanup[0], cleanup[1]);
        }
        candidate = candidate.trim();
        if (candidate.length() > 100) {
            candidate = candidate.substring(0, 100) + "...";
        }
        if (candidate.length() > 5) {
            return candidate;
        }
    }
    Matcher tag = Pattern
        .compile("(社会纪实|小说|文学|历史|文化|科学|经管|绘本|漫画|科学新知|商业经管|绘本漫画|历史文化)")
        .matcher(text);
    return tag.find() ? tag.group(1) : "";
}
/**
 * Returns the list that backs this spider's book results.
 *
 * <p>The field itself is returned rather than a copy, so modifications made by
 * the caller are visible to this spider.
 *
 * @return the internal list of books
 */
public List<Book> getBooks() {
return books;
}
/**
 * Renders the collected books as a JSON array. Each object carries the 1-based
 * position in the list as {@code rank}, followed by title, rating, author,
 * comment count and summary.
 *
 * @return the JSON text; an array with no elements when nothing was collected
 */
public String exportToJson() {
    StringBuilder out = new StringBuilder("[\n");
    final int total = books.size();
    int rank = 0;
    for (Book entry : books) {
        rank++;
        out.append(" {\n")
            .append(" \"rank\": ").append(rank).append(",\n")
            .append(" \"title\": \"").append(escapeJson(entry.getTitle())).append("\",\n")
            .append(" \"rating\": ").append(entry.getRating()).append(",\n")
            .append(" \"author\": \"").append(escapeJson(entry.getAuthor())).append("\",\n")
            .append(" \"commentCount\": ").append(entry.getCommentCount()).append(",\n")
            .append(" \"summary\": \"").append(escapeJson(entry.getSummary())).append("\"\n")
            .append(" }")
            // Every element but the last is followed by a comma.
            .append(rank < total ? ",\n" : "\n");
    }
    return out.append("]").toString();
}
/**
 * Escapes a string so it can be placed between double quotes in JSON text.
 *
 * <p>Backslash, double quote, newline, carriage return and tab get their short
 * escapes; every other control character below U+0020 is written as a
 * {@code \\uXXXX} escape, because JSON forbids raw control characters inside
 * strings.
 *
 * @param str the raw value, may be {@code null}
 * @return the escaped value, or an empty string when {@code str} is {@code null}
 */
private String escapeJson(String str) {
    if (str == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(str.length() + 16);
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        switch (c) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '"':
                escaped.append("\\\"");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    // Remaining control characters have no short form.
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
        }
    }
    return escaped.toString();
}
}

158
project/spider/src/main/java/com/spider/service/DoubanMovieSpider.java

@ -0,0 +1,158 @@
package com.spider.service;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.ParseException;
import com.spider.model.Movie;
import com.spider.utils.HttpClientUtil;
public class DoubanMovieSpider extends AbstractSpider<Movie> implements Spider<Movie> {
private static final Logger logger = LoggerFactory.getLogger(DoubanMovieSpider.class);
private static final String TOP250_URL = "https://movie.douban.com/top250";
private static final int DEFAULT_LIMIT = 250;
private List<Movie> movies;
/**
 * Creates a spider with an empty result list; the superclass constructor is
 * invoked first.
 */
public DoubanMovieSpider() {
super();
this.movies = new ArrayList<>();
}
/** Returns the display name of this data source (the literal Chinese name for Douban Movies). */
@Override
public String getSourceName() {
return "豆瓣电影";
}
/** Returns the default number of movies to crawl ({@code DEFAULT_LIMIT}, 250). */
@Override
public int getDefaultLimit() {
return DEFAULT_LIMIT;
}
/**
 * Crawls the Douban Top 250 movie list, requesting up to ten pages with a
 * {@code start} offset that grows by 25 per page.
 *
 * <p>Crawling stops early when a page comes back empty or contains no
 * {@code div.item} elements. Ranks are assigned sequentially to the movies that
 * were accepted. The internal result list is cleared first and a copy of it is
 * returned.
 *
 * @return a new list holding the collected movies
 * @throws RuntimeException any failure raised while fetching or parsing is logged and rethrown
 */
public List<Movie> crawlTop250() {
    movies.clear();
    logger.info("开始爬取豆瓣电影Top250...");
    try {
        int nextRank = 1;
        for (int pageIndex = 0; pageIndex < 10; pageIndex++) {
            final int pageNumber = pageIndex + 1;
            final String url = pageIndex == 0
                ? TOP250_URL
                : TOP250_URL + "?start=" + (pageIndex * 25);
            logger.info("正在抓取第 {} 页 ({}): {}", pageNumber, nextRank, url);

            String html = HttpClientUtil.fetchHtml(url);
            if (html == null || html.isEmpty()) {
                logger.warn("第 {} 页获取为空", pageNumber);
                break;
            }

            Elements movieItems = Jsoup.parse(html).select("div.item");
            if (movieItems.isEmpty()) {
                logger.info("第 {} 页没有更多电影", pageNumber);
                break;
            }

            for (Element item : movieItems) {
                Movie movie = parseMovieItem(item, nextRank);
                boolean usable = movie != null
                    && movie.getTitle() != null
                    && !movie.getTitle().isEmpty();
                if (!usable) {
                    continue;
                }
                movies.add(movie);
                logger.debug("已抓取 Top{}: {}", nextRank, movie.getTitle());
                nextRank++;
            }
        }
        logger.info("豆瓣电影爬取完成,共获取 {} 部电影", movies.size());
    } catch (Exception e) {
        logger.error("爬取豆瓣电影时出错", e);
        throw e;
    }
    return new ArrayList<>(movies);
}
/**
 * Builds a movie from one {@code div.item} element of the Top 250 page.
 *
 * <p>The title is mandatory. Rating and director are optional: the rating is
 * set when a {@code span.rating_num} element exists, and the director is the
 * text between the "导演:" label and the first "主" character, cut at the first
 * slash.
 *
 * @param item the list item element
 * @param rank the rank to assign to the movie
 * @return the parsed movie
 * @throws ParseException when the title is missing, the rating is not a number,
 *                        or any other error occurs while parsing
 */
private Movie parseMovieItem(Element item, int rank) {
    try {
        Movie parsed = new Movie();
        parsed.setRank(rank);

        Element titleNode = item.selectFirst("div.hd a span:nth-child(1)");
        if (titleNode == null) {
            throw new ParseException("豆瓣电影", "title", "无法解析电影标题");
        }
        parsed.setTitle(titleNode.text().trim());

        Element ratingNode = item.selectFirst("span.rating_num");
        if (ratingNode != null) {
            try {
                parsed.setRating(Double.parseDouble(ratingNode.text().trim()));
            } catch (NumberFormatException badNumber) {
                throw new ParseException("豆瓣电影", "rating", "评分格式错误: " + ratingNode.text(), badNumber);
            }
        }

        Element infoNode = item.selectFirst("div.bd p:nth-child(1)");
        String info = infoNode == null ? null : infoNode.text().trim();
        if (info != null && info.contains("导演:")) {
            // 3 is the length of the "导演:" label.
            int from = info.indexOf("导演:") + 3;
            int to = info.indexOf("主");
            if (to > from) {
                String firstDirector = info.substring(from, to).trim().split("/")[0].trim();
                parsed.setDirector(firstDirector);
            }
        }
        return parsed;
    } catch (ParseException alreadyDescriptive) {
        throw alreadyDescriptive;
    } catch (Exception unexpected) {
        throw new ParseException("豆瓣电影", "movieItem", "解析电影信息时出错", unexpected);
    }
}
/**
 * Returns the list that backs this spider's movie results.
 *
 * <p>The field itself is returned rather than a copy, so modifications made by
 * the caller are visible to this spider.
 *
 * @return the internal list of movies
 */
public List<Movie> getMovies() {
return movies;
}
/**
 * Renders the collected movies as a JSON array of objects with {@code rank},
 * {@code title}, {@code rating} and {@code director} members.
 *
 * @return the JSON text; an array with no elements when nothing was collected
 */
public String exportToJson() {
    StringBuilder out = new StringBuilder("[\n");
    // Written before every element except the first, so no trailing comma is produced.
    String separator = "";
    for (Movie entry : movies) {
        out.append(separator)
            .append(" {\n")
            .append(" \"rank\": ").append(entry.getRank()).append(",\n")
            .append(" \"title\": \"").append(escapeJson(entry.getTitle())).append("\",\n")
            .append(" \"rating\": ").append(entry.getRating()).append(",\n")
            .append(" \"director\": \"").append(escapeJson(entry.getDirector())).append("\"\n")
            .append(" }");
        separator = ",\n";
    }
    return out.append("\n]").toString();
}
/**
 * Escapes a string so it can be placed between double quotes in JSON text.
 *
 * <p>Each character is examined exactly once, so a backslash introduced by
 * escaping a double quote is never escaped a second time. Backslash, double
 * quote, newline, carriage return and tab get their short escapes; every other
 * control character below U+0020 is written as a {@code \\uXXXX} escape.
 *
 * @param str the raw value, may be {@code null}
 * @return the escaped value, or an empty string when {@code str} is {@code null}
 */
private String escapeJson(String str) {
    if (str == null) {
        return "";
    }
    StringBuilder escaped = new StringBuilder(str.length() + 16);
    for (int i = 0; i < str.length(); i++) {
        char c = str.charAt(i);
        switch (c) {
            case '\\':
                escaped.append("\\\\");
                break;
            case '"':
                escaped.append("\\\"");
                break;
            case '\n':
                escaped.append("\\n");
                break;
            case '\r':
                escaped.append("\\r");
                break;
            case '\t':
                escaped.append("\\t");
                break;
            default:
                if (c < 0x20) {
                    // Remaining control characters have no short form.
                    escaped.append(String.format("\\u%04x", (int) c));
                } else {
                    escaped.append(c);
                }
        }
    }
    return escaped.toString();
}
}

7
project/spider/src/main/java/com/spider/service/Spider.java

@ -0,0 +1,7 @@
package com.spider.service;
/**
 * Describes a crawl source by its name and default result limit.
 *
 * @param <T> not referenced by either method declared here
 */
public interface Spider<T> {
/** Returns the display name of the source. */
String getSourceName();
/** Returns the default number of items to crawl. */
int getDefaultLimit();
}

55
project/spider/src/main/java/com/spider/test/DebugDoubanBooks.java

@ -0,0 +1,55 @@
package com.spider.test;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import com.spider.utils.HttpClientUtil;
/**
 * Manual debugging aid: downloads the Douban book chart page and prints the
 * start of its HTML, the number of elements matched by a set of candidate CSS
 * selectors, and the text of the first few subject links.
 */
public class DebugDoubanBooks {
    public static void main(String[] args) {
        final String url = "https://book.douban.com/chart?sub_type=1";
        System.out.println("抓取URL: " + url);

        final String html = HttpClientUtil.fetchHtml(url);
        if (html == null || html.isEmpty()) {
            System.out.println("HTML为空!");
            return;
        }
        System.out.println("HTML长度: " + html.length());
        System.out.println("\n=== HTML前2000字符 ===");
        int previewEnd = Math.min(2000, html.length());
        System.out.println(html.substring(0, previewEnd));

        Document doc = Jsoup.parse(html);
        System.out.println("\n\n=== 尝试各种选择器 ===");
        String[] selectors = {
            "tr.item",
            ".chart-item",
            ".book-item",
            "[class*=item]",
            "ul.list-view li",
            ".subject-item",
            ".info",
            ".DoubanBook",
            "li[class*=item]",
            "div[class*=item]",
            "table tr"
        };
        for (String selector : selectors) {
            int hits = doc.select(selector).size();
            System.out.println(selector + " -> " + hits + " 个元素");
        }

        System.out.println("\n=== 查找包含特定文本的元素 ===");
        Elements links = doc.select("a[href*='/subject/']");
        System.out.println("找到 " + links.size() + " 个 subject 链接");
        int shown = 0;
        for (Element link : links) {
            if (shown >= 5) {
                break;
            }
            shown++;
            // Print at most the first 50 characters of the link text.
            String label = link.text();
            System.out.println(" " + label.substring(0, Math.min(50, label.length())));
        }
    }
}

118
project/spider/src/main/java/com/spider/utils/HttpClientUtil.java

@ -0,0 +1,118 @@
package com.spider.utils;
import org.apache.hc.client5.http.classic.methods.HttpGet;
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient;
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse;
import org.apache.hc.client5.http.impl.classic.HttpClients;
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager;
import org.apache.hc.core5.http.HttpStatus;
import org.apache.hc.core5.http.io.entity.EntityUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.NetworkException;
public class HttpClientUtil {
private static final Logger logger = LoggerFactory.getLogger(HttpClientUtil.class);
private static final CloseableHttpClient httpClient;
private static final long MIN_REQUEST_INTERVAL = 2000;
static {
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager();
cm.setMaxTotal(50);
cm.setDefaultMaxPerRoute(20);
httpClient = HttpClients.custom()
.setConnectionManager(cm)
.build();
}
/**
 * Downloads the page at {@code url} and returns its HTML.
 *
 * <p>Each call first sleeps for {@code MIN_REQUEST_INTERVAL} milliseconds to
 * space requests out. The pooled HttpClient is tried first; if it fails for any
 * reason, Jsoup is used as a fallback. When both fail, the Jsoup failure is the
 * cause of the thrown exception and the HttpClient failure is attached as a
 * suppressed exception so neither diagnosis is lost.
 *
 * @param url the address to fetch
 * @return the page HTML
 * @throws NetworkException when both download strategies fail
 */
public static String fetchHtml(String url) {
    try {
        Thread.sleep(MIN_REQUEST_INTERVAL);
    } catch (InterruptedException e) {
        // Keep the interrupt visible to the caller; the fetch still proceeds.
        Thread.currentThread().interrupt();
    }
    try {
        return fetchHtmlWithHttpClient(url);
    } catch (Exception primaryFailure) {
        logger.warn("HttpClient获取失败,尝试使用Jsoup: {}", primaryFailure.getMessage());
        try {
            return fetchHtmlWithJsoup(url);
        } catch (Exception fallbackFailure) {
            NetworkException failure = new NetworkException(
                url, "所有获取方式都失败: " + fallbackFailure.getMessage(), fallbackFailure);
            failure.addSuppressed(primaryFailure);
            throw failure;
        }
    }
}
/**
 * Fetches a page with the shared HttpClient, sending browser-like headers.
 *
 * @param url the address to fetch
 * @return the response body decoded as UTF-8 when the status is 200
 * @throws NetworkException for 404, 403/418, any other non-200 status, or any
 *                          error raised while executing the request
 */
private static String fetchHtmlWithHttpClient(String url) {
    // Header name/value pairs, sent in this order.
    final String[][] headers = {
        {"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"},
        {"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8"},
        {"Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"},
        {"Connection", "keep-alive"},
        {"Cache-Control", "max-age=0"},
        {"Upgrade-Insecure-Requests", "1"}
    };
    try {
        HttpGet get = new HttpGet(url);
        for (String[] header : headers) {
            get.setHeader(header[0], header[1]);
        }
        try (CloseableHttpResponse reply = httpClient.execute(get)) {
            final int status = reply.getCode();
            if (status == HttpStatus.SC_OK) {
                String body = EntityUtils.toString(reply.getEntity(), "UTF-8");
                logger.debug("成功获取页面: {}", url);
                return body;
            }
            if (status == HttpStatus.SC_NOT_FOUND) {
                throw new NetworkException(url, status);
            }
            if (status == HttpStatus.SC_FORBIDDEN || status == 418) {
                throw new NetworkException(url, "访问被拒绝,可能需要等待一段时间后再试");
            }
            throw new NetworkException(url, "HTTP请求失败,状态码: " + status);
        }
    } catch (NetworkException alreadyDescriptive) {
        throw alreadyDescriptive;
    } catch (Exception other) {
        throw new NetworkException(url, "网络请求失败: " + other.getMessage(), other);
    }
}
/**
 * Fetches a page with Jsoup, sending browser-like headers plus a {@code Host}
 * header and a {@code Referer} that points at the root of the same host.
 * Redirects are followed and the timeout is 20 seconds.
 *
 * @param url the address to fetch
 * @return the HTML of the parsed document
 * @throws Exception when the URL is malformed or the request fails
 */
private static String fetchHtmlWithJsoup(String url) throws Exception {
    final String host = new java.net.URL(url).getHost();
    final String pageHtml = Jsoup.connect(url)
        .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8")
        .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
        .header("Connection", "keep-alive")
        .header("Cache-Control", "max-age=0")
        .header("Upgrade-Insecure-Requests", "1")
        .header("Host", host)
        .header("Referer", "https://" + host + "/")
        .timeout(20000)
        .followRedirects(true)
        .get()
        .html();
    logger.debug("Jsoup成功获取页面: {}", url);
    return pageHtml;
}
/**
 * Parses HTML text into a Jsoup document.
 *
 * @param html the HTML to parse
 * @return the parsed document
 * @throws IllegalArgumentException when {@code html} is {@code null} or empty
 */
public static Document parseHtml(String html) {
    boolean hasContent = html != null && !html.isEmpty();
    if (hasContent) {
        return Jsoup.parse(html);
    }
    throw new IllegalArgumentException("HTML内容为空");
}
/**
 * Closes the shared HTTP client. A failure while closing is logged and not
 * propagated to the caller.
 */
public static void close() {
try {
httpClient.close();
} catch (Exception e) {
logger.error("关闭HTTP客户端失败", e);
}
}
}

84
project/spider/src/main/java/com/spider/utils/RetryUtils.java

@ -0,0 +1,84 @@
package com.spider.utils;
import java.util.function.Supplier;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.exception.NetworkException;
/**
 * Runs a task repeatedly until it succeeds or the attempt budget is used up,
 * sleeping between attempts with a delay that doubles after every failure.
 *
 * <p>Every exception thrown by the task counts as a failed attempt. The delay
 * is capped at five seconds, or at the caller's initial delay when that is
 * larger, so the wait never shrinks below what the caller asked for.
 */
public class RetryUtils {
    private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class);
    private static final int DEFAULT_MAX_RETRIES = 3;
    private static final long DEFAULT_DELAY_MS = 1000;
    private static final long DEFAULT_MAX_DELAY_MS = 5000;

    /** Runs the task with the default number of attempts (3) and initial delay (1000 ms). */
    public static <T> T executeWithRetry(Supplier<T> task) {
        return executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_DELAY_MS);
    }

    /** Runs the task with the given number of attempts and the default initial delay (1000 ms). */
    public static <T> T executeWithRetry(Supplier<T> task, int maxRetries) {
        return executeWithRetry(task, maxRetries, DEFAULT_DELAY_MS);
    }

    /**
     * Runs the task until it returns normally or {@code maxRetries} attempts have failed.
     *
     * @param task           the work to perform
     * @param maxRetries     total number of attempts; when it is not positive the task
     *                       is never run and the failure exception is thrown directly
     * @param initialDelayMs wait before the second attempt; doubled after each failure
     * @param <T>            the result type
     * @return the task's result
     * @throws RuntimeException when every attempt failed (the last failure is the cause),
     *                          or when the thread is interrupted while waiting
     */
    public static <T> T executeWithRetry(Supplier<T> task, int maxRetries, long initialDelayMs) {
        // Never let the cap undercut a caller-supplied delay larger than the default cap.
        final long maxDelayMs = Math.max(DEFAULT_MAX_DELAY_MS, initialDelayMs);
        long delayMs = initialDelayMs;
        Exception lastException = null;
        for (int attempt = 1; attempt <= maxRetries; attempt++) {
            try {
                logger.debug("执行任务,第 {}/{} 次尝试", attempt, maxRetries);
                return task.get();
            } catch (Exception e) {
                lastException = e;
                if (e instanceof NetworkException) {
                    logger.warn("网络异常 (第{}次尝试): {}", attempt, e.getMessage());
                } else {
                    logger.warn("执行异常 (第{}次尝试): {}", attempt, e.getMessage());
                }
                if (attempt < maxRetries) {
                    pauseBeforeRetry(delayMs);
                    delayMs = Math.min(delayMs * 2, maxDelayMs);
                }
            }
        }
        logger.error("任务在 {} 次尝试后失败", maxRetries);
        throw new RuntimeException("任务执行失败,已重试 " + maxRetries + " 次", lastException);
    }

    /** Runs the task with the default number of attempts (3) and initial delay (1000 ms). */
    public static void executeWithRetry(Runnable task) {
        executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_DELAY_MS);
    }

    /** Runs the task with the given number of attempts and the default initial delay (1000 ms). */
    public static void executeWithRetry(Runnable task, int maxRetries) {
        executeWithRetry(task, maxRetries, DEFAULT_DELAY_MS);
    }

    /** Adapts the {@code Runnable} to a {@code Supplier} and delegates to the supplier overload. */
    public static void executeWithRetry(Runnable task, int maxRetries, long initialDelayMs) {
        executeWithRetry(() -> {
            task.run();
            return null;
        }, maxRetries, initialDelayMs);
    }

    /** Sleeps for the given delay; an interrupt restores the flag and aborts the retry loop. */
    private static void pauseBeforeRetry(long delayMs) {
        try {
            logger.info("等待 {}ms 后重试...", delayMs);
            Thread.sleep(delayMs);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new RuntimeException("重试被中断", ie);
        }
    }
}

133
project/spider/src/main/java/com/spider/view/ConsoleView.java

@ -0,0 +1,133 @@
package com.spider.view;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.spider.model.Book;
import com.spider.model.HotSearch;
import com.spider.model.Movie;
public class ConsoleView {
private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
/** Logs the three-line welcome banner at INFO level. */
public void showWelcome() {
logger.info("╔══════════════════════════════════════════╗");
logger.info("║ Spider 多功能爬虫框架 v2.0 ║");
logger.info("╚══════════════════════════════════════════╝");
}
/**
 * Logs the command overview at INFO level: crawl commands, control commands,
 * configuration commands, plus {@code help} and {@code exit}.
 */
public void showHelp() {
logger.info("\n┌─ 可用命令 ─────────────────────────────────┐");
logger.info("│ │");
logger.info("│ 爬虫命令: │");
logger.info("│ crawl books [数量] - 爬取豆瓣读书Top │");
logger.info("│ crawl movies - 爬取豆瓣电影Top250 │");
logger.info("│ crawl hotsearch [数量]- 爬取百度热搜 │");
logger.info("│ │");
logger.info("│ 控制命令: │");
logger.info("│ start <url> - 启动爬虫(可指定URL)│");
logger.info("│ stop - 停止爬虫 │");
logger.info("│ status - 查看运行状态 │");
logger.info("│ │");
logger.info("│ 配置命令: │");
logger.info("│ config show - 显示配置 │");
logger.info("│ config set <k> <v> - 设置配置 │");
logger.info("│ │");
logger.info("│ help - 显示帮助 │");
logger.info("│ exit - 退出程序 │");
logger.info("│ │");
logger.info("└──────────────────────────────────────────────┘");
}
/**
 * Logs the given books as a boxed list: position and title, then rating and
 * author (shortened to 15 characters), then the comment count when positive and
 * the summary (shortened to 40 characters) when present.
 *
 * @param books the books to show; a notice is logged instead when null or empty
 */
public void showBooks(List<Book> books) {
    if (books == null || books.isEmpty()) {
        logger.info("没有书籍数据");
        return;
    }
    final int total = books.size();
    logger.info("\n┌─ 豆瓣读书 Top{} ────────────────────────────┐", total);
    logger.info("│ │");
    int position = 0;
    for (Book entry : books) {
        position++;
        logger.info("│ [{}] 《{}》", String.format("%2d", position), entry.getTitle());
        logger.info("│ 评分: {} | 作者: {}", entry.getRating(), truncate(entry.getAuthor(), 15));
        if (entry.getCommentCount() > 0) {
            logger.info("│ 评价数: {}", entry.getCommentCount());
        }
        boolean hasSummary = entry.getSummary() != null && !entry.getSummary().isEmpty();
        if (hasSummary) {
            logger.info("│ 简介: {}", truncate(entry.getSummary(), 40));
        }
        // Spacer line between entries, not after the last one.
        if (position < total) {
            logger.info("│ │");
        }
    }
    logger.info("│ │");
    logger.info("└──────────────────────────────────────────────┘");
}
/**
 * Logs the given movies as a boxed list: rank and title, then rating and
 * director (shortened to 15 characters), with a spacer line after each entry.
 *
 * @param movies the movies to show; a notice is logged instead when null or empty
 */
public void showMovies(List<Movie> movies) {
    boolean nothingToShow = movies == null || movies.isEmpty();
    if (nothingToShow) {
        logger.info("没有电影数据");
        return;
    }
    logger.info("\n┌─ 豆瓣电影 Top250 ──────────────────────────┐");
    logger.info("│ │");
    for (int i = 0; i < movies.size(); i++) {
        Movie entry = movies.get(i);
        String paddedRank = String.format("%3d", entry.getRank());
        logger.info("│ Top{} 《{}》", paddedRank, entry.getTitle());
        logger.info("│ 评分: {} | 导演: {}", entry.getRating(), truncate(entry.getDirector(), 15));
        logger.info("│ │");
    }
    logger.info("└──────────────────────────────────────────────┘");
}
/**
 * Logs the given hot-search entries as a boxed list, one line per entry with
 * its rank and keyword (shortened to 30 characters).
 *
 * @param hotSearches the entries to show; a notice is logged instead when null or empty
 */
public void showHotSearch(List<HotSearch> hotSearches) {
    boolean nothingToShow = hotSearches == null || hotSearches.isEmpty();
    if (nothingToShow) {
        logger.info("没有热搜数据");
        return;
    }
    logger.info("\n┌─ 百度实时热搜榜 Top{} ──────────────────────┐", hotSearches.size());
    logger.info("│ │");
    for (int i = 0; i < hotSearches.size(); i++) {
        HotSearch entry = hotSearches.get(i);
        String paddedRank = String.format("%2d.", entry.getRank());
        logger.info("│ {} {}", paddedRank, truncate(entry.getKeyword(), 30));
    }
    logger.info("│ │");
    logger.info("└──────────────────────────────────────────────┘");
}
/**
 * Logs a two-line completion notice for a crawl.
 *
 * @param url   the value shown in the "finished crawling" line
 * @param count the number of items reported as crawled
 */
public void showCrawlingResult(String url, int count) {
logger.info("=== 已完成爬取: {} ===", url);
logger.info("共爬取 {} 条数据", count);
}
/**
 * Logs a boxed status panel.
 *
 * @param pagesCrawled number of pages shown as crawled
 * @param isRunning    selects the "running" or "stopped" label
 * @param successRate  shown with two decimals followed by a percent sign
 * @param memoryUsage  shown with an "MB" suffix
 */
public void showStatus(int pagesCrawled, boolean isRunning, double successRate, int memoryUsage) {
logger.info("┌─ 爬虫状态 ─────────────────────────────┐");
logger.info("│ 运行状态: {}", isRunning ? "运行中 ✓" : "已停止 ✗");
logger.info("│ 已抓取页面: {} 页", pagesCrawled);
logger.info("│ 成功率: {}%", String.format("%.2f", successRate));
logger.info("│ 内存使用: {} MB", memoryUsage);
logger.info("└─────────────────────────────────────────┘");
}
/**
 * Logs a "configuration updated" line with the given key and value.
 *
 * @param key   the configuration key
 * @param value the configuration value
 */
public void showConfig(String key, String value) {
logger.info("配置已更新: {} = {}", key, value);
}
/**
 * Logs the message at ERROR level behind an "error" prefix.
 *
 * @param message the error text
 */
public void showError(String message) {
logger.error("错误: {}", message);
}
/**
 * Logs the message at INFO level. The message is passed to the logger as the
 * format string itself, with no arguments.
 *
 * @param message the text to log
 */
public void showInfo(String message) {
logger.info(message);
}
/**
 * Shortens a string to at most {@code maxLen} characters, ending it with
 * {@code "..."} when it had to be cut.
 *
 * <p>When {@code maxLen} is below 3 there is no room for the ellipsis, so the
 * string is simply cut to {@code maxLen} characters (to nothing for a
 * non-positive {@code maxLen}) instead of failing on a negative index.
 *
 * @param str    the text to shorten, may be {@code null}
 * @param maxLen the maximum length of the result
 * @return the shortened text, or an empty string when {@code str} is {@code null}
 */
private String truncate(String str, int maxLen) {
    if (str == null) {
        return "";
    }
    if (str.length() <= maxLen) {
        return str;
    }
    if (maxLen < 3) {
        return str.substring(0, Math.max(maxLen, 0));
    }
    return str.substring(0, maxLen - 3) + "...";
}
}

7
project/spider/src/main/java/com/spider/view/ViewFactory.java

@ -0,0 +1,7 @@
package com.spider.view;
/** Factory for view objects. */
public class ViewFactory {
/**
 * Creates a console view.
 *
 * @return a new {@code ConsoleView} on every call
 */
public static ConsoleView createConsoleView() {
return new ConsoleView();
}
}

89
project/spider/src/main/resources/logback.xml

@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property name="LOG_HOME" value="logs"/>
<property name="APP_NAME" value="spider"/>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}.log</file>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>10MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<maxHistory>30</maxHistory>
</rollingPolicy>
</appender>
<appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}-error.log</file>
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>ERROR</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-error-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>10MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<maxHistory>30</maxHistory>
</rollingPolicy>
</appender>
<appender name="CRAWL_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}-crawl.log</file>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-crawl-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>50MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<maxHistory>7</maxHistory>
</rollingPolicy>
</appender>
<!-- Application packages: console, main log file and error-only file.
     additivity="false" keeps these events from also reaching the root logger's appenders. -->
<logger name="com.spider" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</logger>
<!-- Service package: same appenders as com.spider plus the dedicated crawl log. -->
<logger name="com.spider.service" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
<appender-ref ref="CRAWL_FILE"/>
</logger>
<!-- Controller package: declares the same level and appenders as the com.spider logger above. -->
<logger name="com.spider.controller" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</logger>
<!-- Everything outside the loggers declared above. -->
<root level="INFO">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</root>
</configuration>

BIN
project/spider/target/classes/com/spider/command/Command.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/ConfigCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/CrawlCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/HelpCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/ListCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/LoadCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/SaveCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/StartCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/StatusCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/command/StopCommand.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/controller/ControllerFactory.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/controller/ControllerInitializer.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/controller/SpiderController.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/core/CommandExecutor.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/core/SpiderRunner.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/exception/DataException.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/exception/NetworkException.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/exception/ParseException.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/exception/SpiderException.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/model/Book.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/model/DataItem.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/model/HotSearch.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/model/Movie.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/model/SpiderConfig.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/repository/ArticleRepository.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/AbstractSpider.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/BaiduHotSearchSpider.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/DataStorageService.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/DoubanBookSpider.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/DoubanMovieSpider.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/service/Spider.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/test/DebugDoubanBooks.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/utils/HttpClientUtil.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/utils/RetryUtils.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/view/ConsoleView.class

Binary file not shown.

BIN
project/spider/target/classes/com/spider/view/ViewFactory.class

Binary file not shown.

89
project/spider/target/classes/logback.xml

@ -0,0 +1,89 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Logback configuration for the spider application.
Defines one console appender and three rolling file appenders (all events,
ERROR-only events, and crawl events), then wires them to the com.spider
loggers and to the root logger.
-->
<configuration>
<!-- Directory for log files; "logs" is a relative path, so it resolves against the process working directory. -->
<property name="LOG_HOME" value="logs"/>
<!-- Base name used as the prefix of every log file name below. -->
<property name="APP_NAME" value="spider"/>
<!-- Console output. Pattern: timestamp, thread, level left-aligned to 5 chars, logger name abbreviated to about 50 chars, message, newline. -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- General log file: active file is ${LOG_HOME}/${APP_NAME}.log, same line pattern as the console. -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}.log</file>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<!-- Rolls over daily (the %d date pattern) and also when the active file reaches maxFileSize; %i is the index within one day. -->
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<!-- NOTE(review): newer Logback releases provide SizeAndTimeBasedRollingPolicy as a simpler equivalent of this nested policy; confirm the Logback version in pom.xml before migrating. -->
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>10MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<!-- Archive retention: 30 rollover periods, which is 30 days with the daily pattern above. -->
<maxHistory>30</maxHistory>
</rollingPolicy>
</appender>
<!-- Error-only log file: active file is ${LOG_HOME}/${APP_NAME}-error.log. -->
<appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}-error.log</file>
<!-- LevelFilter matches on the exact level: ERROR events are accepted, every other level is denied. -->
<filter class="ch.qos.logback.classic.filter.LevelFilter">
<level>ERROR</level>
<onMatch>ACCEPT</onMatch>
<onMismatch>DENY</onMismatch>
</filter>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<!-- Same daily plus 10MB size-based rollover and 30-period retention as the FILE appender. -->
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-error-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>10MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<maxHistory>30</maxHistory>
</rollingPolicy>
</appender>
<!-- Crawl log file: active file is ${LOG_HOME}/${APP_NAME}-crawl.log; only the com.spider.service logger references this appender. -->
<appender name="CRAWL_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_HOME}/${APP_NAME}-crawl.log</file>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
<!-- Larger size threshold (50MB) and shorter retention (7 periods) than the other file appenders. -->
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_HOME}/${APP_NAME}-crawl-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP">
<maxFileSize>50MB</maxFileSize>
</timeBasedFileNamingAndTriggeringPolicy>
<maxHistory>7</maxHistory>
</rollingPolicy>
</appender>
<!-- Application logger. additivity="false" stops events from also propagating to the root logger, which lists the same appenders and would otherwise write each event twice. -->
<logger name="com.spider" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</logger>
<!-- Service package logger: same appenders as com.spider plus CRAWL_FILE; additivity="false" keeps its events from being repeated by the com.spider logger. -->
<logger name="com.spider.service" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
<appender-ref ref="CRAWL_FILE"/>
</logger>
<!-- NOTE(review): this logger has the same level and appenders as its parent com.spider, so it looks redundant; confirm before removing. -->
<logger name="com.spider.controller" level="INFO" additivity="false">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</logger>
<!-- Root logger: handles events from every logger not covered by the com.spider loggers above (for example third-party libraries) at INFO and above. -->
<root level="INFO">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
<appender-ref ref="ERROR_FILE"/>
</root>
</configuration>

3
project/spider/target/maven-archiver/pom.properties

@ -0,0 +1,3 @@
artifactId=spider
groupId=com.spider
version=1.0.0

36
project/spider/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst

@ -0,0 +1,36 @@
com\spider\core\SpiderRunner.class
com\spider\exception\DataException.class
com\spider\model\HotSearch.class
com\spider\model\Movie.class
com\spider\service\DoubanMovieSpider.class
com\spider\command\ListCommand.class
com\spider\command\CrawlCommand.class
com\spider\test\DebugDoubanBooks.class
com\spider\utils\HttpClientUtil.class
com\spider\exception\NetworkException.class
com\spider\view\ConsoleView.class
com\spider\model\SpiderConfig.class
com\spider\service\DataStorageService.class
com\spider\exception\ParseException.class
com\spider\utils\RetryUtils.class
com\spider\view\ViewFactory.class
com\spider\command\HelpCommand.class
com\spider\command\ConfigCommand.class
com\spider\model\Book.class
com\spider\core\CommandExecutor.class
com\spider\command\StopCommand.class
com\spider\service\BaiduHotSearchSpider.class
com\spider\command\LoadCommand.class
com\spider\command\SaveCommand.class
com\spider\service\AbstractSpider.class
com\spider\command\StartCommand.class
com\spider\service\DoubanBookSpider.class
com\spider\controller\ControllerFactory.class
com\spider\exception\SpiderException.class
com\spider\service\Spider.class
com\spider\command\StatusCommand.class
com\spider\command\Command.class
com\spider\repository\ArticleRepository.class
com\spider\model\DataItem.class
com\spider\controller\SpiderController.class
com\spider\controller\ControllerInitializer.class

36
project/spider/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

@ -0,0 +1,36 @@
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\CrawlCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\core\CommandExecutor.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\Command.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\LoadCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\NetworkException.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\ListCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DoubanBookSpider.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\HotSearch.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\core\SpiderRunner.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\view\ViewFactory.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\SaveCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StartCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StatusCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\ConfigCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\ParseException.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DataStorageService.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DoubanMovieSpider.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\repository\ArticleRepository.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\AbstractSpider.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\ControllerInitializer.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\utils\RetryUtils.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\Movie.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\view\ConsoleView.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\SpiderException.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\SpiderController.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\test\DebugDoubanBooks.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\Book.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\SpiderConfig.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\Spider.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\ControllerFactory.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\DataException.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\BaiduHotSearchSpider.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StopCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\DataItem.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\HelpCommand.java
D:\java\job-pc\spider\spider\src\main\java\com\spider\utils\HttpClientUtil.java

0
project/spider/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst

0
project/spider/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst

BIN
project/spider/target/original-spider-1.0.0.jar

Binary file not shown.

BIN
project/spider/target/spider-1.0.0-shaded.jar

Binary file not shown.

BIN
project/spider/target/spider-1.0.0.jar

Binary file not shown.
Loading…
Cancel
Save