90 changed files with 0 additions and 17908 deletions
|
|
|
@ -1,56 +0,0 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the Spider project (com.spider:spider:1.0.0).

  The build compiles for Java 11, sets com.spider.core.SpiderRunner as the
  Main-Class of the regular JAR, and runs the shade goal during the package
  phase so the shaded JAR is executable as well.

  NOTE(review): no <dependencies> section is declared in this file. Confirm
  that any third-party libraries the code needs are supplied elsewhere;
  otherwise the shade goal has nothing to bundle.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.spider</groupId>
    <artifactId>spider</artifactId>
    <version>1.0.0</version>
    <name>Spider Project</name>
    <description>A Java Web Spider Framework</description>

    <properties>
        <maven.compiler.source>11</maven.compiler.source>
        <maven.compiler.target>11</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <!-- Same values as the maven.compiler.* properties above; keep them in sync. -->
                <configuration>
                    <source>11</source>
                    <target>11</target>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>3.3.0</version>
                <configuration>
                    <archive>
                        <manifest>
                            <mainClass>com.spider.core.SpiderRunner</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin>

            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-shade-plugin</artifactId>
                <version>3.5.0</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>shade</goal>
                        </goals>
                        <configuration>
                            <transformers>
                                <!--
                                  The implementation attribute is required: the transformers
                                  parameter is a list of the ResourceTransformer interface, so
                                  Maven cannot instantiate an entry without a concrete class.
                                  ManifestResourceTransformer writes Main-Class into the shaded
                                  JAR manifest.
                                -->
                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                                    <mainClass>com.spider.core.SpiderRunner</mainClass>
                                </transformer>
                            </transformers>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
</project>
|||
@ -1,884 +0,0 @@ |
|||
2026-05-31 17:59:10.612 [main] INFO com.spider.service.DataStorageService - 创建数据目录: data |
|||
2026-05-31 17:59:10.613 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 17:59:10.615 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 17:59:11.694 [main] INFO com.spider.service.DoubanBookSpider - 第 1 页没有更多书籍 |
|||
2026-05-31 17:59:11.695 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 0 本书 |
|||
2026-05-31 17:59:11.695 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 17:59:11.697 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 17:59:11.699 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 17:59:12.173 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 17:59:12.358 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 17:59:12.474 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 17:59:12.661 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 17:59:12.769 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 17:59:12.864 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 17:59:12.973 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 17:59:13.089 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 17:59:13.188 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 17:59:13.283 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 17:59:13.433 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_175913.json |
|||
2026-05-31 17:59:13.433 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 17:59:13.433 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 17:59:13.911 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 17:59:13.916 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_175913.json |
|||
2026-05-31 17:59:13.916 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 17:59:13.918 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_175913.json |
|||
2026-05-31 17:59:13.919 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_175913.json |
|||
2026-05-31 18:06:37.844 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:06:37.844 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:06:39.098 [main] INFO com.spider.service.DoubanBookSpider - 第 1 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:39.106 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:06:39.331 [main] INFO com.spider.service.DoubanBookSpider - 第 2 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:39.341 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 3 页: https://book.douban.com/chart?sub_type=1&page=3 |
|||
2026-05-31 18:06:39.536 [main] INFO com.spider.service.DoubanBookSpider - 第 3 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:39.541 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 4 页: https://book.douban.com/chart?sub_type=1&page=4 |
|||
2026-05-31 18:06:39.746 [main] INFO com.spider.service.DoubanBookSpider - 第 4 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:39.748 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 5 页: https://book.douban.com/chart?sub_type=1&page=5 |
|||
2026-05-31 18:06:39.945 [main] INFO com.spider.service.DoubanBookSpider - 第 5 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:39.947 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 6 页: https://book.douban.com/chart?sub_type=1&page=6 |
|||
2026-05-31 18:06:40.149 [main] INFO com.spider.service.DoubanBookSpider - 第 6 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:40.150 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 7 页: https://book.douban.com/chart?sub_type=1&page=7 |
|||
2026-05-31 18:06:40.351 [main] INFO com.spider.service.DoubanBookSpider - 第 7 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:40.352 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 8 页: https://book.douban.com/chart?sub_type=1&page=8 |
|||
2026-05-31 18:06:40.557 [main] INFO com.spider.service.DoubanBookSpider - 第 8 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:40.558 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 9 页: https://book.douban.com/chart?sub_type=1&page=9 |
|||
2026-05-31 18:06:40.762 [main] INFO com.spider.service.DoubanBookSpider - 第 9 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:40.763 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 10 页: https://book.douban.com/chart?sub_type=1&page=10 |
|||
2026-05-31 18:06:40.964 [main] INFO com.spider.service.DoubanBookSpider - 第 10 页找到 6 个书籍元素 |
|||
2026-05-31 18:06:40.965 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 40 本书 |
|||
2026-05-31 18:06:41.007 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_180640.json |
|||
2026-05-31 18:06:41.007 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:06:41.007 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:06:41.492 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:06:41.596 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:06:41.705 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:06:41.889 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:06:41.995 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:06:42.093 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:06:42.202 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:06:42.397 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:06:42.512 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:06:42.615 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:06:42.735 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_180642.json |
|||
2026-05-31 18:06:42.736 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:06:42.736 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:06:43.330 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:06:43.339 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_180643.json |
|||
2026-05-31 18:06:43.342 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_180643.json |
|||
2026-05-31 18:06:43.344 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_180643.json |
|||
2026-05-31 18:06:43.346 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_180643.json |
|||
2026-05-31 18:10:11.199 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:10:11.200 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:10:12.117 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:10:12.338 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:10:12.381 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181012.json |
|||
2026-05-31 18:10:12.381 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:10:12.381 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:10:12.854 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:10:13.043 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:10:13.148 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:10:13.258 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:10:13.446 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:10:13.549 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:10:13.654 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:10:13.856 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:10:13.958 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:10:14.075 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:10:14.196 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181014.json |
|||
2026-05-31 18:10:14.196 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:10:14.196 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:10:14.705 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:10:14.711 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181014.json |
|||
2026-05-31 18:10:14.713 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181014.json |
|||
2026-05-31 18:10:14.715 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181014.json |
|||
2026-05-31 18:10:14.718 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181014.json |
|||
2026-05-31 18:11:36.812 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:11:36.813 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:11:37.685 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:11:37.696 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:11:37.958 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:11:37.959 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:11:38.013 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181137.json |
|||
2026-05-31 18:11:38.013 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:11:38.013 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:11:38.566 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:11:38.673 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:11:38.765 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:11:38.954 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:11:39.155 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:11:39.260 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:11:39.464 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:11:39.568 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:11:39.674 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:11:39.793 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:11:39.900 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181139.json |
|||
2026-05-31 18:11:39.901 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:11:39.901 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:11:40.201 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:11:40.206 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181140.json |
|||
2026-05-31 18:11:40.208 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181140.json |
|||
2026-05-31 18:11:40.209 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181140.json |
|||
2026-05-31 18:11:40.211 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181140.json |
|||
2026-05-31 18:13:36.332 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:13:36.332 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:13:37.500 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:13:37.513 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:13:37.739 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:13:37.740 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:13:37.785 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181337.json |
|||
2026-05-31 18:13:37.786 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:13:37.786 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:13:38.266 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:13:38.482 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:13:38.663 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:13:38.779 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:13:38.964 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:13:39.070 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:13:39.275 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:13:39.378 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:13:39.483 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:13:39.599 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:13:39.712 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181339.json |
|||
2026-05-31 18:13:39.712 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:13:39.712 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:13:40.224 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:13:40.229 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181340.json |
|||
2026-05-31 18:13:40.232 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181340.json |
|||
2026-05-31 18:13:40.234 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181340.json |
|||
2026-05-31 18:13:40.234 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181340.json |
|||
2026-05-31 18:15:15.754 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:15:15.754 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:15:16.729 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:15:16.745 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:15:16.970 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:15:16.974 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:15:17.023 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181516.json |
|||
2026-05-31 18:15:17.024 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:15:17.024 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:15:17.599 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:15:17.803 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:15:17.906 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:15:18.008 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:15:18.191 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:15:18.306 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:15:18.405 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:15:18.524 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:15:18.616 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:15:18.728 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:15:18.853 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181518.json |
|||
2026-05-31 18:15:18.854 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:15:18.854 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:15:19.423 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:15:19.427 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181519.json |
|||
2026-05-31 18:15:19.431 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books_20260531_181519.json |
|||
2026-05-31 18:15:19.432 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies_20260531_181519.json |
|||
2026-05-31 18:15:19.434 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_20260531_181519.json |
|||
2026-05-31 18:23:56.176 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:23:56.177 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:23:57.144 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:23:57.158 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:23:57.364 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:23:57.367 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:23:57.384 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv |
|||
2026-05-31 18:23:57.385 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:23:57.385 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:23:57.886 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:23:57.982 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:23:58.087 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:23:58.205 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:23:58.387 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:23:58.487 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:23:58.599 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:23:58.711 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:23:58.823 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:23:59.309 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:23:59.410 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv |
|||
2026-05-31 18:23:59.410 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:23:59.410 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:24:00.027 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:24:00.034 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv |
|||
2026-05-31 18:24:00.035 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv |
|||
2026-05-31 18:24:00.036 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv |
|||
2026-05-31 18:24:00.037 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv |
|||
2026-05-31 18:33:53.627 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:33:53.628 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:33:54.515 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:33:54.529 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:33:54.772 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:33:54.776 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:33:54.803 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv |
|||
2026-05-31 18:33:54.803 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:33:54.803 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:33:55.308 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:33:55.492 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:33:55.587 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:33:55.692 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:33:55.800 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:33:55.895 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:33:55.996 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 8 页 (176): https://movie.douban.com/top250?start=175 |
|||
2026-05-31 18:33:56.201 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 9 页 (201): https://movie.douban.com/top250?start=200 |
|||
2026-05-31 18:33:56.315 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 10 页 (226): https://movie.douban.com/top250?start=225 |
|||
2026-05-31 18:33:56.414 [main] INFO com.spider.service.DoubanMovieSpider - 豆瓣电影爬取完成,共获取 250 部电影 |
|||
2026-05-31 18:33:56.524 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv |
|||
2026-05-31 18:33:56.524 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:33:56.524 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:33:57.048 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:33:57.052 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv |
|||
2026-05-31 18:33:57.054 [main] INFO com.spider.service.DataStorageService - 书籍数据已保存到: data\books.csv |
|||
2026-05-31 18:33:57.055 [main] INFO com.spider.service.DataStorageService - 电影数据已保存到: data\movies.csv |
|||
2026-05-31 18:33:57.056 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch.csv |
|||
2026-05-31 18:41:42.563 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:41:42.564 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:41:43.409 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:41:43.424 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 2 页: https://book.douban.com/chart?sub_type=1&page=2 |
|||
2026-05-31 18:41:43.661 [main] INFO com.spider.service.DoubanBookSpider - 找到 40 个 h2 标签 |
|||
2026-05-31 18:41:43.664 [main] INFO com.spider.service.DoubanBookSpider - 豆瓣读书爬取完成,共获取 50 本书 |
|||
2026-05-31 18:41:43.730 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败 |
|||
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94) |
|||
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:69) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:41:43.731 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:41:43.731 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:41:47.241 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1295644/ - URL: https://movie.douban.com/subject/1295644/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1295644%2F&_s=19d90a80e685c89f498db6303ac9746a0788c6f75e9dbf5242c3f8e79e183ba7&a=1' |
|||
2026-05-31 18:41:48.571 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3541415/ - URL: https://movie.douban.com/subject/3541415/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3541415%2F&_s=b60726b8b232a58ecb4ab80744cfadd91f86df8d2daf6dfb69b1cc1b3c2ec607&a=1' |
|||
2026-05-31 18:41:49.902 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292064/ - URL: https://movie.douban.com/subject/1292064/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292064%2F&_s=582c0f41bcd8420b61faf56b4ca7b43d2522eff4b15c3cb6893ccb5131275d0b&a=1' |
|||
2026-05-31 18:41:51.234 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1295124/ - URL: https://movie.douban.com/subject/1295124/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1295124%2F&_s=8f76223bf35fa0e09a6dec62972e26694f129d02910b2dad1a5f6cfb4cfdd5b7&a=1' |
|||
2026-05-31 18:41:52.564 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3011091/ - URL: https://movie.douban.com/subject/3011091/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3011091%2F&_s=a0c05d1c2ea0142b57c7ded279a9027e49e7ed0aaaca4fc88d182da496ca0c5e&a=1' |
|||
2026-05-31 18:41:53.896 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292001/ - URL: https://movie.douban.com/subject/1292001/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292001%2F&_s=42d9d2b1a4de3deedd90f3f7f2fce0668bdb697df72509ff681737a4175eae05&a=1' |
|||
2026-05-31 18:41:55.151 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/25662329/ - URL: https://movie.douban.com/subject/25662329/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F25662329%2F&_s=f9eeeab9834d75c0712e5820644f3596a69313df1dcd8b67f294b58e72f2db10&a=1' |
|||
2026-05-31 18:41:56.381 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3793023/ - URL: https://movie.douban.com/subject/3793023/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3793023%2F&_s=f134521f56d8434140d01ce4a5055b4e8b2c3f87eae895fc975de06c55d15224&a=1' |
|||
2026-05-31 18:41:57.685 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/2131459/ - URL: https://movie.douban.com/subject/2131459/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F2131459%2F&_s=c1ea117e3b324d3dad6507c8f95ef5ecb8b7870c722771c1217048b1b3f77a64&a=1' |
|||
2026-05-31 18:41:59.222 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1307914/ - URL: https://movie.douban.com/subject/1307914/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1307914%2F&_s=49a74dcc367a70937524f8458be6713b314019bbcd1ab12e5e77a3b15f65d01f&a=1' |
|||
2026-05-31 18:42:00.478 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1296141/ - URL: https://movie.douban.com/subject/1296141/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1296141%2F&_s=620de006e8c9c894df5e0eed79e902eb614205b27ed75faa53bd6a2aa7e077f6&a=1' |
|||
2026-05-31 18:42:01.780 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/20495023/ - URL: https://movie.douban.com/subject/20495023/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F20495023%2F&_s=48ed1e86d7198b4ef9b234b1dc8921e98cd7a5fc716753ed8a98bbc34b577db1&a=1' |
|||
2026-05-31 18:42:03.036 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292213/ - URL: https://movie.douban.com/subject/1292213/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292213%2F&_s=309f9fb7afab29756c23f76bcaf21d84a3842138a1691bd7585bc17f7668f552&a=1' |
|||
2026-05-31 18:42:04.341 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/5912992/ - URL: https://movie.douban.com/subject/5912992/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F5912992%2F&_s=1dda0f90711eb2b6e159c8d74e29140d36bcc30690e97dc73a4dc743c881077b&a=1' |
|||
2026-05-31 18:42:05.674 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/6786002/ - URL: https://movie.douban.com/subject/6786002/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F6786002%2F&_s=370779d74c6d2938d6e31a66cbadf463181b8f58f6988c2407fe85b76e787946&a=1' |
|||
2026-05-31 18:42:06.927 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291841/ - URL: https://movie.douban.com/subject/1291841/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291841%2F&_s=09325b64707e27d1e6d971a62653ad6c4d495cf817c05ccea7290ffef1b26e23&a=1' |
|||
2026-05-31 18:42:08.233 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1293172/ - URL: https://movie.douban.com/subject/1293172/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1293172%2F&_s=2abf2b3aefdf70afd59ea35cb968f83c1365093a2d5a3b05676de17f8f16b779&a=1' |
|||
2026-05-31 18:42:08.234 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 2 页 (26): https://movie.douban.com/top250?start=25 |
|||
2026-05-31 18:42:09.591 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1295038/ - URL: https://movie.douban.com/subject/1295038/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1295038%2F&_s=5f1072d4328097584500f07e2db94049df527b3e438c5441b9ba2fc421252e55&a=1' |
|||
2026-05-31 18:42:11.024 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291560/ - URL: https://movie.douban.com/subject/1291560/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291560%2F&_s=4f101b51a741e0f51c27428b28c456b4ed35e25b2c733f1395ff2ad17f7aa95f&a=1' |
|||
2026-05-31 18:42:12.328 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292365/ - URL: https://movie.douban.com/subject/1292365/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292365%2F&_s=e3d8862f74c367362a30d267b1dc0698e215730e568c059a649c47fc8a783d5a&a=1' |
|||
2026-05-31 18:42:13.585 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3319755/ - URL: https://movie.douban.com/subject/3319755/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3319755%2F&_s=dd2300fc85e7fdd0f3a16c82d0c61156d72db71f5891d81c56de6c3f704a001c&a=1' |
|||
2026-05-31 18:42:14.888 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1851857/ - URL: https://movie.douban.com/subject/1851857/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1851857%2F&_s=48db3f00dad8df4220e5737be12fd4ff7af622ef118811374b1a9a96419c2a68&a=1' |
|||
2026-05-31 18:42:16.221 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291552/ - URL: https://movie.douban.com/subject/1291552/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291552%2F&_s=2e26fbb4147403290cfbc8413dd30a586b710620b18d50e35303e18763552dbc&a=1' |
|||
2026-05-31 18:42:17.550 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/26752088/ - URL: https://movie.douban.com/subject/26752088/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F26752088%2F&_s=e79d28722ad099e437e7c9fd2707dec8dba2a94750c825086a8ccb4645d4d153&a=1' |
|||
2026-05-31 18:42:18.806 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1300267/ - URL: https://movie.douban.com/subject/1300267/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1300267%2F&_s=6a06f703d5f1f2e5470766436f3de62ad396e8c17d6c0093e647adf46628e793&a=1' |
|||
2026-05-31 18:42:20.036 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3742360/ - URL: https://movie.douban.com/subject/3742360/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3742360%2F&_s=a1aca855ef0f0b50e5f7ef39bb3aa60651c6da0ae505e0991d61943a3e015083&a=1' |
|||
2026-05-31 18:42:21.262 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/2129039/ - URL: https://movie.douban.com/subject/2129039/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F2129039%2F&_s=37608c3df49335e9de26ff38c49fc54c24ef7153a460d209402419217dd22388&a=1' |
|||
2026-05-31 18:42:22.775 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1293182/ - URL: https://movie.douban.com/subject/1293182/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1293182%2F&_s=bbc648f97ee7686e12042eeb1480fe53d183e05c31c3bd2910282bff59afb159&a=1' |
|||
2026-05-31 18:42:24.028 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/25958717/ - URL: https://movie.douban.com/subject/25958717/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F25958717%2F&_s=0f874c6711daca7375031517db4e90280fb7793b1e603eebae0beb6dab0af469&a=1' |
|||
2026-05-31 18:42:25.335 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/21937452/ - URL: https://movie.douban.com/subject/21937452/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F21937452%2F&_s=03adedaa4e2ce28856d86d7ab01910d9abee80a9e20ffa6124cdd00d8ea8cd66&a=1' |
|||
2026-05-31 18:42:26.589 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1305487/ - URL: https://movie.douban.com/subject/1305487/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1305487%2F&_s=8c85831d86c594a5d5dcda790e1de8fc25c5c99b82dd724e2540ff8273c27fa8&a=1' |
|||
2026-05-31 18:42:27.818 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291583/ - URL: https://movie.douban.com/subject/1291583/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291583%2F&_s=929b6cf35c95c4a00d4ed53adde35d334dad4eea8d48a076ee13a37b16ef4cec&a=1' |
|||
2026-05-31 18:42:29.121 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291858/ - URL: https://movie.douban.com/subject/1291858/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291858%2F&_s=9631befd939ea3b6f38d8853c003204abc76bab59f0715ef1a2227e858d2a709&a=1' |
|||
2026-05-31 18:42:30.378 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/26387939/ - URL: https://movie.douban.com/subject/26387939/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F26387939%2F&_s=951471a00680b888ff23e0a1a20f23efd3e64ddfeb4bdcc03a2e30cc928fa92d&a=1' |
|||
2026-05-31 18:42:31.607 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1929463/ - URL: https://movie.douban.com/subject/1929463/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1929463%2F&_s=33ec4173a700ef035962093d3f6068b206cad375d656b8a6d11829b10e92d7e0&a=1' |
|||
2026-05-31 18:42:32.836 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1296736/ - URL: https://movie.douban.com/subject/1296736/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1296736%2F&_s=22aa247702038af9d62e00a1cc16d54eb8c1bcfab207098cda68bd272398b0bf&a=1' |
|||
2026-05-31 18:42:34.346 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291572/ - URL: https://movie.douban.com/subject/1291572/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291572%2F&_s=e5d657bffb35b705bc8ea1cb097172487f0722e3a321f58b65baa3c6f1803047&a=1' |
|||
2026-05-31 18:42:35.681 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1299398/ - URL: https://movie.douban.com/subject/1299398/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1299398%2F&_s=341d1551f69c2a91355dd4ca436d6a206a4abc8ab563273c43fdbd3d567b474a&a=1' |
|||
2026-05-31 18:42:36.931 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/27060077/ - URL: https://movie.douban.com/subject/27060077/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F27060077%2F&_s=875b99a9517af5581577b1e9ca266addfe329675e205b3ccae7a39be96c72169&a=1' |
|||
2026-05-31 18:42:36.931 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 3 页 (51): https://movie.douban.com/top250?start=50 |
|||
2026-05-31 18:42:38.365 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/30170448/ - URL: https://movie.douban.com/subject/30170448/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F30170448%2F&_s=2ded1155ce53575ca1db5b8ae403e3db15c92195f85a5d5ec71df6242d95ccaa&a=1' |
|||
2026-05-31 18:42:39.593 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1298624/ - URL: https://movie.douban.com/subject/1298624/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1298624%2F&_s=a7893cf9811df95b3e8070996a8d7c639cf7091a8df6aa8130bc5f79fd87f074&a=1' |
|||
2026-05-31 18:42:40.897 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1418019/ - URL: https://movie.douban.com/subject/1418019/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1418019%2F&_s=e5ad11fade978c85c167bc53bb60576e7e73b2b3d3c90c587a7e61af364ff575&a=1' |
|||
2026-05-31 18:42:42.230 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291843/ - URL: https://movie.douban.com/subject/1291843/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291843%2F&_s=9676b18e34acbe2202964a7a9e60e5742478be46ce3a0245c0d5a9c7827c8338&a=1' |
|||
2026-05-31 18:42:43.559 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291571/ - URL: https://movie.douban.com/subject/1291571/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291571%2F&_s=bbf09d101dfd40540908bf349674f37988406363af93fefd47cc75fcb0d612b4&a=1' |
|||
2026-05-31 18:42:44.891 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1293839/ - URL: https://movie.douban.com/subject/1293839/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1293839%2F&_s=7cd9a2d7a8eba30b56a3b4a5c317e5a3f9146d36f052424b8132c3226a25efa9&a=1' |
|||
2026-05-31 18:42:46.351 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1301753/ - URL: https://movie.douban.com/subject/1301753/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1301753%2F&_s=a2807203e045af8b93868b6d03bfea08f6625c437d460db0d6f385c9a3fa70a7&a=1' |
|||
2026-05-31 18:42:47.655 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291828/ - URL: https://movie.douban.com/subject/1291828/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291828%2F&_s=52a32e937c3bcb2ccd9090941cb52d4eda1ad226f24b748a514d6c52074610c8&a=1' |
|||
2026-05-31 18:42:48.988 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291818/ - URL: https://movie.douban.com/subject/1291818/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291818%2F&_s=ea8d527e04948300ed9bd9cc45dab7e6cc39a4c8b449cfae2c22bfc19f32bb92&a=1' |
|||
2026-05-31 18:42:50.319 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/21937445/ - URL: https://movie.douban.com/subject/21937445/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F21937445%2F&_s=50a356964b0c7cd6aeec629835872b1dbbee6f333da9d43f521d661d3cc6ee43&a=1' |
|||
2026-05-31 18:42:51.649 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1485260/ - URL: https://movie.douban.com/subject/1485260/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1485260%2F&_s=243083c234bf76945e6f0dd5fd74cd364787671f98d2d220fd16868ffb6eaa18&a=1' |
|||
2026-05-31 18:42:52.981 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292000/ - URL: https://movie.douban.com/subject/1292000/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292000%2F&_s=8394366bf777277b24c39838e5566a101080093996c689dabc77b13d41011dee&a=1' |
|||
2026-05-31 18:42:54.238 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1306029/ - URL: https://movie.douban.com/subject/1306029/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1306029%2F&_s=4dea05bf9cfa44f4866af1ba35cf7044a7a8903cef77f91f4e0b1e2451c441d3&a=1' |
|||
2026-05-31 18:42:55.540 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3008247/ - URL: https://movie.douban.com/subject/3008247/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3008247%2F&_s=cd699284dac9d4dbe84e25545b350928290e835fc6ce2cee5f1851114d85c3bb&a=1' |
|||
2026-05-31 18:42:56.872 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3011235/ - URL: https://movie.douban.com/subject/3011235/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3011235%2F&_s=621240a666ee2341b2c4906bf39ccba971809a5786f9252a20cdb3c26ac7d8ab&a=1' |
|||
2026-05-31 18:42:58.408 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1293350/ - URL: https://movie.douban.com/subject/1293350/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1293350%2F&_s=36f214e8b16e129ae9620e654ecd6d6470bb04b720325a9eb0ae30ba677bd89c&a=1' |
|||
2026-05-31 18:42:59.739 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1900841/ - URL: https://movie.douban.com/subject/1900841/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1900841%2F&_s=b70dffa2a1671d3938f1f827944fde1e209a99fa80769812cc0c16483c3c6644&a=1' |
|||
2026-05-31 18:43:01.072 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291543/ - URL: https://movie.douban.com/subject/1291543/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291543%2F&_s=a6ffcb7f3432741c0ab7b544ec757f58e6817ec41d5dd6b2490dc5c4af5bc266&a=1' |
|||
2026-05-31 18:43:02.327 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1294408/ - URL: https://movie.douban.com/subject/1294408/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1294408%2F&_s=557c8d1fb3030cdbca379210ca72aec0d6b06bd4030f8f3b1201b36404fe61f3&a=1' |
|||
2026-05-31 18:43:03.629 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291544/ - URL: https://movie.douban.com/subject/1291544/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291544%2F&_s=c8eed0481b6a5b595648b8f2ed8f6e2031f4f6bd94c9aa992e3975b4d6f59ef1&a=1' |
|||
2026-05-31 18:43:04.888 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1652587/ - URL: https://movie.douban.com/subject/1652587/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1652587%2F&_s=b21b94666e7fb556b6fb74f6273c0e47372117057009915a682e4fb33f0642c9&a=1' |
|||
2026-05-31 18:43:06.115 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292402/ - URL: https://movie.douban.com/subject/1292402/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292402%2F&_s=c825f34d21ab0f6caf93a1f9672e8965306f330d62f86592b735ae1da91fea0e&a=1' |
|||
2026-05-31 18:43:07.422 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/26580232/ - URL: https://movie.douban.com/subject/26580232/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F26580232%2F&_s=37bf7923323b85ff666a7b7db1a85208205073722a5bbc386aa74dacb6c59b20&a=1' |
|||
2026-05-31 18:43:07.422 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 4 页 (76): https://movie.douban.com/top250?start=75 |
|||
2026-05-31 18:43:08.778 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292849/ - URL: https://movie.douban.com/subject/1292849/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292849%2F&_s=32cbb0f3e6b579c6e03f743a2a000377430bc004825b0beb4908403045e87844&a=1' |
|||
2026-05-31 18:43:10.082 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1293544/ - URL: https://movie.douban.com/subject/1293544/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1293544%2F&_s=7b5cb6fd77701542835eb326270461c0ff7cfe215eda510286f1a02289714673&a=1' |
|||
2026-05-31 18:43:11.543 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292343/ - URL: https://movie.douban.com/subject/1292343/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292343%2F&_s=7d1d2a2de64316458c2471866ddf6ba2e6dfb7764a7a85702a4ce2d508833804&a=1' |
|||
2026-05-31 18:43:12.772 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/11525673/ - URL: https://movie.douban.com/subject/11525673/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F11525673%2F&_s=f5165f2567ab0cfdabfd49f8672bbce17250f2663e994bf2bfbd36ec40010626&a=1' |
|||
2026-05-31 18:43:14.001 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/36445098/ - URL: https://movie.douban.com/subject/36445098/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F36445098%2F&_s=6b35e3c49fc63ccfd4ecbd67afbe826a9618b5b0c40e885bf90727aceaf9c706&a=1' |
|||
2026-05-31 18:43:15.304 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292224/ - URL: https://movie.douban.com/subject/1292224/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292224%2F&_s=859cd9539bab0126ea599aa6e485f194c572953e5c254111e1a3e0605a1af20c&a=1' |
|||
2026-05-31 18:43:16.560 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/2334904/ - URL: https://movie.douban.com/subject/2334904/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F2334904%2F&_s=dbede70f0b96caf79fa5cebca4985027d7d3df755bd3b94f6068dbcb0decbc18&a=1' |
|||
2026-05-31 18:43:17.862 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292656/ - URL: https://movie.douban.com/subject/1292656/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292656%2F&_s=c46bdf1ae4923596b71132cfaf91ecd83172510c8afb1974ff567d1aa9c8cafa&a=1' |
|||
2026-05-31 18:43:19.120 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1780330/ - URL: https://movie.douban.com/subject/1780330/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1780330%2F&_s=9f49ae23f0fa49d2481224f2392edfa85e532f9dda2dda403f8d73587a155328&a=1' |
|||
2026-05-31 18:43:20.351 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291832/ - URL: https://movie.douban.com/subject/1291832/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291832%2F&_s=29471ba3628bc2e4b2d3dc3b4badc80866e6aa1bcea81961848d38b4b632e0da&a=1' |
|||
2026-05-31 18:43:21.652 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1296996/ - URL: https://movie.douban.com/subject/1296996/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1296996%2F&_s=6f2316a056f1ac5a15d446e67cd10dfe87733f288f3fde5ddd4b474423294dce&a=1' |
|||
2026-05-31 18:43:23.115 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292434/ - URL: https://movie.douban.com/subject/1292434/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292434%2F&_s=a154ccfc7e6a1708460dd0f7d53cca0fb62f560d3f3824ef6431cace7de7bae4&a=1' |
|||
2026-05-31 18:43:24.342 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1302425/ - URL: https://movie.douban.com/subject/1302425/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1302425%2F&_s=1f65248d3064d0103d7c4cf49d9ba3be166ef92a4a6d5b3437fd03244bdeed58&a=1' |
|||
2026-05-31 18:43:25.572 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1300299/ - URL: https://movie.douban.com/subject/1300299/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1300299%2F&_s=ce6056bd1cfbf84fc71d633290ea9ba43c6cfe478dd31cc56371380b269d6b44&a=1' |
|||
2026-05-31 18:43:26.876 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297192/ - URL: https://movie.douban.com/subject/1297192/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1297192%2F&_s=6921c85897845efd2e28fe8b2caafbaaa5169a44333bf303964b1a4bb4778344&a=1' |
|||
2026-05-31 18:43:28.211 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1294371/ - URL: https://movie.douban.com/subject/1294371/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1294371%2F&_s=d1c3fbaf78b2fe6a00945a74923ae90a08e509b7de641e60719c08a00419513e&a=1' |
|||
2026-05-31 18:43:29.463 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292679/ - URL: https://movie.douban.com/subject/1292679/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292679%2F&_s=c235e527ec9ab37b8ab3f30e48acba48d80cf9a1c7676887c32ab85e3b0c98cd&a=1' |
|||
2026-05-31 18:43:30.692 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1298070/ - URL: https://movie.douban.com/subject/1298070/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1298070%2F&_s=ef0275a3474c42a051004b3f0b3dcc1b074e6502d5004defb3dff072068a4f71&a=1' |
|||
2026-05-31 18:43:31.996 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3442220/ - URL: https://movie.douban.com/subject/3442220/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3442220%2F&_s=0c113778c4a4cb4f19d31ba901cda8f50ee40924b808b4702817c25018c1013c&a=1' |
|||
2026-05-31 18:43:33.328 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292262/ - URL: https://movie.douban.com/subject/1292262/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292262%2F&_s=36bd396dea0cff31fb41630e0344adb41fd0444d8d3e3b246bbd995de9b041dc&a=1' |
|||
2026-05-31 18:43:34.862 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292223/ - URL: https://movie.douban.com/subject/1292223/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292223%2F&_s=1151fe87c59895d0df88484fff6e3e3b4f5cef133cd52a517b493399b9ba2487&a=1' |
|||
2026-05-31 18:43:36.193 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1306249/ - URL: https://movie.douban.com/subject/1306249/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1306249%2F&_s=1d261e8566a53e176e353b3aa7382a5355bb300894c1b98e8c6211c1150999ae&a=1' |
|||
2026-05-31 18:43:36.193 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 5 页 (101): https://movie.douban.com/top250?start=100 |
|||
2026-05-31 18:43:37.552 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/6985810/ - URL: https://movie.douban.com/subject/6985810/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F6985810%2F&_s=74876346318e93441df416f05fb6980b7442d097670fadbb8ebf4de0cb0a0a50&a=1' |
|||
2026-05-31 18:43:38.782 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297359/ - URL: https://movie.douban.com/subject/1297359/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1297359%2F&_s=310f437abcd0e7003d4f839ab3142c04cdfd92330e8daf5987c1bacbc0494299&a=1' |
|||
2026-05-31 18:43:40.011 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/27010768/ - URL: https://movie.douban.com/subject/27010768/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F27010768%2F&_s=09b80065b1c69f9cfbb0858563c8d04028f9bdb5cf5e19e1275c5c8be54ad835&a=1' |
|||
2026-05-31 18:43:41.313 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1305164/ - URL: https://movie.douban.com/subject/1305164/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1305164%2F&_s=aa82c2a6cd99a5662a8fd3081e3e29f548efecd08ee60ddb40834a9ce32ebdc3&a=1' |
|||
2026-05-31 18:43:42.646 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1428581/ - URL: https://movie.douban.com/subject/1428581/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1428581%2F&_s=2f78991b75ac2abbf326aa4a70fb078f681424fa86124b49737f17eb62975a07&a=1' |
|||
2026-05-31 18:43:43.901 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3395373/ - URL: https://movie.douban.com/subject/3395373/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F3395373%2F&_s=9fd928e7604d4f31b6f275c15b847b9bec54551d1ec5a8158d126ac458124c37&a=1' |
|||
2026-05-31 18:43:45.131 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/11026735/ - URL: https://movie.douban.com/subject/11026735/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F11026735%2F&_s=5fd1aff37bfeb5f5a31177ee7a0d0b18eee81de1fe6d188a65395aeb9e8a96bf&a=1' |
|||
2026-05-31 18:43:46.564 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1461403/ - URL: https://movie.douban.com/subject/1461403/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1461403%2F&_s=063a6bf27b6dbf804a9c874bc11de8981972f28d37723fc3f296ef9acac958f0&a=1' |
|||
2026-05-31 18:43:47.791 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297630/ - URL: https://movie.douban.com/subject/1297630/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1297630%2F&_s=f4bd0265959c2d5bc5ca060e5e47184986e9d0a4a0c18f4bb6f4c15aba383494&a=1' |
|||
2026-05-31 18:43:49.096 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1296339/ - URL: https://movie.douban.com/subject/1296339/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1296339%2F&_s=5a00371252e2f52cbd14e3128e970595bd346189f402b0e07a27a907625d62a6&a=1' |
|||
2026-05-31 18:43:50.352 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291990/ - URL: https://movie.douban.com/subject/1291990/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291990%2F&_s=a24e3725866b29ec0d44df6bc64c4172fb1f1d5d27f0a20f24ae04c06e164bbc&a=1' |
|||
2026-05-31 18:43:51.582 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/10533913/ - URL: https://movie.douban.com/subject/10533913/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F10533913%2F&_s=d2fd748002fd974576cd53c47d689a32e32c4fe3b0268dc50aae615b88a8015d&a=1' |
|||
2026-05-31 18:43:52.886 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1787291/ - URL: https://movie.douban.com/subject/1787291/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1787291%2F&_s=6beeaeccfbd330a15e43dad0a34b6afe0db83ec1d4cb15527857485530fae5aa&a=1' |
|||
2026-05-31 18:43:54.215 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1309055/ - URL: https://movie.douban.com/subject/1309055/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1309055%2F&_s=b009b9b4b94acca5e91af3bf07fa4b5217f37668053c65d7bcadc52241b4a29a&a=1' |
|||
2026-05-31 18:43:55.473 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1395091/ - URL: https://movie.douban.com/subject/1395091/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1395091%2F&_s=88f43f67fe0da3489e08c7931ce1a980092b15dda7e4dc4fdc9a739e60dd0ca7&a=1' |
|||
2026-05-31 18:43:56.701 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291999/ - URL: https://movie.douban.com/subject/1291999/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1291999%2F&_s=df6d3ad1072fc6b0e67f017e5d4cc6fa42333149ace310ccf7be0a6844494f31&a=1' |
|||
2026-05-31 18:43:57.929 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/4202302/ - URL: https://movie.douban.com/subject/4202302/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F4202302%2F&_s=a2329babeea68ba5749237b37e7f2fb3110573f56e8d8af32efef450d7734e4b&a=1' |
|||
2026-05-31 18:43:59.442 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/2149806/ - URL: https://movie.douban.com/subject/2149806/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F2149806%2F&_s=1f0cdbab1edffa827fbbfa7b5850b3c90d870bb03d805a9296e9da4d56ab8ea9&a=1' |
|||
2026-05-31 18:44:00.772 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292370/ - URL: https://movie.douban.com/subject/1292370/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1292370%2F&_s=082160fa52606c8c4014f7673d76a04718c8ebe809528a94c29c1487d886d5b1&a=1' |
|||
2026-05-31 18:44:02.025 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1418834/ - URL: https://movie.douban.com/subject/1418834/, 网络请求失败: Circular redirect to 'https://sec.douban.com/c?r=https%3A%2F%2Fmovie.douban.com%2Fsubject%2F1418834%2F&_s=71e48b81458a069aedcd1eb1211c9883d5b58f053ccc43f707b6877d493b0a4e&a=1' |
|||
2026-05-31 18:44:03.432 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1294639/ - URL: https://movie.douban.com/subject/1294639/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:04.866 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/10577869/ - URL: https://movie.douban.com/subject/10577869/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:06.301 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/2353023/ - URL: https://movie.douban.com/subject/2353023/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:06.301 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 6 页 (126): https://movie.douban.com/top250?start=125 |
|||
2026-05-31 18:44:07.733 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1418200/ - URL: https://movie.douban.com/subject/1418200/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:09.169 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/21318488/ - URL: https://movie.douban.com/subject/21318488/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:10.806 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297447/ - URL: https://movie.douban.com/subject/1297447/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:12.271 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/10437779/ - URL: https://movie.douban.com/subject/10437779/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:13.608 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291557/ - URL: https://movie.douban.com/subject/1291557/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:14.987 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1858711/ - URL: https://movie.douban.com/subject/1858711/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:16.301 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/26628357/ - URL: https://movie.douban.com/subject/26628357/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:17.604 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1828115/ - URL: https://movie.douban.com/subject/1828115/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:17.760 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1300992/ - URL: https://movie.douban.com/subject/1300992/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:17.905 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291579/ - URL: https://movie.douban.com/subject/1291579/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.009 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1294240/ - URL: https://movie.douban.com/subject/1294240/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.073 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1291875/ - URL: https://movie.douban.com/subject/1291875/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.146 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/25814705/ - URL: https://movie.douban.com/subject/25814705/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.209 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292215/ - URL: https://movie.douban.com/subject/1292215/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.274 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297052/ - URL: https://movie.douban.com/subject/1297052/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.484 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1307315/ - URL: https://movie.douban.com/subject/1307315/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.688 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1297518/ - URL: https://movie.douban.com/subject/1297518/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:18.893 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/6307447/ - URL: https://movie.douban.com/subject/6307447/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:19.098 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/26799731/ - URL: https://movie.douban.com/subject/26799731/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:19.303 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1292274/ - URL: https://movie.douban.com/subject/1292274/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:19.508 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/25986180/ - URL: https://movie.douban.com/subject/25986180/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:19.714 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/3287562/ - URL: https://movie.douban.com/subject/3287562/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:19.916 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/25814707/ - URL: https://movie.douban.com/subject/25814707/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:20.123 [main] WARN com.spider.service.DoubanMovieSpider - 获取票房失败: https://movie.douban.com/subject/1303037/ - URL: https://movie.douban.com/subject/1303037/, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:20.123 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 7 页 (151): https://movie.douban.com/top250?start=150 |
|||
2026-05-31 18:44:20.334 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.335 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:44:20.335 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:44:20.768 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:44:20.779 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.781 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败 |
|||
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94) |
|||
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:148) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.781 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:44:20.782 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:59.381 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:44:59.381 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:45:00.572 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:00.573 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:45:00.573 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:45:01.081 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:01.082 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:45:01.083 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:45:01.652 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:45:01.661 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:01.662 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 18:45:01.662 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:45:01.663 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:53.993 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:45:53.994 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:45:55.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.050 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:45:55.050 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:45:55.558 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.559 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:45:55.559 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:45:55.943 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:45:55.954 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.956 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 18:45:55.956 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:45:55.956 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:31.850 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:47:31.852 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:47:33.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:33.050 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:47:33.050 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:47:33.557 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:33.561 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:47:33.561 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:47:34.247 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:47:34.257 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:34.258 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 18:47:34.258 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:47:34.259 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:48:59.835 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:48:59.836 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:49:01.109 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:01.111 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:49:01.111 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:49:01.622 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:01.623 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:49:01.623 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:49:02.005 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:49:02.016 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:02.017 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 18:49:02.017 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:49:02.018 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:14.987 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 18:50:14.988 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 18:50:16.271 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:16.274 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 18:50:16.274 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 18:50:16.784 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:16.785 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 18:50:16.785 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 18:50:17.159 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 18:50:17.167 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:17.168 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 18:50:17.168 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 18:50:17.168 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:19.560 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:12:19.560 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:12:20.722 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:20.724 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:12:20.724 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:12:21.232 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:21.232 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:12:21.233 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:12:21.806 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:12:21.816 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:21.816 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:12:21.816 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:12:21.818 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:43.057 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:13:43.058 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:13:44.179 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:44.182 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:13:44.182 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:13:44.690 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:44.692 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:13:44.692 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:13:45.181 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:13:45.193 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:45.194 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:13:45.195 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:13:45.196 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:12.114 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:14:12.115 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:14:13.362 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:13.364 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:14:13.364 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:14:13.873 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:13.875 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:14:13.875 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:14:14.261 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:14:14.272 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:14.274 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:14:14.274 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:14:14.275 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:46.973 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:16:46.973 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:16:48.090 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:48.092 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:16:48.092 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:16:48.602 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:48.605 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:16:48.605 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:16:49.094 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:16:49.103 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
2026-05-31 19:16:49.103 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:16:49.104 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:16:49.105 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
2026-05-31 19:17:30.989 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:17:30.990 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:17:32.122 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:32.126 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:17:32.126 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:17:32.632 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:32.634 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:17:32.634 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:17:32.916 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:17:32.932 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
2026-05-31 19:17:32.934 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:17:32.934 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:17:32.935 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
2026-05-31 19:17:58.092 [main] INFO com.spider.service.DoubanBookSpider - 开始爬取豆瓣读书热度最高的 50 本书... |
|||
2026-05-31 19:17:58.093 [main] INFO com.spider.service.DoubanBookSpider - 正在抓取第 1 页: https://book.douban.com/chart?sub_type=1 |
|||
2026-05-31 19:17:59.360 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:59.362 [main] INFO com.spider.service.DoubanMovieSpider - 开始爬取豆瓣电影Top250... |
|||
2026-05-31 19:17:59.362 [main] INFO com.spider.service.DoubanMovieSpider - 正在抓取第 1 页 (1): https://movie.douban.com/top250 |
|||
2026-05-31 19:17:59.874 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:59.877 [main] INFO com.spider.service.BaiduHotSearchSpider - 开始爬取百度实时热搜榜前 50 条... |
|||
2026-05-31 19:17:59.877 [main] INFO com.spider.service.BaiduHotSearchSpider - 正在抓取: https://top.baidu.com/board?tab=realtime |
|||
2026-05-31 19:18:00.365 [main] INFO com.spider.service.BaiduHotSearchSpider - 百度热搜爬取完成,共获取 50 条热搜 |
|||
2026-05-31 19:18:00.376 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
2026-05-31 19:18:00.376 [main] WARN com.spider.service.DataStorageService - 没有书籍数据可保存 |
|||
2026-05-31 19:18:00.377 [main] WARN com.spider.service.DataStorageService - 没有电影数据可保存 |
|||
2026-05-31 19:18:00.379 [main] INFO com.spider.service.DataStorageService - 热搜数据已保存到: data\hotsearch_new.csv |
|||
@ -1,447 +0,0 @@ |
|||
2026-05-31 18:41:43.730 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败 |
|||
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94) |
|||
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:69) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.334 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.334 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:20.335 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250?start=150, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:44:20.779 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.781 [main] ERROR com.spider.service.DataStorageService - 保存书籍数据失败 |
|||
java.io.FileNotFoundException: data\books.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveBooksToCsv(DataStorageService.java:94) |
|||
at com.spider.service.DataStorageService.saveBooks(DataStorageService.java:50) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:148) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:44:20.782 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:00.572 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:00.573 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:00.573 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:01.081 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:01.082 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:01.082 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:01.661 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:01.663 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.050 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:55.050 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:55.558 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.558 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:55.558 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:45:55.954 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:45:55.956 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:33.048 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:33.050 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:47:33.050 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:47:33.557 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:33.559 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:47:33.560 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:47:34.257 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:47:34.259 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:01.109 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:01.111 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:49:01.111 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:49:01.622 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:01.622 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:49:01.622 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:49:02.016 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:49:02.018 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:16.271 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:16.273 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:50:16.273 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:50:16.784 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:16.785 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:50:16.785 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 18:50:17.167 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 18:50:17.168 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:130) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:20.722 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:20.723 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:12:20.723 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:12:21.232 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:21.232 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:12:21.232 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:12:21.816 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:12:21.818 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:44.179 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:44.182 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:13:44.182 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:13:44.690 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:44.691 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:13:44.692 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:13:45.193 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:13:45.196 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:13.362 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:13.364 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:14:13.364 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:14:13.873 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:13.873 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:14:13.873 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:14:14.272 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.crawlBaiduHotSearch(SpiderController.java:129) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:44) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:14:14.275 [main] ERROR com.spider.service.DataStorageService - 保存热搜数据失败 |
|||
java.io.FileNotFoundException: data\hotsearch.csv (另一个程序正在使用此文件,进程无法访问。) |
|||
at java.base/java.io.FileOutputStream.open0(Native Method) |
|||
at java.base/java.io.FileOutputStream.open(FileOutputStream.java:293) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:235) |
|||
at java.base/java.io.FileOutputStream.<init>(FileOutputStream.java:123) |
|||
at java.base/java.io.FileWriter.<init>(FileWriter.java:66) |
|||
at com.spider.service.DataStorageService.saveHotSearchToCsv(DataStorageService.java:128) |
|||
at com.spider.service.DataStorageService.saveHotSearch(DataStorageService.java:84) |
|||
at com.spider.controller.SpiderController.saveAllData(SpiderController.java:150) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:47) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:48.090 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:48.091 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:16:48.092 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:16:48.602 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:16:48.603 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:16:48.604 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:32.122 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:32.123 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:32.125 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:32.632 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:32.633 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:32.634 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:59.360 [main] ERROR com.spider.service.DoubanBookSpider - 爬取豆瓣读书时出错 |
|||
com.spider.exception.NetworkException: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanBookSpider.crawlHotBooks(DoubanBookSpider.java:55) |
|||
at com.spider.controller.SpiderController.crawlDoubanBooks(SpiderController.java:66) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:38) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:59.361 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:59.362 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://book.douban.com/chart?sub_type=1, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:59.874 [main] ERROR com.spider.service.DoubanMovieSpider - 爬取豆瓣电影时出错 |
|||
com.spider.exception.NetworkException: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
at com.spider.utils.HttpClientUtil.fetchHtml(HttpClientUtil.java:49) |
|||
at com.spider.service.DoubanMovieSpider.crawlTop250(DoubanMovieSpider.java:51) |
|||
at com.spider.controller.SpiderController.crawlDoubanMovies(SpiderController.java:96) |
|||
at com.spider.core.SpiderRunner.runOneClickMode(SpiderRunner.java:41) |
|||
at com.spider.core.SpiderRunner.main(SpiderRunner.java:20) |
|||
2026-05-31 19:17:59.875 [main] ERROR com.spider.controller.SpiderController - 网络异常 - URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
2026-05-31 19:17:59.876 [main] ERROR com.spider.view.ConsoleView - 错误: 网络错误: URL: https://movie.douban.com/top250, 访问被拒绝,可能需要设置User-Agent或等待一段时间 |
|||
File diff suppressed because it is too large
@ -1,99 +0,0 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
                             http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <!-- Project coordinates -->
  <groupId>com.spider</groupId>
  <artifactId>spider</artifactId>
  <version>1.0.0</version>
  <packaging>jar</packaging>

  <name>Spider Project</name>
  <description>A Java Web Spider Framework</description>

  <!-- Sources are UTF-8 and compiled for Java 11 -->
  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- HTML parsing -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.15.3</version>
    </dependency>

    <!-- HTTP client -->
    <dependency>
      <groupId>org.apache.httpcomponents.client5</groupId>
      <artifactId>httpclient5</artifactId>
      <version>5.2.1</version>
    </dependency>

    <!-- Logging API and its logback binding -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>2.0.7</version>
    </dependency>

    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.4.11</version>
    </dependency>

    <!-- JSON data binding -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.15.2</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <!-- Compile for Java 11 (same level as the properties above) -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.11.0</version>
        <configuration>
          <source>11</source>
          <target>11</target>
        </configuration>
      </plugin>

      <!-- Record the entry point in the plain jar's manifest -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>3.3.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.spider.core.SpiderRunner</mainClass>
            </manifest>
          </archive>
        </configuration>
      </plugin>

      <!-- Build a shaded jar at package time with the same entry point -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-shade-plugin</artifactId>
        <version>3.5.0</version>
        <executions>
          <execution>
            <phase>package</phase>
            <goals>
              <goal>shade</goal>
            </goals>
            <configuration>
              <transformers>
                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
                  <mainClass>com.spider.core.SpiderRunner</mainClass>
                </transformer>
              </transformers>
            </configuration>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
|||
@ -1,7 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
/**
 * Contract implemented by every console command in this package.
 */
public interface Command {

    /**
     * @return the name of the command, e.g. {@code "crawl"} or {@code "config"}
     */
    String getName();

    /**
     * @return a short human-readable description of the command
     */
    String getDescription();

    /**
     * Runs the command.
     *
     * @param args command tokens; the implementations in this package read the
     *             sub-action from index 1, so index 0 is presumably the command
     *             word itself (confirm against the dispatcher)
     */
    void execute(String[] args);
}
|||
@ -1,69 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
import com.spider.view.ConsoleView; |
|||
import com.spider.view.ViewFactory; |
|||
|
|||
public class ConfigCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(ConfigCommand.class); |
|||
private final ConsoleView view; |
|||
|
|||
public ConfigCommand() { |
|||
this.view = ViewFactory.createConsoleView(); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "config"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "配置爬虫参数"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
if (args.length < 2) { |
|||
showHelp(); |
|||
return; |
|||
} |
|||
|
|||
String action = args[1]; |
|||
|
|||
switch (action) { |
|||
case "show": |
|||
ControllerFactory.getSpiderController().showConfig(); |
|||
break; |
|||
case "set": |
|||
if (args.length < 4) { |
|||
logger.error("用法: config set <key> <value>"); |
|||
view.showHelp(); |
|||
} else { |
|||
ControllerFactory.getSpiderController().updateConfig(args[2], args[3]); |
|||
} |
|||
break; |
|||
case "list": |
|||
listConfig(); |
|||
break; |
|||
default: |
|||
logger.error("未知操作: {}", action); |
|||
showHelp(); |
|||
} |
|||
} |
|||
|
|||
private void listConfig() { |
|||
logger.info("可配置的参数:"); |
|||
logger.info(" - thread.count : 线程数"); |
|||
logger.info(" - timeout : 超时时间(ms)"); |
|||
logger.info(" - retry.count : 重试次数"); |
|||
logger.info(" - user.agent : User-Agent"); |
|||
} |
|||
|
|||
private void showHelp() { |
|||
view.showHelp(); |
|||
} |
|||
} |
|||
@ -1,94 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
import com.spider.utils.RetryUtils; |
|||
|
|||
public class CrawlCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "crawl"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "爬取指定数据源"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
if (args.length < 2) { |
|||
showHelp(); |
|||
return; |
|||
} |
|||
|
|||
String target = args[1].toLowerCase(); |
|||
|
|||
switch (target) { |
|||
case "books": |
|||
int bookLimit = args.length > 2 ? parseInt(args[2]) : 50; |
|||
logger.info("执行 crawl books 命令,数量: {}", bookLimit); |
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanBooks(bookLimit)); |
|||
break; |
|||
|
|||
case "movies": |
|||
logger.info("执行 crawl movies 命令"); |
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanMovies()); |
|||
break; |
|||
|
|||
case "hotsearch": |
|||
int hotLimit = args.length > 2 ? parseInt(args[2]) : 50; |
|||
logger.info("执行 crawl hotsearch 命令,数量: {}", hotLimit); |
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlBaiduHotSearch(hotLimit)); |
|||
break; |
|||
|
|||
case "all": |
|||
logger.info("执行 crawl all 命令,开始爬取所有数据源..."); |
|||
crawlAllDataWithRetry(); |
|||
break; |
|||
|
|||
default: |
|||
logger.warn("未知爬取目标: {}", target); |
|||
showHelp(); |
|||
} |
|||
} |
|||
|
|||
private void crawlWithRetry(Runnable task) { |
|||
try { |
|||
RetryUtils.executeWithRetry(task, 3, 2000); |
|||
logger.info("爬取任务完成"); |
|||
} catch (Exception e) { |
|||
logger.error("爬取任务失败: {}", e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
private void crawlAllDataWithRetry() { |
|||
logger.info("=== 开始爬取所有数据 ==="); |
|||
|
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanBooks(50)); |
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlDoubanMovies()); |
|||
crawlWithRetry(() -> ControllerFactory.getSpiderController().crawlBaiduHotSearch(50)); |
|||
|
|||
logger.info("=== 所有数据爬取完成 ==="); |
|||
} |
|||
|
|||
private void showHelp() { |
|||
logger.info("crawl 命令用法:"); |
|||
logger.info(" crawl books [数量] - 爬取豆瓣读书 TopN (默认50)"); |
|||
logger.info(" crawl movies - 爬取豆瓣电影 Top250"); |
|||
logger.info(" crawl hotsearch [数量]- 爬取百度热搜 TopN (默认50)"); |
|||
logger.info(" crawl all - 爬取所有数据源"); |
|||
} |
|||
|
|||
private int parseInt(String str) { |
|||
try { |
|||
return Integer.parseInt(str); |
|||
} catch (NumberFormatException e) { |
|||
return 50; |
|||
} |
|||
} |
|||
} |
|||
@ -1,81 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "显示帮助信息"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
logger.info("\n┌─ Spider 多功能爬虫框架 帮助 ──────────────────────────┐"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据爬取命令】 │"); |
|||
logger.info("│ crawl books [N] 爬取豆瓣读书 Top N (默认50) │"); |
|||
logger.info("│ crawl movies 爬取豆瓣电影 Top250 │"); |
|||
logger.info("│ crawl hotsearch [N] 爬取百度热搜 Top N (默认50) │"); |
|||
logger.info("│ crawl all 爬取所有数据源 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据保存命令】 │"); |
|||
logger.info("│ save books [fname] 保存书籍数据到文件 │"); |
|||
logger.info("│ save movies [fname] 保存电影数据到文件 │"); |
|||
logger.info("│ save hotsearch [fn] 保存热搜数据到文件 │"); |
|||
logger.info("│ save all 保存所有已爬取的数据 │"); |
|||
logger.info("│ save files 列出已保存的文件 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据加载命令】 │"); |
|||
logger.info("│ load books <fname> 从文件加载书籍数据 │"); |
|||
logger.info("│ load movies <fname> 从文件加载电影数据 │"); |
|||
logger.info("│ load hotsearch <fn> 从文件加载热搜数据 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据列表命令】 │"); |
|||
logger.info("│ list books 显示已爬取的书籍 │"); |
|||
logger.info("│ list movies 显示已爬取的电影 │"); |
|||
logger.info("│ list hotsearch 显示已爬取的热搜 │"); |
|||
logger.info("│ list all 显示所有已爬取的数据 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【控制命令】 │"); |
|||
logger.info("│ start [url] 启动爬虫任务 │"); |
|||
logger.info("│ stop 停止爬虫任务 │"); |
|||
logger.info("│ status 查看爬虫运行状态 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【配置命令】 │"); |
|||
logger.info("│ config show 显示当前配置 │"); |
|||
logger.info("│ config set <k> <v> 设置配置项 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【其他】 │"); |
|||
logger.info("│ help 显示帮助信息 │"); |
|||
logger.info("│ exit 退出程序 │"); |
|||
logger.info("│ │"); |
|||
logger.info("└────────────────────────────────────────────────────────────┘"); |
|||
logger.info("\n使用示例:"); |
|||
logger.info(" 爬取数据:"); |
|||
logger.info(" crawl books 50 # 爬取豆瓣读书Top50"); |
|||
logger.info(" crawl movies # 爬取豆瓣电影Top250"); |
|||
logger.info(" crawl hotsearch 50 # 爬取百度热搜Top50"); |
|||
logger.info(" crawl all # 爬取所有数据"); |
|||
logger.info(""); |
|||
logger.info(" 保存和加载:"); |
|||
logger.info(" save all # 保存所有数据到文件"); |
|||
logger.info(" save books my.json # 保存书籍到指定文件"); |
|||
logger.info(" save files # 查看已保存的文件"); |
|||
logger.info(" load books my.json # 加载书籍数据"); |
|||
logger.info(""); |
|||
logger.info(" 查看数据:"); |
|||
logger.info(" list books # 查看已爬取的书籍"); |
|||
logger.info(" list movies # 查看已爬取的电影"); |
|||
logger.info(""); |
|||
logger.info(" 配置:"); |
|||
logger.info(" config set thread.count 10 # 设置线程数"); |
|||
} |
|||
} |
|||
@ -1,104 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
|
|||
public class ListCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "list"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "列出爬取的数据"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
if (args.length < 2) { |
|||
showHelp(); |
|||
return; |
|||
} |
|||
|
|||
String type = args[1].toLowerCase(); |
|||
|
|||
switch (type) { |
|||
case "books": |
|||
listBooks(); |
|||
break; |
|||
case "movies": |
|||
listMovies(); |
|||
break; |
|||
case "hotsearch": |
|||
listHotSearch(); |
|||
break; |
|||
case "all": |
|||
listAll(); |
|||
break; |
|||
default: |
|||
logger.error("未知数据类型: {}", type); |
|||
showHelp(); |
|||
} |
|||
} |
|||
|
|||
private void listBooks() { |
|||
var books = ControllerFactory.getSpiderController().getBooks(); |
|||
if (books == null || books.isEmpty()) { |
|||
logger.info("暂无书籍数据,请先执行 crawl books 命令"); |
|||
return; |
|||
} |
|||
logger.info("=== 已爬取的书籍 (共 {} 本) ===", books.size()); |
|||
for (int i = 0; i < books.size(); i++) { |
|||
var book = books.get(i); |
|||
logger.info("{}. 《{}》 评分:{} 作者:{}", |
|||
i + 1, book.getTitle(), book.getRating(), book.getAuthor()); |
|||
} |
|||
} |
|||
|
|||
private void listMovies() { |
|||
var movies = ControllerFactory.getSpiderController().getMovies(); |
|||
if (movies == null || movies.isEmpty()) { |
|||
logger.info("暂无电影数据,请先执行 crawl movies 命令"); |
|||
return; |
|||
} |
|||
logger.info("=== 已爬取的电影 (共 {} 部) ===", movies.size()); |
|||
for (var movie : movies) { |
|||
logger.info("Top{}. 《{}》 评分:{} 导演:{}", |
|||
movie.getRank(), movie.getTitle(), movie.getRating(), movie.getDirector()); |
|||
} |
|||
} |
|||
|
|||
private void listHotSearch() { |
|||
var hotSearches = ControllerFactory.getSpiderController().getHotSearches(); |
|||
if (hotSearches == null || hotSearches.isEmpty()) { |
|||
logger.info("暂无热搜数据,请先执行 crawl hotsearch 命令"); |
|||
return; |
|||
} |
|||
logger.info("=== 已爬取的热搜 (共 {} 条) ===", hotSearches.size()); |
|||
for (var hotSearch : hotSearches) { |
|||
logger.info("{}. {}", |
|||
hotSearch.getRank(), hotSearch.getKeyword()); |
|||
} |
|||
} |
|||
|
|||
private void listAll() { |
|||
listBooks(); |
|||
logger.info(""); |
|||
listMovies(); |
|||
logger.info(""); |
|||
listHotSearch(); |
|||
} |
|||
|
|||
private void showHelp() { |
|||
logger.info("list 命令用法:"); |
|||
logger.info(" list books - 列出已爬取的书籍"); |
|||
logger.info(" list movies - 列出已爬取的电影"); |
|||
logger.info(" list hotsearch - 列出已爬取的热搜"); |
|||
logger.info(" list all - 列出所有已爬取的数据"); |
|||
} |
|||
} |
|||
@ -1,56 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import java.io.File; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.view.ConsoleView; |
|||
import com.spider.view.ViewFactory; |
|||
|
|||
public class LoadCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(LoadCommand.class); |
|||
private final ConsoleView view; |
|||
|
|||
public LoadCommand() { |
|||
this.view = ViewFactory.createConsoleView(); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "load"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "查看已保存的CSV数据文件"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
view.showInfo("数据已改为CSV格式自动保存,每次爬取会自动覆盖。\n"); |
|||
view.showInfo("已保存的CSV文件位于 data 目录下:\n"); |
|||
|
|||
File dataDir = new File("data"); |
|||
if (!dataDir.exists()) { |
|||
view.showInfo("data 目录不存在,请先运行爬虫。"); |
|||
return; |
|||
} |
|||
|
|||
File[] csvFiles = dataDir.listFiles((dir, name) -> name.endsWith(".csv")); |
|||
if (csvFiles == null || csvFiles.length == 0) { |
|||
view.showInfo("没有找到CSV文件,请先运行爬虫。"); |
|||
return; |
|||
} |
|||
|
|||
view.showInfo("┌─ 已保存的CSV数据文件 ─────────────────┐"); |
|||
for (File file : csvFiles) { |
|||
long size = file.length(); |
|||
String sizeStr = size < 1024 ? size + " B" : |
|||
size < 1024 * 1024 ? (size / 1024) + " KB" : |
|||
(size / 1024 / 1024) + " MB"; |
|||
view.showInfo("│ " + file.getName() + " (" + sizeStr + ")"); |
|||
} |
|||
view.showInfo("└─────────────────────────────────────────┘"); |
|||
} |
|||
} |
|||
@ -1,66 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
|
|||
public class SaveCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(SaveCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "save"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "保存爬取的数据到文件"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
if (args.length < 2) { |
|||
showHelp(); |
|||
return; |
|||
} |
|||
|
|||
String target = args[1].toLowerCase(); |
|||
String filename = args.length > 2 ? args[2] : null; |
|||
|
|||
switch (target) { |
|||
case "books": |
|||
ControllerFactory.getSpiderController().saveBooks(filename); |
|||
break; |
|||
case "movies": |
|||
ControllerFactory.getSpiderController().saveMovies(filename); |
|||
break; |
|||
case "hotsearch": |
|||
ControllerFactory.getSpiderController().saveHotSearch(filename); |
|||
break; |
|||
case "all": |
|||
ControllerFactory.getSpiderController().saveAllData(); |
|||
break; |
|||
case "files": |
|||
ControllerFactory.getSpiderController().listSavedFiles(); |
|||
break; |
|||
default: |
|||
logger.error("未知保存目标: {}", target); |
|||
showHelp(); |
|||
} |
|||
} |
|||
|
|||
private void showHelp() { |
|||
logger.info("save 命令用法:"); |
|||
logger.info(" save books [filename] - 保存书籍数据到文件"); |
|||
logger.info(" save movies [filename] - 保存电影数据到文件"); |
|||
logger.info(" save hotsearch [fname] - 保存热搜数据到文件"); |
|||
logger.info(" save all - 保存所有数据"); |
|||
logger.info(" save files - 列出已保存的文件"); |
|||
logger.info(""); |
|||
logger.info("示例:"); |
|||
logger.info(" save books my_books.json"); |
|||
logger.info(" save movies"); |
|||
logger.info(" save all"); |
|||
} |
|||
} |
|||
@ -1,31 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
|
|||
public class StartCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(StartCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "start"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "启动爬虫任务"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
String url = "https://example.com"; |
|||
if (args.length > 1) { |
|||
url = args[1]; |
|||
} |
|||
|
|||
logger.info("执行 start 命令,URL: {}", url); |
|||
ControllerFactory.getSpiderController().startSpider(url); |
|||
} |
|||
} |
|||
@ -1,26 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
|
|||
public class StatusCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(StatusCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "status"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "查看爬虫状态"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
logger.info("执行 status 命令"); |
|||
ControllerFactory.getSpiderController().showStatus(); |
|||
} |
|||
} |
|||
@ -1,26 +0,0 @@ |
|||
package com.spider.command; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
|
|||
public class StopCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(StopCommand.class); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "stop"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "停止爬虫任务"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
logger.info("执行 stop 命令"); |
|||
ControllerFactory.getSpiderController().stopSpider(); |
|||
} |
|||
} |
|||
@ -1,25 +0,0 @@ |
|||
package com.spider.controller; |
|||
|
|||
import com.spider.command.Command; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
public class ControllerFactory { |
|||
private static final Logger logger = LoggerFactory.getLogger(ControllerFactory.class); |
|||
|
|||
private static SpiderController spiderController; |
|||
|
|||
public static void initController() { |
|||
if (spiderController == null) { |
|||
spiderController = new ControllerInitializer().createController(); |
|||
logger.info("控制器初始化完成"); |
|||
} |
|||
} |
|||
|
|||
public static SpiderController getSpiderController() { |
|||
if (spiderController == null) { |
|||
initController(); |
|||
} |
|||
return spiderController; |
|||
} |
|||
} |
|||
@ -1,19 +0,0 @@ |
|||
package com.spider.controller; |
|||
|
|||
import com.spider.model.SpiderConfig; |
|||
import com.spider.view.ConsoleView; |
|||
import com.spider.view.ViewFactory; |
|||
|
|||
public class ControllerInitializer { |
|||
public SpiderController createController() { |
|||
SpiderConfig config = new SpiderConfig(); |
|||
config.setThreadCount(5); |
|||
config.setTimeout(30000); |
|||
config.setRetryCount(3); |
|||
config.setUserAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64)"); |
|||
|
|||
ConsoleView view = ViewFactory.createConsoleView(); |
|||
|
|||
return new SpiderController(config, view); |
|||
} |
|||
} |
|||
@ -1,330 +0,0 @@ |
|||
package com.spider.controller; |
|||
|
|||
import java.io.File; |
|||
import java.util.List; |
|||
import java.util.concurrent.atomic.AtomicBoolean; |
|||
import java.util.concurrent.atomic.AtomicInteger; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.NetworkException; |
|||
import com.spider.exception.ParseException; |
|||
import com.spider.model.Book; |
|||
import com.spider.model.HotSearch; |
|||
import com.spider.model.Movie; |
|||
import com.spider.model.SpiderConfig; |
|||
import com.spider.service.BaiduHotSearchSpider; |
|||
import com.spider.service.DataStorageService; |
|||
import com.spider.service.DoubanBookSpider; |
|||
import com.spider.service.DoubanMovieSpider; |
|||
import com.spider.view.ConsoleView; |
|||
|
|||
public class SpiderController { |
|||
private static final Logger logger = LoggerFactory.getLogger(SpiderController.class); |
|||
|
|||
private final SpiderConfig config; |
|||
private final ConsoleView view; |
|||
private final AtomicBoolean isRunning; |
|||
private final AtomicInteger pagesCrawled; |
|||
private final AtomicInteger totalRequests; |
|||
private final AtomicInteger failedRequests; |
|||
|
|||
private DoubanBookSpider doubanBookSpider; |
|||
private DoubanMovieSpider doubanMovieSpider; |
|||
private BaiduHotSearchSpider baiduHotSearchSpider; |
|||
private DataStorageService storageService; |
|||
|
|||
private List<Book> books; |
|||
private List<Movie> movies; |
|||
private List<HotSearch> hotSearches; |
|||
|
|||
public SpiderController(SpiderConfig config, ConsoleView view) { |
|||
this.config = config; |
|||
this.view = view; |
|||
this.isRunning = new AtomicBoolean(false); |
|||
this.pagesCrawled = new AtomicInteger(0); |
|||
this.totalRequests = new AtomicInteger(0); |
|||
this.failedRequests = new AtomicInteger(0); |
|||
|
|||
this.doubanBookSpider = new DoubanBookSpider(); |
|||
this.doubanMovieSpider = new DoubanMovieSpider(); |
|||
this.baiduHotSearchSpider = new BaiduHotSearchSpider(); |
|||
this.storageService = new DataStorageService(); |
|||
} |
|||
|
|||
public void crawlDoubanBooks(int limit) { |
|||
if (isRunning.get()) { |
|||
view.showError("爬虫已经在运行中"); |
|||
return; |
|||
} |
|||
|
|||
isRunning.set(true); |
|||
view.showInfo("=== 开始爬取豆瓣读书 Top" + limit + " ==="); |
|||
|
|||
try { |
|||
books = doubanBookSpider.crawlHotBooks(limit); |
|||
view.showBooks(books); |
|||
|
|||
storageService.saveBooks(books); |
|||
logger.info("书籍数据已自动保存"); |
|||
} catch (NetworkException e) { |
|||
logger.error("网络异常 - {}", e.getMessage()); |
|||
view.showError("网络错误: " + e.getMessage()); |
|||
} catch (ParseException e) { |
|||
logger.error("解析异常 - Source: {}, Element: {}, Message: {}", |
|||
e.getSource(), e.getElement(), e.getMessage()); |
|||
view.showError("解析错误: " + e.getMessage()); |
|||
} catch (Exception e) { |
|||
logger.error("爬取豆瓣读书失败", e); |
|||
view.showError("爬取失败: " + e.getMessage()); |
|||
} finally { |
|||
isRunning.set(false); |
|||
} |
|||
} |
|||
|
|||
public void crawlDoubanMovies() { |
|||
if (isRunning.get()) { |
|||
view.showError("爬虫已经在运行中"); |
|||
return; |
|||
} |
|||
|
|||
isRunning.set(true); |
|||
view.showInfo("=== 开始爬取豆瓣电影 Top250 ==="); |
|||
|
|||
try { |
|||
movies = doubanMovieSpider.crawlTop250(); |
|||
view.showMovies(movies); |
|||
|
|||
storageService.saveMovies(movies); |
|||
logger.info("电影数据已自动保存"); |
|||
} catch (NetworkException e) { |
|||
logger.error("网络异常 - {}", e.getMessage()); |
|||
view.showError("网络错误: " + e.getMessage()); |
|||
} catch (ParseException e) { |
|||
logger.error("解析异常 - Source: {}, Element: {}, Message: {}", |
|||
e.getSource(), e.getElement(), e.getMessage()); |
|||
view.showError("解析错误: " + e.getMessage()); |
|||
} catch (Exception e) { |
|||
logger.error("爬取豆瓣电影失败", e); |
|||
view.showError("爬取失败: " + e.getMessage()); |
|||
} finally { |
|||
isRunning.set(false); |
|||
} |
|||
} |
|||
|
|||
public void crawlBaiduHotSearch(int limit) { |
|||
if (isRunning.get()) { |
|||
view.showError("爬虫已经在运行中"); |
|||
return; |
|||
} |
|||
|
|||
isRunning.set(true); |
|||
view.showInfo("=== 开始爬取百度热搜 Top" + limit + " ==="); |
|||
|
|||
try { |
|||
hotSearches = baiduHotSearchSpider.crawlHotSearch(limit); |
|||
view.showHotSearch(hotSearches); |
|||
|
|||
storageService.saveHotSearch(hotSearches); |
|||
logger.info("热搜数据已自动保存"); |
|||
} catch (NetworkException e) { |
|||
logger.error("网络异常 - {}", e.getMessage()); |
|||
view.showError("网络错误: " + e.getMessage()); |
|||
} catch (ParseException e) { |
|||
logger.error("解析异常 - Source: {}, Element: {}, Message: {}", |
|||
e.getSource(), e.getElement(), e.getMessage()); |
|||
view.showError("解析错误: " + e.getMessage()); |
|||
} catch (Exception e) { |
|||
logger.error("爬取百度热搜失败", e); |
|||
view.showError("爬取失败: " + e.getMessage()); |
|||
} finally { |
|||
isRunning.set(false); |
|||
} |
|||
} |
|||
|
|||
public void saveAllData() { |
|||
StringBuilder result = new StringBuilder(); |
|||
String booksPath = storageService.saveBooks(books); |
|||
String moviesPath = storageService.saveMovies(movies); |
|||
String hotSearchPath = storageService.saveHotSearch(hotSearches); |
|||
|
|||
if (booksPath != null) result.append("书籍: ").append(booksPath).append("\n"); |
|||
if (moviesPath != null) result.append("电影: ").append(moviesPath).append("\n"); |
|||
if (hotSearchPath != null) result.append("热搜: ").append(hotSearchPath); |
|||
|
|||
view.showInfo(result.toString()); |
|||
} |
|||
|
|||
public void saveBooks(String filename) { |
|||
String path = storageService.saveBooks(books); |
|||
if (path != null) { |
|||
view.showInfo("书籍数据已保存到: " + path); |
|||
} |
|||
} |
|||
|
|||
public void saveMovies(String filename) { |
|||
String path = storageService.saveMovies(movies); |
|||
if (path != null) { |
|||
view.showInfo("电影数据已保存到: " + path); |
|||
} |
|||
} |
|||
|
|||
public void saveHotSearch(String filename) { |
|||
String path = storageService.saveHotSearch(hotSearches); |
|||
if (path != null) { |
|||
view.showInfo("热搜数据已保存到: " + path); |
|||
} |
|||
} |
|||
|
|||
public void listSavedFiles() { |
|||
File dataDir = new File("data"); |
|||
if (!dataDir.exists()) { |
|||
view.showInfo("数据目录不存在"); |
|||
return; |
|||
} |
|||
|
|||
File[] files = dataDir.listFiles((dir, name) -> name.endsWith(".csv")); |
|||
if (files == null || files.length == 0) { |
|||
view.showInfo("没有已保存的数据文件"); |
|||
return; |
|||
} |
|||
|
|||
StringBuilder sb = new StringBuilder("\n已保存的数据文件:\n"); |
|||
for (File file : files) { |
|||
long size = file.length(); |
|||
String sizeStr = size < 1024 ? size + " B" : |
|||
size < 1024 * 1024 ? (size / 1024) + " KB" : |
|||
(size / 1024 / 1024) + " MB"; |
|||
sb.append(" ").append(file.getName()).append(" (").append(sizeStr).append(")\n"); |
|||
} |
|||
view.showInfo(sb.toString()); |
|||
} |
|||
|
|||
public void startSpider(String url) { |
|||
if (isRunning.get()) { |
|||
view.showError("爬虫已经在运行中"); |
|||
return; |
|||
} |
|||
|
|||
isRunning.set(true); |
|||
pagesCrawled.set(0); |
|||
totalRequests.set(0); |
|||
failedRequests.set(0); |
|||
|
|||
view.showInfo("=== 爬虫启动 ==="); |
|||
logger.info("目标URL: {}", url); |
|||
logger.info("线程数: {}", config.getThreadCount()); |
|||
logger.info("超时时间: {}ms", config.getTimeout()); |
|||
logger.info("开始抓取..."); |
|||
|
|||
performCrawling(url); |
|||
|
|||
view.showInfo("=== 爬虫任务完成 ==="); |
|||
} |
|||
|
|||
private void performCrawling(String url) { |
|||
logger.info("连接服务器: {}", url); |
|||
logger.info("发送HTTP请求..."); |
|||
|
|||
totalRequests.incrementAndGet(); |
|||
pagesCrawled.incrementAndGet(); |
|||
logger.info("接收响应: 200 OK"); |
|||
|
|||
logger.info("解析HTML内容..."); |
|||
logger.info("提取数据..."); |
|||
|
|||
logger.info("保存到数据库..."); |
|||
logger.info("页面已处理: {}", pagesCrawled.get()); |
|||
|
|||
view.showCrawlingResult(url, 10); |
|||
} |
|||
|
|||
public void stopSpider() { |
|||
if (!isRunning.get()) { |
|||
view.showError("爬虫未在运行"); |
|||
return; |
|||
} |
|||
|
|||
isRunning.set(false); |
|||
view.showInfo("正在停止爬虫..."); |
|||
logger.info("保存当前进度..."); |
|||
logger.info("关闭网络连接..."); |
|||
logger.info("释放资源..."); |
|||
view.showInfo("=== 爬虫已停止 ==="); |
|||
} |
|||
|
|||
public void showStatus() { |
|||
double successRate = 0.0; |
|||
if (totalRequests.get() > 0) { |
|||
successRate = (double) (totalRequests.get() - failedRequests.get()) / totalRequests.get() * 100; |
|||
} |
|||
|
|||
int memoryUsage = (int) (Runtime.getRuntime().totalMemory() / 1024 / 1024); |
|||
|
|||
view.showStatus(pagesCrawled.get(), isRunning.get(), successRate, memoryUsage); |
|||
} |
|||
|
|||
public void updateConfig(String key, String value) { |
|||
try { |
|||
switch (key) { |
|||
case "thread.count": |
|||
config.setThreadCount(Integer.parseInt(value)); |
|||
break; |
|||
case "timeout": |
|||
config.setTimeout(Integer.parseInt(value)); |
|||
break; |
|||
case "retry.count": |
|||
config.setRetryCount(Integer.parseInt(value)); |
|||
break; |
|||
case "user.agent": |
|||
config.setUserAgent(value); |
|||
break; |
|||
default: |
|||
view.showError("未知配置项: " + key); |
|||
return; |
|||
} |
|||
view.showConfig(key, value); |
|||
} catch (NumberFormatException e) { |
|||
view.showError("无效的数值: " + value); |
|||
} |
|||
} |
|||
|
|||
public void showConfig() { |
|||
view.showInfo("=== 当前配置 ==="); |
|||
logger.info("线程数: {}", config.getThreadCount()); |
|||
logger.info("超时时间: {}ms", config.getTimeout()); |
|||
logger.info("重试次数: {}", config.getRetryCount()); |
|||
logger.info("User-Agent: {}", config.getUserAgent()); |
|||
logger.info("起始URL: {}", config.getStartUrl()); |
|||
view.showInfo("================"); |
|||
} |
|||
|
|||
public boolean isRunning() { |
|||
return isRunning.get(); |
|||
} |
|||
|
|||
public List<Book> getBooks() { |
|||
return books; |
|||
} |
|||
|
|||
public List<Movie> getMovies() { |
|||
return movies; |
|||
} |
|||
|
|||
public List<HotSearch> getHotSearches() { |
|||
return hotSearches; |
|||
} |
|||
|
|||
public void setBooks(List<Book> books) { |
|||
this.books = books; |
|||
} |
|||
|
|||
public void setMovies(List<Movie> movies) { |
|||
this.movies = movies; |
|||
} |
|||
|
|||
public void setHotSearches(List<HotSearch> hotSearches) { |
|||
this.hotSearches = hotSearches; |
|||
} |
|||
} |
|||
@ -1,115 +0,0 @@ |
|||
package com.spider.core; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.command.Command; |
|||
import com.spider.command.ConfigCommand; |
|||
import com.spider.command.CrawlCommand; |
|||
import com.spider.command.HelpCommand; |
|||
import com.spider.command.ListCommand; |
|||
import com.spider.command.LoadCommand; |
|||
import com.spider.command.SaveCommand; |
|||
import com.spider.command.StartCommand; |
|||
import com.spider.command.StatusCommand; |
|||
import com.spider.command.StopCommand; |
|||
|
|||
public class CommandExecutor { |
|||
private static final Logger logger = LoggerFactory.getLogger(CommandExecutor.class); |
|||
private final Map<String, Command> commands; |
|||
|
|||
public CommandExecutor() { |
|||
commands = new HashMap<>(); |
|||
registerCommands(); |
|||
} |
|||
|
|||
private void registerCommands() { |
|||
commands.put("help", new HelpCommand()); |
|||
commands.put("list", new ListCommand()); |
|||
commands.put("crawl", new CrawlCommand()); |
|||
commands.put("save", new SaveCommand()); |
|||
commands.put("load", new LoadCommand()); |
|||
commands.put("start", new StartCommand()); |
|||
commands.put("stop", new StopCommand()); |
|||
commands.put("status", new StatusCommand()); |
|||
commands.put("config", new ConfigCommand()); |
|||
|
|||
logger.info("已注册 {} 个命令", commands.size()); |
|||
} |
|||
|
|||
public void execute(String input) { |
|||
if (input == null || input.trim().isEmpty()) { |
|||
return; |
|||
} |
|||
|
|||
String[] parts = input.trim().split("\\s+"); |
|||
String commandName = parts[0].toLowerCase(); |
|||
|
|||
if (commandName.equals("exit") || commandName.equals("quit")) { |
|||
logger.info("感谢使用,再见!"); |
|||
System.exit(0); |
|||
} |
|||
|
|||
Command command = commands.get(commandName); |
|||
if (command != null) { |
|||
try { |
|||
command.execute(parts); |
|||
} catch (Exception e) { |
|||
logger.error("执行命令时出错: {}", commandName, e); |
|||
} |
|||
} else { |
|||
logger.error("未知命令: {}", commandName); |
|||
showHelp(); |
|||
} |
|||
} |
|||
|
|||
public void showHelp() { |
|||
logger.info("\n┌─ 可用命令 ───────────────────────────────────────────┐"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据爬取】 │"); |
|||
logger.info("│ crawl books [N] 爬取豆瓣读书 Top N (默认50) │"); |
|||
logger.info("│ crawl movies 爬取豆瓣电影 Top250 │"); |
|||
logger.info("│ crawl hotsearch [N] 爬取百度热搜 Top N (默认50) │"); |
|||
logger.info("│ crawl all 爬取所有数据源 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据保存】 │"); |
|||
logger.info("│ save books [fname] 保存书籍数据到文件 │"); |
|||
logger.info("│ save movies [fname] 保存电影数据到文件 │"); |
|||
logger.info("│ save hotsearch [fn] 保存热搜数据到文件 │"); |
|||
logger.info("│ save all 保存所有已爬取的数据 │"); |
|||
logger.info("│ save files 列出已保存的文件 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据加载】 │"); |
|||
logger.info("│ load books <fname> 从文件加载书籍数据 │"); |
|||
logger.info("│ load movies <fname> 从文件加载电影数据 │"); |
|||
logger.info("│ load hotsearch <fn> 从文件加载热搜数据 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【数据列表】 │"); |
|||
logger.info("│ list books 显示已爬取的书籍 │"); |
|||
logger.info("│ list movies 显示已爬取的电影 │"); |
|||
logger.info("│ list hotsearch 显示已爬取的热搜 │"); |
|||
logger.info("│ list all 显示所有已爬取的数据 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【控制命令】 │"); |
|||
logger.info("│ start [url] 启动爬虫任务 │"); |
|||
logger.info("│ stop 停止爬虫任务 │"); |
|||
logger.info("│ status 查看爬虫运行状态 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【配置命令】 │"); |
|||
logger.info("│ config show 显示当前配置 │"); |
|||
logger.info("│ config set <k> <v> 设置配置项 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 【其他】 │"); |
|||
logger.info("│ help 显示帮助信息 │"); |
|||
logger.info("│ exit 退出程序 │"); |
|||
logger.info("│ │"); |
|||
logger.info("└──────────────────────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public Map<String, Command> getCommands() { |
|||
return commands; |
|||
} |
|||
} |
|||
@ -1,86 +0,0 @@ |
|||
package com.spider.core; |
|||
|
|||
import java.util.Scanner; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.controller.ControllerFactory; |
|||
import com.spider.view.ConsoleView; |
|||
import com.spider.view.ViewFactory; |
|||
|
|||
public class SpiderRunner { |
|||
private static final Logger logger = LoggerFactory.getLogger(SpiderRunner.class); |
|||
private static CommandExecutor executor; |
|||
|
|||
public static void main(String[] args) { |
|||
ConsoleView view = ViewFactory.createConsoleView(); |
|||
|
|||
if (args.length > 0 && "oneclick".equalsIgnoreCase(args[0])) { |
|||
runOneClickMode(view); |
|||
} else { |
|||
runInteractiveMode(view); |
|||
} |
|||
} |
|||
|
|||
private static void runOneClickMode(ConsoleView view) { |
|||
logger.info("╔══════════════════════════════════════════╗"); |
|||
logger.info("║ Spider 多功能爬虫框架 v2.0 ║"); |
|||
logger.info("║ 一键爬取模式 ║"); |
|||
logger.info("╚══════════════════════════════════════════╝"); |
|||
|
|||
ControllerFactory.initController(); |
|||
|
|||
try { |
|||
view.showInfo("=== 开始一键爬取所有数据 ===\n"); |
|||
|
|||
view.showInfo(">>> 第1步:爬取豆瓣读书 Top50..."); |
|||
ControllerFactory.getSpiderController().crawlDoubanBooks(50); |
|||
|
|||
view.showInfo("\n>>> 第2步:爬取豆瓣电影 Top250..."); |
|||
ControllerFactory.getSpiderController().crawlDoubanMovies(); |
|||
|
|||
view.showInfo("\n>>> 第3步:爬取百度热搜 Top50..."); |
|||
ControllerFactory.getSpiderController().crawlBaiduHotSearch(50); |
|||
|
|||
view.showInfo("\n=== 数据爬取完成,开始保存... ==="); |
|||
ControllerFactory.getSpiderController().saveAllData(); |
|||
|
|||
view.showInfo("\n=== 数据保存完成 ==="); |
|||
view.showInfo("=== 一键爬取执行完毕! ===\n"); |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("一键爬取出错: ", e); |
|||
view.showError("执行失败: " + e.getMessage()); |
|||
} |
|||
|
|||
System.exit(0); |
|||
} |
|||
|
|||
private static void runInteractiveMode(ConsoleView view) { |
|||
view.showWelcome(); |
|||
|
|||
ControllerFactory.initController(); |
|||
|
|||
executor = new CommandExecutor(); |
|||
executor.showHelp(); |
|||
|
|||
Scanner scanner = new Scanner(System.in); |
|||
|
|||
while (true) { |
|||
try { |
|||
System.out.print("\nspider> "); |
|||
String input = scanner.nextLine(); |
|||
|
|||
if (input == null || input.trim().isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
executor.execute(input); |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("发生错误: ", e); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
@ -1,41 +0,0 @@ |
|||
package com.spider.exception; |
|||
|
|||
public class DataException extends SpiderException { |
|||
private final String dataType; |
|||
|
|||
public DataException(String message) { |
|||
super("DATA_ERROR", message); |
|||
this.dataType = null; |
|||
} |
|||
|
|||
public DataException(String message, Throwable cause) { |
|||
super("DATA_ERROR", message, cause); |
|||
this.dataType = null; |
|||
} |
|||
|
|||
public DataException(String dataType, String message) { |
|||
super("DATA_ERROR", message); |
|||
this.dataType = dataType; |
|||
} |
|||
|
|||
public DataException(String dataType, String message, Throwable cause) { |
|||
super("DATA_ERROR", message, cause); |
|||
this.dataType = dataType; |
|||
} |
|||
|
|||
public String getDataType() { |
|||
return dataType; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
StringBuilder sb = new StringBuilder("DataException{"); |
|||
sb.append("errorCode='").append(getErrorCode()).append('\''); |
|||
if (dataType != null) { |
|||
sb.append(", dataType='").append(dataType).append('\''); |
|||
} |
|||
sb.append(", message='").append(getMessage()).append('\''); |
|||
sb.append('}'); |
|||
return sb.toString(); |
|||
} |
|||
} |
|||
@ -1,23 +0,0 @@ |
|||
package com.spider.exception; |
|||
|
|||
public class NetworkException extends SpiderException { |
|||
public NetworkException(String message) { |
|||
super("NETWORK_ERROR", message); |
|||
} |
|||
|
|||
public NetworkException(String message, Throwable cause) { |
|||
super("NETWORK_ERROR", message, cause); |
|||
} |
|||
|
|||
public NetworkException(String url, String message, Throwable cause) { |
|||
super("NETWORK_ERROR", "URL: " + url + ", " + message, cause); |
|||
} |
|||
|
|||
public NetworkException(String url, int statusCode) { |
|||
super("NETWORK_ERROR", "URL: " + url + ", HTTP状态码: " + statusCode); |
|||
} |
|||
|
|||
public NetworkException(String url, String message) { |
|||
super("NETWORK_ERROR", "URL: " + url + ", " + message); |
|||
} |
|||
} |
|||
@ -1,53 +0,0 @@ |
|||
package com.spider.exception; |
|||
|
|||
public class ParseException extends SpiderException { |
|||
private final String source; |
|||
private final String element; |
|||
|
|||
public ParseException(String message) { |
|||
super("PARSE_ERROR", message); |
|||
this.source = null; |
|||
this.element = null; |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super("PARSE_ERROR", message, cause); |
|||
this.source = null; |
|||
this.element = null; |
|||
} |
|||
|
|||
public ParseException(String source, String element, String message) { |
|||
super("PARSE_ERROR", message); |
|||
this.source = source; |
|||
this.element = element; |
|||
} |
|||
|
|||
public ParseException(String source, String element, String message, Throwable cause) { |
|||
super("PARSE_ERROR", message, cause); |
|||
this.source = source; |
|||
this.element = element; |
|||
} |
|||
|
|||
public String getSource() { |
|||
return source; |
|||
} |
|||
|
|||
public String getElement() { |
|||
return element; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
StringBuilder sb = new StringBuilder("ParseException{"); |
|||
sb.append("errorCode='").append(getErrorCode()).append('\''); |
|||
if (source != null) { |
|||
sb.append(", source='").append(source).append('\''); |
|||
} |
|||
if (element != null) { |
|||
sb.append(", element='").append(element).append('\''); |
|||
} |
|||
sb.append(", message='").append(getMessage()).append('\''); |
|||
sb.append('}'); |
|||
return sb.toString(); |
|||
} |
|||
} |
|||
@ -1,37 +0,0 @@ |
|||
package com.spider.exception; |
|||
|
|||
/**
 * Base unchecked exception of the spider project. Every instance carries an
 * error code; constructors without an explicit code use "SPIDER_ERROR".
 */
public class SpiderException extends RuntimeException {
    private static final String DEFAULT_ERROR_CODE = "SPIDER_ERROR";

    private final String errorCode;

    /**
     * @param message failure description; the default error code is used
     */
    public SpiderException(String message) {
        this(DEFAULT_ERROR_CODE, message);
    }

    /**
     * @param message failure description; the default error code is used
     * @param cause   underlying cause
     */
    public SpiderException(String message, Throwable cause) {
        this(DEFAULT_ERROR_CODE, message, cause);
    }

    /**
     * @param errorCode code identifying the failure category
     * @param message   failure description
     */
    public SpiderException(String errorCode, String message) {
        super(message);
        this.errorCode = errorCode;
    }

    /**
     * @param errorCode code identifying the failure category
     * @param message   failure description
     * @param cause     underlying cause
     */
    public SpiderException(String errorCode, String message, Throwable cause) {
        super(message, cause);
        this.errorCode = errorCode;
    }

    /** @return the error code given at construction */
    public String getErrorCode() {
        return errorCode;
    }

    /** Renders the error code and message in one line. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SpiderException{");
        text.append("errorCode='").append(errorCode).append('\'');
        text.append(", message='").append(getMessage()).append('\'');
        text.append('}');
        return text.toString();
    }
}
|||
@ -1,89 +0,0 @@ |
|||
package com.spider.model; |
|||
|
|||
import java.util.Objects; |
|||
|
|||
public class Book implements DataItem { |
|||
private String title; |
|||
private double rating; |
|||
private String author; |
|||
private String summary; |
|||
private int commentCount; |
|||
|
|||
public Book() { |
|||
} |
|||
|
|||
public Book(String title, double rating, String author, String summary, int commentCount) { |
|||
this.title = title; |
|||
this.rating = rating; |
|||
this.author = author; |
|||
this.summary = summary; |
|||
this.commentCount = commentCount; |
|||
} |
|||
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public void setTitle(String title) { |
|||
this.title = title; |
|||
} |
|||
|
|||
public double getRating() { |
|||
return rating; |
|||
} |
|||
|
|||
public void setRating(double rating) { |
|||
this.rating = rating; |
|||
} |
|||
|
|||
public String getAuthor() { |
|||
return author; |
|||
} |
|||
|
|||
public void setAuthor(String author) { |
|||
this.author = author; |
|||
} |
|||
|
|||
public String getSummary() { |
|||
return summary; |
|||
} |
|||
|
|||
public void setSummary(String summary) { |
|||
this.summary = summary; |
|||
} |
|||
|
|||
public int getCommentCount() { |
|||
return commentCount; |
|||
} |
|||
|
|||
public void setCommentCount(int commentCount) { |
|||
this.commentCount = commentCount; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "《" + title + "》" + |
|||
"\n 评分: " + rating + |
|||
"\n 作者: " + author + |
|||
"\n 评价数: " + commentCount + |
|||
"\n 简介: " + summary; |
|||
} |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
Book book = (Book) o; |
|||
return Objects.equals(title, book.title); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(title); |
|||
} |
|||
|
|||
@Override |
|||
public String getSource() { |
|||
return "豆瓣读书"; |
|||
} |
|||
} |
|||
@ -1,13 +0,0 @@ |
|||
package com.spider.model; |
|||
|
|||
/**
 * Common view of a crawled item: a title, a rating, a summary and the name
 * of the source it came from.
 */
public interface DataItem {
    /** @return the item's title */
    String getTitle();

    /** @return the item's rating */
    double getRating();

    /** @return the item's summary text */
    String getSummary();

    /** @return the name of the source the item came from */
    String getSource();

    /**
     * @return a one-line label of the form "[source] title (评分: r)" with the
     *         rating formatted to one decimal place using the default locale
     */
    default String getDisplayString() {
        final String source = getSource();
        final String title = getTitle();
        final double rating = getRating();
        return String.format("[%s] %s (评分: %.1f)", source, title, rating);
    }
}
|||
@ -1,70 +0,0 @@ |
|||
package com.spider.model; |
|||
|
|||
import java.util.Objects; |
|||
|
|||
public class HotSearch implements DataItem { |
|||
private int rank; |
|||
private String keyword; |
|||
|
|||
public HotSearch() { |
|||
} |
|||
|
|||
public HotSearch(int rank, String keyword) { |
|||
this.rank = rank; |
|||
this.keyword = keyword; |
|||
} |
|||
|
|||
public int getRank() { |
|||
return rank; |
|||
} |
|||
|
|||
public void setRank(int rank) { |
|||
this.rank = rank; |
|||
} |
|||
|
|||
public String getKeyword() { |
|||
return keyword; |
|||
} |
|||
|
|||
public void setKeyword(String keyword) { |
|||
this.keyword = keyword; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return rank + ". " + keyword; |
|||
} |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
HotSearch that = (HotSearch) o; |
|||
return rank == that.rank && Objects.equals(keyword, that.keyword); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(rank, keyword); |
|||
} |
|||
|
|||
@Override |
|||
public String getTitle() { |
|||
return keyword; |
|||
} |
|||
|
|||
@Override |
|||
public double getRating() { |
|||
return 0.0; |
|||
} |
|||
|
|||
@Override |
|||
public String getSource() { |
|||
return "百度热搜"; |
|||
} |
|||
|
|||
@Override |
|||
public String getSummary() { |
|||
return ""; |
|||
} |
|||
} |
|||
@ -1,82 +0,0 @@ |
|||
package com.spider.model; |
|||
|
|||
import java.util.Objects; |
|||
|
|||
public class Movie implements DataItem { |
|||
private String title; |
|||
private double rating; |
|||
private String director; |
|||
private int rank; |
|||
|
|||
public Movie() { |
|||
} |
|||
|
|||
public Movie(String title, double rating, String director, int rank) { |
|||
this.title = title; |
|||
this.rating = rating; |
|||
this.director = director; |
|||
this.rank = rank; |
|||
} |
|||
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public void setTitle(String title) { |
|||
this.title = title; |
|||
} |
|||
|
|||
public double getRating() { |
|||
return rating; |
|||
} |
|||
|
|||
public void setRating(double rating) { |
|||
this.rating = rating; |
|||
} |
|||
|
|||
public String getDirector() { |
|||
return director; |
|||
} |
|||
|
|||
public void setDirector(String director) { |
|||
this.director = director; |
|||
} |
|||
|
|||
public int getRank() { |
|||
return rank; |
|||
} |
|||
|
|||
public void setRank(int rank) { |
|||
this.rank = rank; |
|||
} |
|||
|
|||
@Override |
|||
public String getSummary() { |
|||
return ""; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "Top" + rank + " 《" + title + "》" + |
|||
"\n 评分: " + rating + |
|||
"\n 导演: " + director; |
|||
} |
|||
|
|||
@Override |
|||
public boolean equals(Object o) { |
|||
if (this == o) return true; |
|||
if (o == null || getClass() != o.getClass()) return false; |
|||
Movie movie = (Movie) o; |
|||
return Objects.equals(title, movie.title); |
|||
} |
|||
|
|||
@Override |
|||
public int hashCode() { |
|||
return Objects.hash(title); |
|||
} |
|||
|
|||
@Override |
|||
public String getSource() { |
|||
return "豆瓣电影"; |
|||
} |
|||
} |
|||
@ -1,60 +0,0 @@ |
|||
package com.spider.model; |
|||
|
|||
/**
 * Mutable crawler settings.
 *
 * Defaults: 5 threads, timeout 30000, 3 retries, user agent "Mozilla/5.0",
 * no start URL. The setters store values as given, without validation.
 */
public class SpiderConfig {
    private int threadCount = 5;
    private int timeout = 30000;
    private int retryCount = 3;
    private String userAgent = "Mozilla/5.0";
    private String startUrl;

    /** @return the configured thread count */
    public int getThreadCount() {
        return this.threadCount;
    }

    /** @param threadCount new thread count */
    public void setThreadCount(int threadCount) {
        this.threadCount = threadCount;
    }

    /** @return the configured timeout */
    public int getTimeout() {
        return this.timeout;
    }

    /** @param timeout new timeout */
    public void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    /** @return the configured retry count */
    public int getRetryCount() {
        return this.retryCount;
    }

    /** @param retryCount new retry count */
    public void setRetryCount(int retryCount) {
        this.retryCount = retryCount;
    }

    /** @return the configured User-Agent string */
    public String getUserAgent() {
        return this.userAgent;
    }

    /** @param userAgent new User-Agent string */
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /** @return the start URL, or null when none has been set */
    public String getStartUrl() {
        return this.startUrl;
    }

    /** @param startUrl new start URL */
    public void setStartUrl(String startUrl) {
        this.startUrl = startUrl;
    }

    /** Renders every setting in a single "SpiderConfig{...}" line. */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("SpiderConfig{");
        text.append("threadCount=").append(threadCount);
        text.append(", timeout=").append(timeout);
        text.append(", retryCount=").append(retryCount);
        text.append(", userAgent='").append(userAgent).append('\'');
        text.append(", startUrl='").append(startUrl).append('\'');
        text.append('}');
        return text.toString();
    }
}
|||
@ -1,170 +0,0 @@ |
|||
package com.spider.repository; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.model.Book; |
|||
import com.spider.model.HotSearch; |
|||
import com.spider.model.Movie; |
|||
|
|||
public class ArticleRepository { |
|||
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
|||
|
|||
private final List<Book> books; |
|||
private final List<Movie> movies; |
|||
private final List<HotSearch> hotSearches; |
|||
|
|||
public ArticleRepository() { |
|||
this.books = new ArrayList<>(); |
|||
this.movies = new ArrayList<>(); |
|||
this.hotSearches = new ArrayList<>(); |
|||
} |
|||
|
|||
public void addBook(Book book) { |
|||
if (book == null) { |
|||
logger.warn("添加书籍失败:书籍对象为空"); |
|||
throw new IllegalArgumentException("书籍对象不能为空"); |
|||
} |
|||
|
|||
if (book.getTitle() == null || book.getTitle().trim().isEmpty()) { |
|||
logger.warn("添加书籍失败:书名不能为空"); |
|||
throw new IllegalArgumentException("书名不能为空"); |
|||
} |
|||
|
|||
if (books.contains(book)) { |
|||
logger.debug("书籍已存在,跳过添加: {}", book.getTitle()); |
|||
return; |
|||
} |
|||
|
|||
books.add(book); |
|||
logger.debug("成功添加书籍: {}", book.getTitle()); |
|||
} |
|||
|
|||
public void addBooks(List<Book> bookList) { |
|||
if (bookList == null || bookList.isEmpty()) { |
|||
logger.warn("添加书籍列表失败:列表为空"); |
|||
throw new IllegalArgumentException("书籍列表不能为空"); |
|||
} |
|||
|
|||
int added = 0; |
|||
for (Book book : bookList) { |
|||
try { |
|||
addBook(book); |
|||
added++; |
|||
} catch (Exception e) { |
|||
logger.warn("添加书籍失败: {}", e.getMessage()); |
|||
} |
|||
} |
|||
logger.info("批量添加书籍完成,成功添加 {} 本", added); |
|||
} |
|||
|
|||
public void addMovie(Movie movie) { |
|||
if (movie == null) { |
|||
logger.warn("添加电影失败:电影对象为空"); |
|||
throw new IllegalArgumentException("电影对象不能为空"); |
|||
} |
|||
|
|||
if (movie.getTitle() == null || movie.getTitle().trim().isEmpty()) { |
|||
logger.warn("添加电影失败:电影名不能为空"); |
|||
throw new IllegalArgumentException("电影名不能为空"); |
|||
} |
|||
|
|||
if (movies.contains(movie)) { |
|||
logger.debug("电影已存在,跳过添加: {}", movie.getTitle()); |
|||
return; |
|||
} |
|||
|
|||
movies.add(movie); |
|||
logger.debug("成功添加电影: {}", movie.getTitle()); |
|||
} |
|||
|
|||
public void addMovies(List<Movie> movieList) { |
|||
if (movieList == null || movieList.isEmpty()) { |
|||
logger.warn("添加电影列表失败:列表为空"); |
|||
throw new IllegalArgumentException("电影列表不能为空"); |
|||
} |
|||
|
|||
int added = 0; |
|||
for (Movie movie : movieList) { |
|||
try { |
|||
addMovie(movie); |
|||
added++; |
|||
} catch (Exception e) { |
|||
logger.warn("添加电影失败: {}", e.getMessage()); |
|||
} |
|||
} |
|||
logger.info("批量添加电影完成,成功添加 {} 部", added); |
|||
} |
|||
|
|||
public void addHotSearch(HotSearch hotSearch) { |
|||
if (hotSearch == null) { |
|||
logger.warn("添加热搜失败:热搜对象为空"); |
|||
throw new IllegalArgumentException("热搜对象不能为空"); |
|||
} |
|||
|
|||
if (hotSearch.getKeyword() == null || hotSearch.getKeyword().trim().isEmpty()) { |
|||
logger.warn("添加热搜失败:关键词不能为空"); |
|||
throw new IllegalArgumentException("关键词不能为空"); |
|||
} |
|||
|
|||
if (hotSearches.contains(hotSearch)) { |
|||
logger.debug("热搜已存在,跳过添加: {}", hotSearch.getKeyword()); |
|||
return; |
|||
} |
|||
|
|||
hotSearches.add(hotSearch); |
|||
logger.debug("成功添加热搜: {}", hotSearch.getKeyword()); |
|||
} |
|||
|
|||
public void addHotSearches(List<HotSearch> hotSearchList) { |
|||
if (hotSearchList == null || hotSearchList.isEmpty()) { |
|||
logger.warn("添加热搜列表失败:列表为空"); |
|||
throw new IllegalArgumentException("热搜列表不能为空"); |
|||
} |
|||
|
|||
int added = 0; |
|||
for (HotSearch hotSearch : hotSearchList) { |
|||
try { |
|||
addHotSearch(hotSearch); |
|||
added++; |
|||
} catch (Exception e) { |
|||
logger.warn("添加热搜失败: {}", e.getMessage()); |
|||
} |
|||
} |
|||
logger.info("批量添加热搜完成,成功添加 {} 条", added); |
|||
} |
|||
|
|||
public List<Book> getBooks() { |
|||
return new ArrayList<>(books); |
|||
} |
|||
|
|||
public List<Movie> getMovies() { |
|||
return new ArrayList<>(movies); |
|||
} |
|||
|
|||
public List<HotSearch> getHotSearches() { |
|||
return new ArrayList<>(hotSearches); |
|||
} |
|||
|
|||
public int getBookCount() { |
|||
return books.size(); |
|||
} |
|||
|
|||
public int getMovieCount() { |
|||
return movies.size(); |
|||
} |
|||
|
|||
public int getHotSearchCount() { |
|||
return hotSearches.size(); |
|||
} |
|||
|
|||
public void clearAll() { |
|||
books.clear(); |
|||
movies.clear(); |
|||
hotSearches.clear(); |
|||
logger.info("仓库已清空"); |
|||
} |
|||
} |
|||
@ -1,49 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
import java.util.function.Supplier; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.NetworkException; |
|||
import com.spider.exception.ParseException; |
|||
import com.spider.utils.HttpClientUtil; |
|||
import com.spider.utils.RetryUtils; |
|||
|
|||
public abstract class AbstractSpider<T> implements Spider<T> { |
|||
protected final Logger logger = LoggerFactory.getLogger(getClass()); |
|||
|
|||
protected Document fetchDocument(String url) { |
|||
return RetryUtils.executeWithRetry(() -> { |
|||
try { |
|||
String html = HttpClientUtil.fetchHtml(url); |
|||
if (html == null || html.isEmpty()) { |
|||
throw new NetworkException("NETWORK_ERROR", "获取页面内容为空: " + url); |
|||
} |
|||
return Jsoup.parse(html); |
|||
} catch (NetworkException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new NetworkException("NETWORK_ERROR", "网络请求失败: " + e.getMessage(), e); |
|||
} |
|||
}, 3); |
|||
} |
|||
|
|||
protected <R> R executeWithRetry(Supplier<R> operation, int maxRetries) { |
|||
return RetryUtils.executeWithRetry(operation, maxRetries); |
|||
} |
|||
|
|||
protected void validateData(T data) throws ParseException { |
|||
if (data == null) { |
|||
throw new ParseException(getSourceName(), "validateData", "爬取数据为空"); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public abstract String getSourceName(); |
|||
|
|||
@Override |
|||
public abstract int getDefaultLimit(); |
|||
} |
|||
@ -1,125 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.ParseException; |
|||
import com.spider.model.HotSearch; |
|||
import com.spider.utils.HttpClientUtil; |
|||
|
|||
public class BaiduHotSearchSpider extends AbstractSpider<HotSearch> implements Spider<HotSearch> { |
|||
private static final Logger logger = LoggerFactory.getLogger(BaiduHotSearchSpider.class); |
|||
private static final String HOT_SEARCH_URL = "https://top.baidu.com/board?tab=realtime"; |
|||
private static final int DEFAULT_LIMIT = 50; |
|||
|
|||
private List<HotSearch> hotSearches; |
|||
|
|||
public BaiduHotSearchSpider() { |
|||
super(); |
|||
this.hotSearches = new ArrayList<>(); |
|||
} |
|||
|
|||
@Override |
|||
public String getSourceName() { |
|||
return "百度热搜"; |
|||
} |
|||
|
|||
@Override |
|||
public int getDefaultLimit() { |
|||
return DEFAULT_LIMIT; |
|||
} |
|||
|
|||
public List<HotSearch> crawlHotSearch(int limit) { |
|||
hotSearches.clear(); |
|||
logger.info("开始爬取百度实时热搜榜前 {} 条...", limit); |
|||
|
|||
try { |
|||
logger.info("正在抓取: {}", HOT_SEARCH_URL); |
|||
|
|||
String html = HttpClientUtil.fetchHtml(HOT_SEARCH_URL); |
|||
if (html == null || html.isEmpty()) { |
|||
logger.error("获取热搜页面失败"); |
|||
return hotSearches; |
|||
} |
|||
|
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
Elements hotItems = doc.select("div.c-single-text-ellipsis"); |
|||
|
|||
if (hotItems.isEmpty()) { |
|||
logger.warn("未能解析到热搜数据,尝试备用选择器..."); |
|||
hotItems = doc.select(".list-item .topic-title"); |
|||
} |
|||
|
|||
if (hotItems.isEmpty()) { |
|||
throw new ParseException("百度热搜", "hotItems", "无法找到热搜数据元素"); |
|||
} |
|||
|
|||
int count = 0; |
|||
for (int i = 0; i < Math.min(hotItems.size(), limit); i++) { |
|||
HotSearch hotSearch = new HotSearch(); |
|||
|
|||
hotSearch.setRank(i + 1); |
|||
|
|||
Element titleElement = hotItems.get(i); |
|||
String keyword = titleElement.text().trim(); |
|||
|
|||
if (keyword.isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
hotSearch.setKeyword(keyword); |
|||
|
|||
hotSearches.add(hotSearch); |
|||
count++; |
|||
|
|||
if (count >= limit) break; |
|||
|
|||
logger.debug("已抓取热搜 #{}: {}", hotSearch.getRank(), hotSearch.getKeyword()); |
|||
} |
|||
|
|||
logger.info("百度热搜爬取完成,共获取 {} 条热搜", hotSearches.size()); |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("爬取百度热搜时出错", e); |
|||
throw e; |
|||
} |
|||
|
|||
return new ArrayList<>(hotSearches); |
|||
} |
|||
|
|||
public List<HotSearch> getHotSearches() { |
|||
return hotSearches; |
|||
} |
|||
|
|||
public String exportToJson() { |
|||
StringBuilder json = new StringBuilder(); |
|||
json.append("[\n"); |
|||
for (HotSearch hotSearch : hotSearches) { |
|||
json.append(" {\n"); |
|||
json.append(" \"rank\": ").append(hotSearch.getRank()).append(",\n"); |
|||
json.append(" \"keyword\": \"").append(escapeJson(hotSearch.getKeyword())).append("\"\n"); |
|||
json.append(" },\n"); |
|||
} |
|||
if (!hotSearches.isEmpty()) { |
|||
json.setLength(json.length() - 2); |
|||
} |
|||
json.append("\n]"); |
|||
return json.toString(); |
|||
} |
|||
|
|||
private String escapeJson(String str) { |
|||
if (str == null) return ""; |
|||
return str.replace("\\", "\\\\") |
|||
.replace("\"", "\\\"") |
|||
.replace("\n", "\\n") |
|||
.replace("\r", "\\r"); |
|||
} |
|||
} |
|||
@ -1,151 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.nio.file.Files; |
|||
import java.nio.file.Path; |
|||
import java.nio.file.Paths; |
|||
import java.util.List; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.model.Book; |
|||
import com.spider.model.HotSearch; |
|||
import com.spider.model.Movie; |
|||
|
|||
public class DataStorageService { |
|||
private static final Logger logger = LoggerFactory.getLogger(DataStorageService.class); |
|||
|
|||
private static final String DATA_DIR = "data"; |
|||
private static final String BOOKS_FILE = "books.csv"; |
|||
private static final String MOVIES_FILE = "movies.csv"; |
|||
private static final String HOTSEARCH_FILE = "hotsearch.csv"; |
|||
|
|||
public DataStorageService() { |
|||
createDataDirectory(); |
|||
} |
|||
|
|||
private void createDataDirectory() { |
|||
Path path = Paths.get(DATA_DIR); |
|||
if (!Files.exists(path)) { |
|||
try { |
|||
Files.createDirectories(path); |
|||
logger.info("创建数据目录: {}", DATA_DIR); |
|||
} catch (IOException e) { |
|||
logger.error("创建数据目录失败", e); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public String saveBooks(List<Book> books) { |
|||
if (books == null || books.isEmpty()) { |
|||
logger.warn("没有书籍数据可保存"); |
|||
return null; |
|||
} |
|||
|
|||
String fullPath = getFilePath(BOOKS_FILE); |
|||
try { |
|||
saveBooksToCsv(books, fullPath); |
|||
logger.info("书籍数据已保存到: {}", fullPath); |
|||
return fullPath; |
|||
} catch (IOException e) { |
|||
logger.error("保存书籍数据失败", e); |
|||
return null; |
|||
} |
|||
} |
|||
|
|||
public String saveMovies(List<Movie> movies) { |
|||
if (movies == null || movies.isEmpty()) { |
|||
logger.warn("没有电影数据可保存"); |
|||
return null; |
|||
} |
|||
|
|||
String fullPath = getFilePath(MOVIES_FILE); |
|||
try { |
|||
saveMoviesToCsv(movies, fullPath); |
|||
logger.info("电影数据已保存到: {}", fullPath); |
|||
return fullPath; |
|||
} catch (IOException e) { |
|||
logger.error("保存电影数据失败", e); |
|||
return null; |
|||
} |
|||
} |
|||
|
|||
public String saveHotSearch(List<HotSearch> hotSearches) { |
|||
if (hotSearches == null || hotSearches.isEmpty()) { |
|||
logger.warn("没有热搜数据可保存"); |
|||
return null; |
|||
} |
|||
|
|||
String fullPath = getFilePath(HOTSEARCH_FILE); |
|||
try { |
|||
saveHotSearchToCsv(hotSearches, fullPath); |
|||
logger.info("热搜数据已保存到: {}", fullPath); |
|||
return fullPath; |
|||
} catch (IOException e) { |
|||
logger.error("保存热搜数据失败", e); |
|||
return null; |
|||
} |
|||
} |
|||
|
|||
private void saveBooksToCsv(List<Book> books, String fullPath) throws IOException { |
|||
try (FileWriter writer = new FileWriter(fullPath)) { |
|||
writer.write("\uFEFF"); |
|||
writer.write("书名,评分,作者,简介,评价数\n"); |
|||
for (Book book : books) { |
|||
writer.write(escapeCsv(book.getTitle())); |
|||
writer.write(","); |
|||
writer.write(String.valueOf(book.getRating())); |
|||
writer.write(","); |
|||
writer.write(escapeCsv(book.getAuthor())); |
|||
writer.write(","); |
|||
writer.write(escapeCsv(book.getSummary())); |
|||
writer.write(","); |
|||
writer.write(String.valueOf(book.getCommentCount())); |
|||
writer.write("\n"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void saveMoviesToCsv(List<Movie> movies, String fullPath) throws IOException { |
|||
try (FileWriter writer = new FileWriter(fullPath)) { |
|||
writer.write("\uFEFF"); |
|||
writer.write("电影名,评分,导演\n"); |
|||
for (Movie movie : movies) { |
|||
writer.write(escapeCsv(movie.getTitle())); |
|||
writer.write(","); |
|||
writer.write(String.valueOf(movie.getRating())); |
|||
writer.write(","); |
|||
writer.write(escapeCsv(movie.getDirector())); |
|||
writer.write("\n"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void saveHotSearchToCsv(List<HotSearch> hotSearches, String fullPath) throws IOException { |
|||
try (FileWriter writer = new FileWriter(fullPath)) { |
|||
writer.write("\uFEFF"); |
|||
writer.write("排名,关键词\n"); |
|||
for (HotSearch hs : hotSearches) { |
|||
writer.write(String.valueOf(hs.getRank())); |
|||
writer.write(","); |
|||
writer.write(escapeCsv(hs.getKeyword())); |
|||
writer.write("\n"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private String escapeCsv(String value) { |
|||
if (value == null) return ""; |
|||
if (value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r")) { |
|||
return "\"" + value.replace("\"", "\"\"") + "\""; |
|||
} |
|||
return value; |
|||
} |
|||
|
|||
private String getFilePath(String filename) { |
|||
return DATA_DIR + File.separator + filename; |
|||
} |
|||
} |
|||
@ -1,312 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.model.Book; |
|||
import com.spider.utils.HttpClientUtil; |
|||
|
|||
public class DoubanBookSpider extends AbstractSpider<Book> implements Spider<Book> { |
|||
private static final Logger logger = LoggerFactory.getLogger(DoubanBookSpider.class); |
|||
private static final String HOT_BOOKS_URL = "https://book.douban.com/chart?sub_type=1"; |
|||
private static final int DEFAULT_LIMIT = 50; |
|||
|
|||
private static final Pattern RATING_PATTERN = Pattern.compile("([\\d.]+)\\s*\\(\\s*([\\d.,万]+)\\s*人评价\\s*\\)"); |
|||
private static final Pattern RATING_SIMPLE = Pattern.compile("(\\d+\\.\\d+)\\s*\\(\\s*\\d+\\s*人评价\\s*\\)"); |
|||
|
|||
private List<Book> books; |
|||
|
|||
public DoubanBookSpider() { |
|||
super(); |
|||
this.books = new ArrayList<>(); |
|||
} |
|||
|
|||
@Override |
|||
public String getSourceName() { |
|||
return "豆瓣读书"; |
|||
} |
|||
|
|||
@Override |
|||
public int getDefaultLimit() { |
|||
return DEFAULT_LIMIT; |
|||
} |
|||
|
|||
public List<Book> crawlHotBooks(int limit) { |
|||
books.clear(); |
|||
logger.info("开始爬取豆瓣读书热度最高的 {} 本书...", limit); |
|||
|
|||
try { |
|||
int page = 1; |
|||
int fetched = 0; |
|||
|
|||
while (fetched < limit) { |
|||
String url = page == 1 ? HOT_BOOKS_URL : HOT_BOOKS_URL + "&page=" + page; |
|||
logger.info("正在抓取第 {} 页: {}", page, url); |
|||
|
|||
String html = HttpClientUtil.fetchHtml(url); |
|||
if (html == null || html.isEmpty()) { |
|||
logger.warn("第 {} 页获取为空", page); |
|||
break; |
|||
} |
|||
|
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
Elements headings = doc.select("h2"); |
|||
logger.info("找到 {} 个 h2 标签", headings.size()); |
|||
|
|||
for (Element h2 : headings) { |
|||
if (fetched >= limit) break; |
|||
|
|||
Element link = h2.selectFirst("a[href*='/subject/']"); |
|||
if (link == null) continue; |
|||
|
|||
String title = link.text().trim(); |
|||
if (title.isEmpty() || title.length() < 2 || title.contains("豆瓣") || title.contains("登录")) { |
|||
continue; |
|||
} |
|||
|
|||
Book book = new Book(); |
|||
book.setTitle(title); |
|||
|
|||
Element parent = h2.parent(); |
|||
if (parent != null) { |
|||
String parentText = parent.text(); |
|||
String author = extractAuthor(parentText, title); |
|||
book.setAuthor(author); |
|||
|
|||
double rating = extractRatingValue(parentText); |
|||
book.setRating(rating); |
|||
|
|||
int commentCount = extractCommentCount(parentText); |
|||
book.setCommentCount(commentCount); |
|||
|
|||
String summary = extractSummary(parentText); |
|||
book.setSummary(summary); |
|||
} |
|||
|
|||
books.add(book); |
|||
fetched++; |
|||
logger.debug("已抓取 {}: {} | 作者: {} | 评分: {} | 评价数: {}", |
|||
fetched, title, book.getAuthor(), book.getRating(), book.getCommentCount()); |
|||
} |
|||
|
|||
if (fetched == 0) { |
|||
logger.warn("h2选择器没有找到书籍,尝试其他方法..."); |
|||
Elements subjectItems = doc.select(".subject-item, .book-item, li, div[class*=item]"); |
|||
logger.info("找到 {} 个可能的项目元素", subjectItems.size()); |
|||
|
|||
for (Element item : subjectItems) { |
|||
if (fetched >= limit) break; |
|||
|
|||
Element link = item.selectFirst("a[href*='/subject/']"); |
|||
if (link == null) continue; |
|||
|
|||
String title = link.text().trim(); |
|||
if (title.isEmpty() || title.length() < 2 || title.contains("豆瓣") || title.contains("登录")) { |
|||
continue; |
|||
} |
|||
|
|||
Book book = new Book(); |
|||
book.setTitle(title); |
|||
|
|||
String text = item.text(); |
|||
book.setAuthor(extractAuthor(text, title)); |
|||
book.setRating(extractRatingValue(text)); |
|||
book.setCommentCount(extractCommentCount(text)); |
|||
book.setSummary(extractSummary(text)); |
|||
|
|||
books.add(book); |
|||
fetched++; |
|||
} |
|||
} |
|||
|
|||
page++; |
|||
if (page > 5) break; |
|||
} |
|||
|
|||
logger.info("豆瓣读书爬取完成,共获取 {} 本书", books.size()); |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("爬取豆瓣读书时出错", e); |
|||
throw e; |
|||
} |
|||
|
|||
return new ArrayList<>(books); |
|||
} |
|||
|
|||
private String extractAuthor(String text, String title) { |
|||
if (text == null || text.isEmpty()) { |
|||
return "未知作者"; |
|||
} |
|||
|
|||
String[] parts = text.split("\n"); |
|||
for (String part : parts) { |
|||
part = part.trim(); |
|||
if (part.contains("/") && !part.contains("评价") && !part.contains("元/") && !part.matches("\\d.*")) { |
|||
String[] subParts = part.split("/"); |
|||
if (subParts.length > 0) { |
|||
String author = subParts[0].trim() |
|||
.replace("著", "").replace("译", "").replace("选", "").replace("主编", "") |
|||
.replace(title, "").trim(); |
|||
if (author.length() > 0 && author.length() < 40 && !author.matches("\\d.*")) { |
|||
return author; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
Matcher matcher = Pattern.compile("(.*?)\\s*/\\s*\\d{4}").matcher(text); |
|||
if (matcher.find()) { |
|||
String author = matcher.group(1).trim(); |
|||
author = author.replace("著", "").replace("译", "").replace("选", "").replace("主编", "") |
|||
.replace(title, "").trim(); |
|||
if (author.length() < 50 && !author.matches("\\d.*")) { |
|||
return author; |
|||
} |
|||
} |
|||
|
|||
return "未知作者"; |
|||
} |
|||
|
|||
private String extractRating(String text) { |
|||
if (text == null) return null; |
|||
|
|||
Matcher matcher = RATING_PATTERN.matcher(text); |
|||
if (matcher.find()) { |
|||
return matcher.group(1); |
|||
} |
|||
|
|||
Matcher simpleMatcher = RATING_SIMPLE.matcher(text); |
|||
if (simpleMatcher.find()) { |
|||
return simpleMatcher.group(1); |
|||
} |
|||
|
|||
Matcher m2 = Pattern.compile("(\\d+\\.\\d+)\\(").matcher(text); |
|||
if (m2.find()) { |
|||
return m2.group(1); |
|||
} |
|||
|
|||
return null; |
|||
} |
|||
|
|||
private double extractRatingValue(String text) { |
|||
String rating = extractRating(text); |
|||
if (rating != null) { |
|||
try { |
|||
return Double.parseDouble(rating); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
return 0.0; |
|||
} |
|||
|
|||
private int extractCommentCount(String text) { |
|||
if (text == null) return 0; |
|||
|
|||
Matcher matcher = RATING_PATTERN.matcher(text); |
|||
if (matcher.find()) { |
|||
String countStr = matcher.group(2); |
|||
return parseCount(countStr); |
|||
} |
|||
|
|||
Matcher m2 = Pattern.compile("\\(\\s*([\\d.,万]+)\\s*人评价\\s*\\)").matcher(text); |
|||
if (m2.find()) { |
|||
return parseCount(m2.group(1)); |
|||
} |
|||
|
|||
Matcher m3 = Pattern.compile("\\((\\d+)\\s*人评价\\)").matcher(text); |
|||
if (m3.find()) { |
|||
return parseCount(m3.group(1)); |
|||
} |
|||
|
|||
return 0; |
|||
} |
|||
|
|||
private int parseCount(String countStr) { |
|||
if (countStr == null || countStr.isEmpty()) return 0; |
|||
try { |
|||
countStr = countStr.replace(",", "").replace(" ", ""); |
|||
if (countStr.contains("万")) { |
|||
return (int) (Double.parseDouble(countStr.replace("万", "")) * 10000); |
|||
} |
|||
return Integer.parseInt(countStr); |
|||
} catch (NumberFormatException e) { |
|||
return 0; |
|||
} |
|||
} |
|||
|
|||
private String extractSummary(String text) { |
|||
if (text == null || text.isEmpty()) { |
|||
return ""; |
|||
} |
|||
|
|||
String[] parts = text.split("\\d\\.\\d"); |
|||
if (parts.length > 1) { |
|||
String summary = parts[1].trim(); |
|||
summary = summary.replaceAll("\\d+\\s*人评价.*", "") |
|||
.replaceAll("连续上榜[\\d个月]*", "") |
|||
.replaceAll("\\[.*?\\]", "") |
|||
.replaceAll("\\(.*?\\)", "") |
|||
.replaceAll("元.*?精装", "精装") |
|||
.replaceAll("元.*?平装", "平装") |
|||
.replaceAll("元", "") |
|||
.trim(); |
|||
if (summary.length() > 100) { |
|||
summary = summary.substring(0, 100) + "..."; |
|||
} |
|||
if (summary.length() > 5) { |
|||
return summary; |
|||
} |
|||
} |
|||
|
|||
Pattern tagPattern = Pattern.compile("(社会纪实|小说|文学|历史|文化|科学|经管|绘本|漫画|科学新知|商业经管|绘本漫画|历史文化)"); |
|||
Matcher matcher = tagPattern.matcher(text); |
|||
if (matcher.find()) { |
|||
return matcher.group(1); |
|||
} |
|||
|
|||
return ""; |
|||
} |
|||
|
|||
public List<Book> getBooks() { |
|||
return books; |
|||
} |
|||
|
|||
public String exportToJson() { |
|||
StringBuilder json = new StringBuilder(); |
|||
json.append("[\n"); |
|||
for (int i = 0; i < books.size(); i++) { |
|||
Book book = books.get(i); |
|||
json.append(" {\n"); |
|||
json.append(" \"rank\": ").append(i + 1).append(",\n"); |
|||
json.append(" \"title\": \"").append(escapeJson(book.getTitle())).append("\",\n"); |
|||
json.append(" \"rating\": ").append(book.getRating()).append(",\n"); |
|||
json.append(" \"author\": \"").append(escapeJson(book.getAuthor())).append("\",\n"); |
|||
json.append(" \"commentCount\": ").append(book.getCommentCount()).append(",\n"); |
|||
json.append(" \"summary\": \"").append(escapeJson(book.getSummary())).append("\"\n"); |
|||
json.append(" }"); |
|||
if (i < books.size() - 1) json.append(","); |
|||
json.append("\n"); |
|||
} |
|||
json.append("]"); |
|||
return json.toString(); |
|||
} |
|||
|
|||
private String escapeJson(String str) { |
|||
if (str == null) return ""; |
|||
return str.replace("\\", "\\\\") |
|||
.replace("\"", "\\\"") |
|||
.replace("\n", "\\n") |
|||
.replace("\r", "\\r"); |
|||
} |
|||
} |
|||
@ -1,158 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.ParseException; |
|||
import com.spider.model.Movie; |
|||
import com.spider.utils.HttpClientUtil; |
|||
|
|||
public class DoubanMovieSpider extends AbstractSpider<Movie> implements Spider<Movie> { |
|||
private static final Logger logger = LoggerFactory.getLogger(DoubanMovieSpider.class); |
|||
private static final String TOP250_URL = "https://movie.douban.com/top250"; |
|||
private static final int DEFAULT_LIMIT = 250; |
|||
|
|||
private List<Movie> movies; |
|||
|
|||
public DoubanMovieSpider() { |
|||
super(); |
|||
this.movies = new ArrayList<>(); |
|||
} |
|||
|
|||
@Override |
|||
public String getSourceName() { |
|||
return "豆瓣电影"; |
|||
} |
|||
|
|||
@Override |
|||
public int getDefaultLimit() { |
|||
return DEFAULT_LIMIT; |
|||
} |
|||
|
|||
public List<Movie> crawlTop250() { |
|||
movies.clear(); |
|||
logger.info("开始爬取豆瓣电影Top250..."); |
|||
|
|||
try { |
|||
int page = 0; |
|||
int rank = 1; |
|||
|
|||
while (page < 10) { |
|||
String url = page == 0 ? TOP250_URL : TOP250_URL + "?start=" + (page * 25); |
|||
logger.info("正在抓取第 {} 页 ({}): {}", page + 1, rank, url); |
|||
|
|||
String html = HttpClientUtil.fetchHtml(url); |
|||
if (html == null || html.isEmpty()) { |
|||
logger.warn("第 {} 页获取为空", page + 1); |
|||
break; |
|||
} |
|||
|
|||
Document doc = Jsoup.parse(html); |
|||
Elements movieItems = doc.select("div.item"); |
|||
|
|||
if (movieItems.isEmpty()) { |
|||
logger.info("第 {} 页没有更多电影", page + 1); |
|||
break; |
|||
} |
|||
|
|||
for (Element item : movieItems) { |
|||
Movie movie = parseMovieItem(item, rank); |
|||
if (movie != null && movie.getTitle() != null && !movie.getTitle().isEmpty()) { |
|||
movies.add(movie); |
|||
rank++; |
|||
logger.debug("已抓取 Top{}: {}", rank - 1, movie.getTitle()); |
|||
} |
|||
} |
|||
|
|||
page++; |
|||
} |
|||
|
|||
logger.info("豆瓣电影爬取完成,共获取 {} 部电影", movies.size()); |
|||
|
|||
} catch (Exception e) { |
|||
logger.error("爬取豆瓣电影时出错", e); |
|||
throw e; |
|||
} |
|||
|
|||
return new ArrayList<>(movies); |
|||
} |
|||
|
|||
private Movie parseMovieItem(Element item, int rank) { |
|||
try { |
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
|
|||
Element titleElement = item.selectFirst("div.hd a span:nth-child(1)"); |
|||
if (titleElement != null) { |
|||
movie.setTitle(titleElement.text().trim()); |
|||
} else { |
|||
throw new ParseException("豆瓣电影", "title", "无法解析电影标题"); |
|||
} |
|||
|
|||
Element ratingElement = item.selectFirst("span.rating_num"); |
|||
if (ratingElement != null) { |
|||
try { |
|||
String ratingStr = ratingElement.text().trim(); |
|||
movie.setRating(Double.parseDouble(ratingStr)); |
|||
} catch (NumberFormatException e) { |
|||
throw new ParseException("豆瓣电影", "rating", "评分格式错误: " + ratingElement.text(), e); |
|||
} |
|||
} |
|||
|
|||
Element directorElement = item.selectFirst("div.bd p:nth-child(1)"); |
|||
if (directorElement != null) { |
|||
String info = directorElement.text().trim(); |
|||
if (info.contains("导演:")) { |
|||
int directorStart = info.indexOf("导演:") + 3; |
|||
int directorEnd = info.indexOf("主"); |
|||
if (directorEnd > directorStart) { |
|||
String director = info.substring(directorStart, directorEnd).trim(); |
|||
director = director.split("/")[0].trim(); |
|||
movie.setDirector(director); |
|||
} |
|||
} |
|||
} |
|||
|
|||
return movie; |
|||
|
|||
} catch (ParseException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new ParseException("豆瓣电影", "movieItem", "解析电影信息时出错", e); |
|||
} |
|||
} |
|||
|
|||
public List<Movie> getMovies() { |
|||
return movies; |
|||
} |
|||
|
|||
public String exportToJson() { |
|||
StringBuilder json = new StringBuilder(); |
|||
json.append("[\n"); |
|||
for (Movie movie : movies) { |
|||
json.append(" {\n"); |
|||
json.append(" \"rank\": ").append(movie.getRank()).append(",\n"); |
|||
json.append(" \"title\": \"").append(escapeJson(movie.getTitle())).append("\",\n"); |
|||
json.append(" \"rating\": ").append(movie.getRating()).append(",\n"); |
|||
json.append(" \"director\": \"").append(escapeJson(movie.getDirector())).append("\"\n"); |
|||
json.append(" },\n"); |
|||
} |
|||
if (!movies.isEmpty()) { |
|||
json.setLength(json.length() - 2); |
|||
} |
|||
json.append("\n]"); |
|||
return json.toString(); |
|||
} |
|||
|
|||
private String escapeJson(String str) { |
|||
if (str == null) return ""; |
|||
return str.replace("\"", "\\\"").replace("\\", "\\\\"); |
|||
} |
|||
} |
|||
@ -1,7 +0,0 @@ |
|||
package com.spider.service; |
|||
|
|||
/**
 * Contract shared by every spider.
 *
 * @param <T> the model type the spider produces
 */
public interface Spider<T> {

    /**
     * @return the name of the data source this spider reads
     */
    public abstract String getSourceName();

    /**
     * @return the spider's default item limit
     */
    public abstract int getDefaultLimit();
}
|||
@ -1,55 +0,0 @@ |
|||
package com.spider.test; |
|||
|
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import com.spider.utils.HttpClientUtil; |
|||
|
|||
public class DebugDoubanBooks { |
|||
public static void main(String[] args) { |
|||
String url = "https://book.douban.com/chart?sub_type=1"; |
|||
System.out.println("抓取URL: " + url); |
|||
|
|||
String html = HttpClientUtil.fetchHtml(url); |
|||
if (html == null || html.isEmpty()) { |
|||
System.out.println("HTML为空!"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("HTML长度: " + html.length()); |
|||
System.out.println("\n=== HTML前2000字符 ==="); |
|||
System.out.println(html.substring(0, Math.min(2000, html.length()))); |
|||
|
|||
Document doc = Jsoup.parse(html); |
|||
|
|||
System.out.println("\n\n=== 尝试各种选择器 ==="); |
|||
|
|||
String[] selectors = { |
|||
"tr.item", |
|||
".chart-item", |
|||
".book-item", |
|||
"[class*=item]", |
|||
"ul.list-view li", |
|||
".subject-item", |
|||
".info", |
|||
".DoubanBook", |
|||
"li[class*=item]", |
|||
"div[class*=item]", |
|||
"table tr" |
|||
}; |
|||
|
|||
for (String sel : selectors) { |
|||
Elements els = doc.select(sel); |
|||
System.out.println(sel + " -> " + els.size() + " 个元素"); |
|||
} |
|||
|
|||
System.out.println("\n=== 查找包含特定文本的元素 ==="); |
|||
Elements links = doc.select("a[href*='/subject/']"); |
|||
System.out.println("找到 " + links.size() + " 个 subject 链接"); |
|||
for (int i = 0; i < Math.min(5, links.size()); i++) { |
|||
Element link = links.get(i); |
|||
System.out.println(" " + link.text().substring(0, Math.min(50, link.text().length()))); |
|||
} |
|||
} |
|||
} |
|||
@ -1,75 +0,0 @@ |
|||
package com.spider.utils; |
|||
|
|||
import org.apache.hc.client5.http.classic.methods.HttpGet; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpClient; |
|||
import org.apache.hc.client5.http.impl.classic.CloseableHttpResponse; |
|||
import org.apache.hc.client5.http.impl.classic.HttpClients; |
|||
import org.apache.hc.client5.http.impl.io.PoolingHttpClientConnectionManager; |
|||
import org.apache.hc.core5.http.HttpStatus; |
|||
import org.apache.hc.core5.http.io.entity.EntityUtils; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.NetworkException; |
|||
|
|||
public class HttpClientUtil { |
|||
private static final Logger logger = LoggerFactory.getLogger(HttpClientUtil.class); |
|||
private static final CloseableHttpClient httpClient; |
|||
|
|||
static { |
|||
PoolingHttpClientConnectionManager cm = new PoolingHttpClientConnectionManager(); |
|||
cm.setMaxTotal(50); |
|||
cm.setDefaultMaxPerRoute(20); |
|||
|
|||
httpClient = HttpClients.custom() |
|||
.setConnectionManager(cm) |
|||
.build(); |
|||
} |
|||
|
|||
public static String fetchHtml(String url) { |
|||
try { |
|||
HttpGet request = new HttpGet(url); |
|||
request.setHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64)"); |
|||
request.setHeader("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"); |
|||
request.setHeader("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8"); |
|||
request.setHeader("Accept-Encoding", "gzip, deflate"); |
|||
|
|||
try (CloseableHttpResponse response = httpClient.execute(request)) { |
|||
int statusCode = response.getCode(); |
|||
|
|||
if (statusCode == HttpStatus.SC_OK) { |
|||
String html = EntityUtils.toString(response.getEntity(), "UTF-8"); |
|||
logger.debug("成功获取页面: {}", url); |
|||
return html; |
|||
} else if (statusCode == HttpStatus.SC_NOT_FOUND) { |
|||
throw new NetworkException(url, statusCode); |
|||
} else if (statusCode == HttpStatus.SC_FORBIDDEN) { |
|||
throw new NetworkException(url, "访问被拒绝,可能需要设置User-Agent或等待一段时间"); |
|||
} else { |
|||
throw new NetworkException(url, "HTTP请求失败,状态码: " + statusCode); |
|||
} |
|||
} |
|||
} catch (NetworkException e) { |
|||
throw e; |
|||
} catch (Exception e) { |
|||
throw new NetworkException(url, "网络请求失败: " + e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
public static Document parseHtml(String html) { |
|||
if (html == null || html.isEmpty()) { |
|||
throw new IllegalArgumentException("HTML内容为空"); |
|||
} |
|||
return Jsoup.parse(html); |
|||
} |
|||
|
|||
public static void close() { |
|||
try { |
|||
httpClient.close(); |
|||
} catch (Exception e) { |
|||
logger.error("关闭HTTP客户端失败", e); |
|||
} |
|||
} |
|||
} |
|||
@ -1,84 +0,0 @@ |
|||
package com.spider.utils; |
|||
|
|||
import java.util.function.Supplier; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.exception.NetworkException; |
|||
|
|||
public class RetryUtils { |
|||
private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class); |
|||
|
|||
private static final int DEFAULT_MAX_RETRIES = 3; |
|||
private static final long DEFAULT_DELAY_MS = 1000; |
|||
private static final long DEFAULT_MAX_DELAY_MS = 5000; |
|||
|
|||
public static <T> T executeWithRetry(Supplier<T> task) { |
|||
return executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_DELAY_MS); |
|||
} |
|||
|
|||
public static <T> T executeWithRetry(Supplier<T> task, int maxRetries) { |
|||
return executeWithRetry(task, maxRetries, DEFAULT_DELAY_MS); |
|||
} |
|||
|
|||
public static <T> T executeWithRetry(Supplier<T> task, int maxRetries, long initialDelayMs) { |
|||
int attempts = 0; |
|||
long delayMs = initialDelayMs; |
|||
Exception lastException = null; |
|||
|
|||
while (attempts < maxRetries) { |
|||
attempts++; |
|||
try { |
|||
logger.debug("执行任务,第 {}/{} 次尝试", attempts, maxRetries); |
|||
return task.get(); |
|||
} catch (NetworkException e) { |
|||
lastException = e; |
|||
logger.warn("网络异常 (第{}次尝试): {}", attempts, e.getMessage()); |
|||
|
|||
if (attempts < maxRetries) { |
|||
try { |
|||
logger.info("等待 {}ms 后重试...", delayMs); |
|||
Thread.sleep(delayMs); |
|||
delayMs = Math.min(delayMs * 2, DEFAULT_MAX_DELAY_MS); |
|||
} catch (InterruptedException ie) { |
|||
Thread.currentThread().interrupt(); |
|||
throw new RuntimeException("重试被中断", ie); |
|||
} |
|||
} |
|||
} catch (Exception e) { |
|||
lastException = e; |
|||
logger.warn("执行异常 (第{}次尝试): {}", attempts, e.getMessage()); |
|||
|
|||
if (attempts < maxRetries) { |
|||
try { |
|||
logger.info("等待 {}ms 后重试...", delayMs); |
|||
Thread.sleep(delayMs); |
|||
delayMs = Math.min(delayMs * 2, DEFAULT_MAX_DELAY_MS); |
|||
} catch (InterruptedException ie) { |
|||
Thread.currentThread().interrupt(); |
|||
throw new RuntimeException("重试被中断", ie); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
logger.error("任务在 {} 次尝试后失败", maxRetries); |
|||
throw new RuntimeException("任务执行失败,已重试 " + maxRetries + " 次", lastException); |
|||
} |
|||
|
|||
public static void executeWithRetry(Runnable task) { |
|||
executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_DELAY_MS); |
|||
} |
|||
|
|||
public static void executeWithRetry(Runnable task, int maxRetries) { |
|||
executeWithRetry(task, maxRetries, DEFAULT_DELAY_MS); |
|||
} |
|||
|
|||
public static void executeWithRetry(Runnable task, int maxRetries, long initialDelayMs) { |
|||
executeWithRetry(() -> { |
|||
task.run(); |
|||
return null; |
|||
}, maxRetries, initialDelayMs); |
|||
} |
|||
} |
|||
@ -1,133 +0,0 @@ |
|||
package com.spider.view; |
|||
|
|||
import java.util.List; |
|||
|
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import com.spider.model.Book; |
|||
import com.spider.model.HotSearch; |
|||
import com.spider.model.Movie; |
|||
|
|||
public class ConsoleView { |
|||
private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); |
|||
|
|||
public void showWelcome() { |
|||
logger.info("╔══════════════════════════════════════════╗"); |
|||
logger.info("║ Spider 多功能爬虫框架 v2.0 ║"); |
|||
logger.info("╚══════════════════════════════════════════╝"); |
|||
} |
|||
|
|||
public void showHelp() { |
|||
logger.info("\n┌─ 可用命令 ─────────────────────────────────┐"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 爬虫命令: │"); |
|||
logger.info("│ crawl books [数量] - 爬取豆瓣读书Top │"); |
|||
logger.info("│ crawl movies - 爬取豆瓣电影Top250 │"); |
|||
logger.info("│ crawl hotsearch [数量]- 爬取百度热搜 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 控制命令: │"); |
|||
logger.info("│ start <url> - 启动爬虫(可指定URL)│"); |
|||
logger.info("│ stop - 停止爬虫 │"); |
|||
logger.info("│ status - 查看运行状态 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ 配置命令: │"); |
|||
logger.info("│ config show - 显示配置 │"); |
|||
logger.info("│ config set <k> <v> - 设置配置 │"); |
|||
logger.info("│ │"); |
|||
logger.info("│ help - 显示帮助 │"); |
|||
logger.info("│ exit - 退出程序 │"); |
|||
logger.info("│ │"); |
|||
logger.info("└──────────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public void showBooks(List<Book> books) { |
|||
if (books == null || books.isEmpty()) { |
|||
logger.info("没有书籍数据"); |
|||
return; |
|||
} |
|||
|
|||
logger.info("\n┌─ 豆瓣读书 Top{} ────────────────────────────┐", books.size()); |
|||
logger.info("│ │"); |
|||
for (int i = 0; i < books.size(); i++) { |
|||
Book book = books.get(i); |
|||
logger.info("│ [{}] 《{}》", String.format("%2d", i + 1), book.getTitle()); |
|||
logger.info("│ 评分: {} | 作者: {}", book.getRating(), truncate(book.getAuthor(), 15)); |
|||
if (book.getCommentCount() > 0) { |
|||
logger.info("│ 评价数: {}", book.getCommentCount()); |
|||
} |
|||
if (book.getSummary() != null && !book.getSummary().isEmpty()) { |
|||
logger.info("│ 简介: {}", truncate(book.getSummary(), 40)); |
|||
} |
|||
if (i < books.size() - 1) { |
|||
logger.info("│ │"); |
|||
} |
|||
} |
|||
logger.info("│ │"); |
|||
logger.info("└──────────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public void showMovies(List<Movie> movies) { |
|||
if (movies == null || movies.isEmpty()) { |
|||
logger.info("没有电影数据"); |
|||
return; |
|||
} |
|||
|
|||
logger.info("\n┌─ 豆瓣电影 Top250 ──────────────────────────┐"); |
|||
logger.info("│ │"); |
|||
for (Movie movie : movies) { |
|||
logger.info("│ Top{} 《{}》", String.format("%3d", movie.getRank()), movie.getTitle()); |
|||
logger.info("│ 评分: {} | 导演: {}", movie.getRating(), truncate(movie.getDirector(), 15)); |
|||
logger.info("│ │"); |
|||
} |
|||
logger.info("└──────────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public void showHotSearch(List<HotSearch> hotSearches) { |
|||
if (hotSearches == null || hotSearches.isEmpty()) { |
|||
logger.info("没有热搜数据"); |
|||
return; |
|||
} |
|||
|
|||
logger.info("\n┌─ 百度实时热搜榜 Top{} ──────────────────────┐", hotSearches.size()); |
|||
logger.info("│ │"); |
|||
for (HotSearch hotSearch : hotSearches) { |
|||
logger.info("│ {} {}", String.format("%2d.", hotSearch.getRank()), |
|||
truncate(hotSearch.getKeyword(), 30)); |
|||
} |
|||
logger.info("│ │"); |
|||
logger.info("└──────────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public void showCrawlingResult(String url, int count) { |
|||
logger.info("=== 已完成爬取: {} ===", url); |
|||
logger.info("共爬取 {} 条数据", count); |
|||
} |
|||
|
|||
public void showStatus(int pagesCrawled, boolean isRunning, double successRate, int memoryUsage) { |
|||
logger.info("┌─ 爬虫状态 ─────────────────────────────┐"); |
|||
logger.info("│ 运行状态: {}", isRunning ? "运行中 ✓" : "已停止 ✗"); |
|||
logger.info("│ 已抓取页面: {} 页", pagesCrawled); |
|||
logger.info("│ 成功率: {}%", String.format("%.2f", successRate)); |
|||
logger.info("│ 内存使用: {} MB", memoryUsage); |
|||
logger.info("└─────────────────────────────────────────┘"); |
|||
} |
|||
|
|||
public void showConfig(String key, String value) { |
|||
logger.info("配置已更新: {} = {}", key, value); |
|||
} |
|||
|
|||
public void showError(String message) { |
|||
logger.error("错误: {}", message); |
|||
} |
|||
|
|||
public void showInfo(String message) { |
|||
logger.info(message); |
|||
} |
|||
|
|||
private String truncate(String str, int maxLen) { |
|||
if (str == null) return ""; |
|||
if (str.length() <= maxLen) return str; |
|||
return str.substring(0, maxLen - 3) + "..."; |
|||
} |
|||
} |
|||
@ -1,7 +0,0 @@ |
|||
package com.spider.view; |
|||
|
|||
public class ViewFactory { |
|||
public static ConsoleView createConsoleView() { |
|||
return new ConsoleView(); |
|||
} |
|||
} |
|||
@ -1,89 +0,0 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>

    <!-- Directory and file-name prefix shared by every file appender below. -->
    <property name="LOG_HOME" value="logs"/>
    <property name="APP_NAME" value="spider"/>

    <!-- Console output. -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!--
        General log file. Rolls daily and whenever the active file reaches
        maxFileSize; maxHistory is the retention setting for rolled files.
        SizeAndTimeBasedRollingPolicy replaces the deprecated nested
        timeBasedFileNamingAndTriggeringPolicy / SizeAndTimeBasedFNATP form.
    -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- ERROR-only log file: the LevelFilter accepts ERROR events and denies every other level. -->
    <appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}-error.log</file>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <level>ERROR</level>
            <onMatch>ACCEPT</onMatch>
            <onMismatch>DENY</onMismatch>
        </filter>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-error-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- Crawl log file: larger files and shorter retention than the general log. -->
    <appender name="CRAWL_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}-crawl.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-crawl-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>50MB</maxFileSize>
            <maxHistory>7</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- additivity="false" keeps events from also reaching the root appenders, which would duplicate every line. -->
    <logger name="com.spider" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </logger>

    <!-- Service loggers additionally write to the crawl log. -->
    <logger name="com.spider.service" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
        <appender-ref ref="CRAWL_FILE"/>
    </logger>

    <logger name="com.spider.controller" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </logger>

    <!-- Fallback for every logger outside com.spider. -->
    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </root>

</configuration>
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,89 +0,0 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>

    <!-- Directory and file-name prefix shared by every file appender below. -->
    <property name="LOG_HOME" value="logs"/>
    <property name="APP_NAME" value="spider"/>

    <!-- Console output. -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!--
        General log file. Rolls daily and whenever the active file reaches
        maxFileSize; maxHistory is the retention setting for rolled files.
        SizeAndTimeBasedRollingPolicy replaces the deprecated nested
        timeBasedFileNamingAndTriggeringPolicy / SizeAndTimeBasedFNATP form.
    -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- ERROR-only log file: the LevelFilter accepts ERROR events and denies every other level. -->
    <appender name="ERROR_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}-error.log</file>
        <filter class="ch.qos.logback.classic.filter.LevelFilter">
            <level>ERROR</level>
            <onMatch>ACCEPT</onMatch>
            <onMismatch>DENY</onMismatch>
        </filter>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-error-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>10MB</maxFileSize>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- Crawl log file: larger files and shorter retention than the general log. -->
    <appender name="CRAWL_FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}-crawl.log</file>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{50} - %msg%n</pattern>
            <charset>UTF-8</charset>
        </encoder>
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-crawl-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>50MB</maxFileSize>
            <maxHistory>7</maxHistory>
        </rollingPolicy>
    </appender>

    <!-- additivity="false" keeps events from also reaching the root appenders, which would duplicate every line. -->
    <logger name="com.spider" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </logger>

    <!-- Service loggers additionally write to the crawl log. -->
    <logger name="com.spider.service" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
        <appender-ref ref="CRAWL_FILE"/>
    </logger>

    <logger name="com.spider.controller" level="INFO" additivity="false">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </logger>

    <!-- Fallback for every logger outside com.spider. -->
    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
        <appender-ref ref="ERROR_FILE"/>
    </root>

</configuration>
|||
@ -1,3 +0,0 @@ |
|||
artifactId=spider |
|||
groupId=com.spider |
|||
version=1.0.0 |
|||
@ -1,36 +0,0 @@ |
|||
com\spider\core\SpiderRunner.class |
|||
com\spider\exception\DataException.class |
|||
com\spider\model\HotSearch.class |
|||
com\spider\model\Movie.class |
|||
com\spider\service\DoubanMovieSpider.class |
|||
com\spider\command\ListCommand.class |
|||
com\spider\command\CrawlCommand.class |
|||
com\spider\test\DebugDoubanBooks.class |
|||
com\spider\utils\HttpClientUtil.class |
|||
com\spider\exception\NetworkException.class |
|||
com\spider\view\ConsoleView.class |
|||
com\spider\model\SpiderConfig.class |
|||
com\spider\service\DataStorageService.class |
|||
com\spider\exception\ParseException.class |
|||
com\spider\utils\RetryUtils.class |
|||
com\spider\view\ViewFactory.class |
|||
com\spider\command\HelpCommand.class |
|||
com\spider\command\ConfigCommand.class |
|||
com\spider\model\Book.class |
|||
com\spider\core\CommandExecutor.class |
|||
com\spider\command\StopCommand.class |
|||
com\spider\service\BaiduHotSearchSpider.class |
|||
com\spider\command\LoadCommand.class |
|||
com\spider\command\SaveCommand.class |
|||
com\spider\service\AbstractSpider.class |
|||
com\spider\command\StartCommand.class |
|||
com\spider\service\DoubanBookSpider.class |
|||
com\spider\controller\ControllerFactory.class |
|||
com\spider\exception\SpiderException.class |
|||
com\spider\service\Spider.class |
|||
com\spider\command\StatusCommand.class |
|||
com\spider\command\Command.class |
|||
com\spider\repository\ArticleRepository.class |
|||
com\spider\model\DataItem.class |
|||
com\spider\controller\SpiderController.class |
|||
com\spider\controller\ControllerInitializer.class |
|||
@ -1,36 +0,0 @@ |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\CrawlCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\core\CommandExecutor.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\Command.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\LoadCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\NetworkException.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\ListCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DoubanBookSpider.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\HotSearch.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\core\SpiderRunner.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\view\ViewFactory.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\SaveCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StartCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StatusCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\ConfigCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\ParseException.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DataStorageService.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\DoubanMovieSpider.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\repository\ArticleRepository.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\AbstractSpider.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\ControllerInitializer.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\utils\RetryUtils.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\Movie.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\view\ConsoleView.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\SpiderException.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\SpiderController.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\test\DebugDoubanBooks.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\Book.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\SpiderConfig.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\Spider.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\controller\ControllerFactory.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\exception\DataException.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\service\BaiduHotSearchSpider.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\StopCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\model\DataItem.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\command\HelpCommand.java |
|||
D:\java\job-pc\spider\spider\src\main\java\com\spider\utils\HttpClientUtil.java |
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in new issue