76 changed files with 2665 additions and 0 deletions
@ -0,0 +1,13 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="CompilerConfiguration"> |
|||
<annotationProcessing> |
|||
<profile name="Maven default annotation processors profile" enabled="true"> |
|||
<sourceOutputDir name="target/generated-sources/annotations" /> |
|||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
|||
<outputRelativeToContentRoot value="true" /> |
|||
<module name="crawler-homework" /> |
|||
</profile> |
|||
</annotationProcessing> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,7 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="Encoding"> |
|||
<file url="file://$PROJECT_DIR$/src/main/java" charset="UTF-8" /> |
|||
<file url="file://$PROJECT_DIR$/src/main/resources" charset="UTF-8" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,20 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="RemoteRepositoriesConfiguration"> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Central Repository" /> |
|||
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Maven Central repository" /> |
|||
<option name="url" value="https://repo1.maven.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="jboss.community" /> |
|||
<option name="name" value="JBoss Community repository" /> |
|||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
|||
</remote-repository> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,11 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
|||
<component name="MavenProjectsManager"> |
|||
<option name="originalFiles"> |
|||
<list> |
|||
<option value="$PROJECT_DIR$/pom.xml" /> |
|||
</list> |
|||
</option> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,48 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="AutoImportSettings"> |
|||
<option name="autoReloadType" value="SELECTIVE" /> |
|||
</component> |
|||
<component name="ChangeListManager"> |
|||
<list default="true" id="a40ce2cf-f2b7-4cb0-8a2d-83ed72738a18" name="Changes" comment="" /> |
|||
<option name="SHOW_DIALOG" value="false" /> |
|||
<option name="HIGHLIGHT_CONFLICTS" value="true" /> |
|||
<option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" /> |
|||
<option name="LAST_RESOLUTION" value="IGNORE" /> |
|||
</component> |
|||
<component name="ProjectColorInfo"><![CDATA[{ |
|||
"associatedIndex": 8, |
|||
"fromUser": false |
|||
}]]></component> |
|||
<component name="ProjectId" id="3EUCHdoyXSRUuKKivDUUluAAHMf" /> |
|||
<component name="ProjectViewState"> |
|||
<option name="hideEmptyMiddlePackages" value="true" /> |
|||
<option name="showLibraryContents" value="true" /> |
|||
</component> |
|||
<component name="PropertiesComponent"><![CDATA[{ |
|||
"keyToString": { |
|||
"ModuleVcsDetector.initialDetectionPerformed": "true", |
|||
"RunOnceActivity.ShowReadmeOnStart": "true", |
|||
"RunOnceActivity.typescript.service.memoryLimit.init": "true", |
|||
"codeWithMe.voiceChat.enabledByDefault": "false", |
|||
"ignore.virus.scanning.warn.message": "true", |
|||
"kotlin-language-version-configured": "true", |
|||
"last_opened_file_path": "E:/新建文件夹 (2)/java/爬虫项目", |
|||
"vue.rearranger.settings.migration": "true" |
|||
} |
|||
}]]></component> |
|||
<component name="TaskManager"> |
|||
<task active="true" id="Default" summary="Default task"> |
|||
<changelist id="a40ce2cf-f2b7-4cb0-8a2d-83ed72738a18" name="Changes" comment="" /> |
|||
<created>1780223676037</created> |
|||
<option name="number" value="Default" /> |
|||
<option name="presentableId" value="Default" /> |
|||
<updated>1780223676037</updated> |
|||
<workItem from="1780223683558" duration="496000" /> |
|||
</task> |
|||
<servers /> |
|||
</component> |
|||
<component name="TypeScriptGeneratedFilesManager"> |
|||
<option name="version" value="3" /> |
|||
</component> |
|||
</project> |
|||
Binary file not shown.
Binary file not shown.
@ -0,0 +1,446 @@ |
|||
2026-05-31 15:16:29 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:16:42 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:16:42 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:16:42 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:16:42 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2023 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202401/t20240116_1946622.html |
|||
2026-05-31 15:16:42 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2022 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202301/t20230118_1951109.html |
|||
2026-05-31 15:16:43 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2021 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202201/t20220129_1947574.html |
|||
2026-05-31 15:16:43 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2020 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202102/t20210227_1954092.html |
|||
2026-05-31 15:16:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 部分年份数据获取失败,使用备用数据填充 |
|||
2026-05-31 15:16:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:16:43 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:17:07 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:18:11 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:18:27 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:18:27 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:18:57 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:19:22 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:19:27 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:19:27 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2023 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202401/t20240116_1946622.html |
|||
2026-05-31 15:19:27 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2022 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202301/t20230118_1951109.html |
|||
2026-05-31 15:19:28 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2021 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202201/t20220129_1947574.html |
|||
2026-05-31 15:19:28 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2020 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202102/t20210227_1954092.html |
|||
2026-05-31 15:19:28 [main] INFO s.PerCapitaIncomeCrawlStrategy - 部分年份数据获取失败,使用备用数据填充 |
|||
2026-05-31 15:19:28 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:23:06 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:23:19 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:23:19 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:23:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:23:21 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2022 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202301/t20230118_1951109.html |
|||
2026-05-31 15:23:21 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2021 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202201/t20220129_1947574.html |
|||
2026-05-31 15:23:21 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2020 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202102/t20210227_1954092.html |
|||
2026-05-31 15:23:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 部分年份数据获取失败,使用备用数据填充 |
|||
2026-05-31 15:23:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:23:21 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:23:46 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:24:50 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:25:02 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:25:02 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:25:02 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:25:05 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2021 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202201/t20220129_1947574.html |
|||
2026-05-31 15:25:05 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2020 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202102/t20210227_1954092.html |
|||
2026-05-31 15:25:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 部分年份数据获取失败,使用备用数据填充 |
|||
2026-05-31 15:25:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:25:05 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:25:44 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:25:58 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:25:58 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:25:58 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:26:01 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2021 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202201/t20220129_1947574.html |
|||
2026-05-31 15:26:02 [main] WARN s.PerCapitaIncomeCrawlStrategy - 获取 2020 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/sj/zxfb/202102/t20210227_1954092.html |
|||
2026-05-31 15:26:02 [main] INFO s.PerCapitaIncomeCrawlStrategy - 部分年份数据获取失败,使用备用数据填充 |
|||
2026-05-31 15:26:02 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:26:02 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:26:29 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:27:21 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:27:34 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:27:34 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:27:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:27:36 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:28:04 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:29:09 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:29:25 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:29:25 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:29:25 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:29:25 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 15:29:27 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 15:29:27 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 15:29:28 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 15:29:28 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 15:29:28 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:29:55 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:31:01 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:31:12 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:31:12 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:31:12 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据... |
|||
2026-05-31 15:31:12 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 15:31:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 15:31:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 15:31:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 15:31:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 15:31:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 15:31:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 15:31:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 15:31:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 19 年数据 |
|||
2026-05-31 15:31:16 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:31:39 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:36:52 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:37:09 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:37:09 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:37:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2015-2024年)... |
|||
2026-05-31 15:37:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 15:37:12 [main] WARN s.PerCapitaIncomeCrawlStrategy - 解析 2024 年数据失败,尝试其他正则模式 |
|||
2026-05-31 15:37:12 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据(备用模式): 41314.0元, 增速: 5.3% |
|||
2026-05-31 15:37:12 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 15:37:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 15:37:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 15:37:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 15:37:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 15:37:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 15:37:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 15:37:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 15:37:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 15:37:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 15:37:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2018 年数据... |
|||
2026-05-31 15:37:17 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2018 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201901/t20190121_1688307.html |
|||
2026-05-31 15:37:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2017 年数据... |
|||
2026-05-31 15:37:18 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2017 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201801/t20180118_1582343.html |
|||
2026-05-31 15:37:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2016 年数据... |
|||
2026-05-31 15:37:18 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2016 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201701/t20170120_1467428.html |
|||
2026-05-31 15:37:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2015 年数据... |
|||
2026-05-31 15:37:18 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2015 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201601/t20160119_1337835.html |
|||
2026-05-31 15:37:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,成功获取 6 年数据 |
|||
2026-05-31 15:37:18 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:37:45 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:40:28 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:40:37 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:40:37 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:40:37 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2015-2024年)... |
|||
2026-05-31 15:40:37 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 15:40:40 [main] WARN s.PerCapitaIncomeCrawlStrategy - 解析 2024 年数据失败,尝试其他正则模式 |
|||
2026-05-31 15:40:40 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据(备用模式): 41314.0元, 增速: 5.3% |
|||
2026-05-31 15:40:40 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 15:40:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 15:40:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 15:40:42 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 15:40:42 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 15:40:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 15:40:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 15:40:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 15:40:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 15:40:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 15:40:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2018 年数据... |
|||
2026-05-31 15:40:44 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2018 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201901/t20190121_1688307.html |
|||
2026-05-31 15:40:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2017 年数据... |
|||
2026-05-31 15:40:44 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2017 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201801/t20180118_1582343.html |
|||
2026-05-31 15:40:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2016 年数据... |
|||
2026-05-31 15:40:44 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2016 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201701/t20170120_1467428.html |
|||
2026-05-31 15:40:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2015 年数据... |
|||
2026-05-31 15:40:45 [main] WARN s.PerCapitaIncomeCrawlStrategy - 爬取 2015 年数据异常: Failed to fetch URL: https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/201601/t20160119_1337835.html |
|||
2026-05-31 15:40:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,成功获取 6 年数据 |
|||
2026-05-31 15:40:45 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:41:09 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 15:42:09 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 15:42:20 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 15:42:20 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 15:42:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 15:42:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 15:42:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 15:42:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 15:42:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 15:42:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 15:42:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 15:42:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 15:42:25 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 15:42:25 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 15:42:26 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 15:42:26 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 15:42:26 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 15:42:26 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 15:42:26 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 15:42:48 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:07:45 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:08:01 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:08:01 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:08:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:08:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:08:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:08:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:08:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:08:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:08:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:08:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:08:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:08:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:08:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:08:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:08:11 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:08:11 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:08:11 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:08:37 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:13:22 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:13:37 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:13:37 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:13:41 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:14:08 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:14:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:14:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:14:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:14:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:14:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:14:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:14:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:14:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:14:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:14:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:14:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:14:20 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:14:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:14:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:25:33 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:25:48 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:25:48 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:25:48 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:25:48 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:25:51 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:25:51 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:25:52 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:25:52 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:25:53 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:25:53 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:25:54 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:25:54 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:25:54 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:25:54 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:25:56 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:25:56 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:25:56 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:26:18 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:28:26 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:28:40 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:28:40 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:28:40 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:28:40 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:28:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:28:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:28:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:28:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:28:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:28:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:28:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:28:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:28:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:28:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:28:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:28:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:28:46 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:29:14 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:29:55 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:30:10 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:30:10 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:30:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:30:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:30:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:30:13 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:30:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:30:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:30:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:30:15 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:30:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:30:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:30:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:30:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:30:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:30:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:30:18 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:30:41 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:32:29 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:32:40 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:32:41 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:32:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:32:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:32:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:32:43 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:32:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:32:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:32:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:32:45 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:32:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:32:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:32:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:32:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:32:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:32:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:32:49 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:33:13 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:34:11 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:34:21 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:34:21 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:34:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:34:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:34:23 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:34:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:34:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:34:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:34:24 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:34:27 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:34:27 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:34:27 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:34:53 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 16:40:41 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 16:40:55 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 16:40:55 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 16:40:55 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 16:40:55 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 16:40:58 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 16:40:58 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 16:41:00 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 16:41:00 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 16:41:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 16:41:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 16:41:02 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 16:41:02 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 16:41:03 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 16:41:03 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 16:41:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 16:41:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 16:41:04 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 16:41:26 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:06:05 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:06:14 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:06:14 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:06:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:06:14 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:06:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:06:16 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:06:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:06:17 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:06:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:06:18 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:06:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:06:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:06:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:06:19 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:06:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:06:21 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:06:21 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:06:48 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:07:31 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:07:44 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:07:44 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:07:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:07:44 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:07:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:07:46 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:07:47 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:07:47 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:07:48 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:07:48 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:07:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:07:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:07:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:07:49 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:07:51 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:07:51 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:07:51 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:08:17 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:08:56 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:09:04 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:09:04 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:09:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:09:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:09:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:09:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:09:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:09:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:09:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:09:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:09:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:09:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:09:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:09:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:09:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:09:10 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:09:10 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:09:33 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:10:48 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:11:01 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:11:01 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:11:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:11:01 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:11:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:11:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:11:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:11:04 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:11:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:11:05 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:11:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:11:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:11:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:11:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:11:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:11:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:11:08 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:11:32 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:13:22 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:13:34 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:13:34 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:13:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:13:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:13:36 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:13:36 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:13:37 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:13:37 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:13:38 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:13:38 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:13:39 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:13:39 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:13:39 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:13:39 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:13:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:13:41 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:13:41 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:14:04 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:14:53 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:15:03 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:15:03 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:15:03 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:15:03 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:15:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:15:06 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:15:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:15:07 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:15:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:15:08 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:15:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:15:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:15:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:15:09 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:15:11 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:15:11 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:15:11 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:15:39 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
2026-05-31 18:16:21 [main] INFO strategy.HeritageCrawlStrategy - 开始爬取中国非物质文化遗产数据... |
|||
2026-05-31 18:16:31 [main] INFO strategy.HeritageCrawlStrategy - 成功获取API响应数据 |
|||
2026-05-31 18:16:31 [main] INFO strategy.HeritageCrawlStrategy - 成功解析 200 条非遗项目数据 |
|||
2026-05-31 18:16:31 [main] INFO s.PerCapitaIncomeCrawlStrategy - 开始爬取全国居民人均可支配收入数据(2019-2024年)... |
|||
2026-05-31 18:16:31 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2024 年数据... |
|||
2026-05-31 18:16:32 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2024 年数据: 41314.0元, 增速: 5.3% |
|||
2026-05-31 18:16:32 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2023 年数据... |
|||
2026-05-31 18:16:33 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2023 年数据: 39218.0元, 增速: 6.3% |
|||
2026-05-31 18:16:33 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2022 年数据... |
|||
2026-05-31 18:16:33 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2022 年数据: 36883.0元, 增速: 5.0% |
|||
2026-05-31 18:16:33 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2021 年数据... |
|||
2026-05-31 18:16:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2021 年数据: 35128.0元, 增速: 9.1% |
|||
2026-05-31 18:16:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2020 年数据... |
|||
2026-05-31 18:16:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2020 年数据: 32189.0元, 增速: 4.7% |
|||
2026-05-31 18:16:34 [main] INFO s.PerCapitaIncomeCrawlStrategy - 正在爬取 2019 年数据... |
|||
2026-05-31 18:16:35 [main] INFO s.PerCapitaIncomeCrawlStrategy - 成功爬取 2019 年数据: 30733.0元, 增速: 8.9% |
|||
2026-05-31 18:16:35 [main] INFO s.PerCapitaIncomeCrawlStrategy - 人均可支配收入数据爬取完成,共 6 年数据 |
|||
2026-05-31 18:16:35 [main] INFO strategy.DoubanCrawlStrategy - 开始爬取豆瓣读书Top250数据... |
|||
2026-05-31 18:16:58 [main] INFO strategy.DoubanCrawlStrategy - 豆瓣读书Top250爬取完成,共 250 条数据 |
|||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
|
|
|
@ -0,0 +1,65 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Maven build descriptor for the crawler course project.
  Produces a plain jar targeting Java 8 and wires `mvn exec:java` to the Main class.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>

    <groupId>com.course</groupId>
    <artifactId>crawler-homework</artifactId>
    <version>1.0.0</version>
    <packaging>jar</packaging>

    <name>Java Crawler Course Project</name>
    <description>A Java crawler project with MVC architecture, Command pattern, Strategy pattern</description>

    <properties>
        <!-- Single source of truth for the Java level; the compiler plugin below reads these. -->
        <maven.compiler.source>1.8</maven.compiler.source>
        <maven.compiler.target>1.8</maven.compiler.target>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    </properties>

    <dependencies>
        <!-- HTML fetching and parsing -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.17.2</version>
        </dependency>
        <!-- CSV output -->
        <dependency>
            <groupId>org.apache.commons</groupId>
            <artifactId>commons-csv</artifactId>
            <version>1.10.0</version>
        </dependency>
        <!-- Logging facade -->
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>2.0.9</version>
        </dependency>
        <!--
          Logging backend. The 1.4.x line requires Java 11 at runtime, which contradicts the
          1.8 target declared above; the 1.3.x line is the SLF4J 2.x-compatible series that
          still runs on Java 8.
        -->
        <dependency>
            <groupId>ch.qos.logback</groupId>
            <artifactId>logback-classic</artifactId>
            <version>1.3.14</version>
        </dependency>
    </dependencies>

    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-compiler-plugin</artifactId>
                <version>3.11.0</version>
                <configuration>
                    <source>${maven.compiler.source}</source>
                    <target>${maven.compiler.target}</target>
                </configuration>
            </plugin>
            <!-- Allows `mvn exec:java`; Main lives in the default package. -->
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>exec-maven-plugin</artifactId>
                <version>3.1.0</version>
                <configuration>
                    <mainClass>Main</mainClass>
                </configuration>
            </plugin>
        </plugins>
    </build>
</project>
|||
@ -0,0 +1,60 @@ |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.FileWriter; |
|||
|
|||
public class DebugHeritage { |
|||
public static void main(String[] args) throws Exception { |
|||
String url = "https://www.ihchina.cn/project.html?page=1"; |
|||
Document doc = Jsoup.connect(url) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") |
|||
.timeout(30000) |
|||
.get(); |
|||
|
|||
StringBuilder sb = new StringBuilder(); |
|||
sb.append("=== TABLE ELEMENTS ===\n"); |
|||
Elements tables = doc.select("table"); |
|||
sb.append("Found " + tables.size() + " tables\n\n"); |
|||
|
|||
for (int i = 0; i < tables.size(); i++) { |
|||
Element table = tables.get(i); |
|||
sb.append("Table " + i + ":\n"); |
|||
Elements rows = table.select("tr"); |
|||
sb.append(" Found " + rows.size() + " tr elements\n"); |
|||
|
|||
if (!rows.isEmpty()) { |
|||
Element firstRow = rows.first(); |
|||
Elements cells = firstRow.select("td, th"); |
|||
sb.append(" First row has " + cells.size() + " cells (td+th)\n"); |
|||
|
|||
Elements tds = firstRow.select("td"); |
|||
Elements ths = firstRow.select("th"); |
|||
sb.append(" td count: " + tds.size() + "\n"); |
|||
sb.append(" th count: " + ths.size() + "\n"); |
|||
|
|||
if (tds.isEmpty() && ths.isEmpty()) { |
|||
sb.append(" First row HTML: " + firstRow.html() + "\n\n"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
sb.append("\n=== DIV TABLE CLASS ===\n"); |
|||
Elements divTables = doc.select(".table_box"); |
|||
sb.append("Found " + divTables.size() + " .table_box elements\n"); |
|||
|
|||
Elements listContainer = doc.select(".list-container, .table-list, .heritage-list"); |
|||
sb.append("Found " + listContainer.size() + " list container elements\n"); |
|||
|
|||
sb.append("\n=== ALL TEXT IN PAGE ===\n"); |
|||
sb.append(doc.text().substring(0, Math.min(2000, doc.text().length()))); |
|||
|
|||
try (FileWriter fw = new FileWriter("debug_output.txt")) { |
|||
fw.write(sb.toString()); |
|||
} |
|||
|
|||
System.out.println("Debug output written to debug_output.txt"); |
|||
System.out.println(sb.toString()); |
|||
} |
|||
} |
|||
@ -0,0 +1,17 @@ |
|||
import util.HttpUtil; |
|||
import java.io.FileWriter; |
|||
|
|||
public class DebugJson { |
|||
public static void main(String[] args) throws Exception { |
|||
String url = "https://www.ihchina.cn/Article/Index/getProject.html?province=&rx_time=&type=&cate=&keywords=&category_id=16&limit=5&p=1"; |
|||
String jsonText = HttpUtil.getJsonContent(url); |
|||
|
|||
try (FileWriter fw = new FileWriter("debug_json.txt")) { |
|||
fw.write(jsonText); |
|||
} |
|||
|
|||
System.out.println("JSON written to debug_json.txt"); |
|||
System.out.println("First 2000 chars:"); |
|||
System.out.println(jsonText.substring(0, Math.min(2000, jsonText.length()))); |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
import command.DoubanCommand; |
|||
import controller.CrawlerController; |
|||
import view.CliView; |
|||
import command.CommandInvoker; |
|||
|
|||
public class DoubanTest { |
|||
public static void main(String[] args) { |
|||
CliView view = new CliView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CommandInvoker invoker = new CommandInvoker(view); |
|||
|
|||
invoker.addCommand(new DoubanCommand(controller, view)); |
|||
|
|||
view.displayWelcome(); |
|||
|
|||
try { |
|||
view.displayDivider(); |
|||
invoker.executeCommand(0); |
|||
view.displayDivider(); |
|||
} catch (Exception e) { |
|||
view.displayErrorWithStackTrace(e); |
|||
view.displayDivider(); |
|||
} |
|||
|
|||
view.displayExitMessage(); |
|||
view.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
import command.HeritageCommand; |
|||
import controller.CrawlerController; |
|||
import view.CliView; |
|||
import command.CommandInvoker; |
|||
|
|||
public class HeritageTest { |
|||
public static void main(String[] args) { |
|||
CliView view = new CliView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CommandInvoker invoker = new CommandInvoker(view); |
|||
|
|||
invoker.addCommand(new HeritageCommand(controller, view)); |
|||
|
|||
view.displayWelcome(); |
|||
|
|||
try { |
|||
view.displayDivider(); |
|||
invoker.executeCommand(0); |
|||
view.displayDivider(); |
|||
} catch (Exception e) { |
|||
view.displayErrorWithStackTrace(e); |
|||
view.displayDivider(); |
|||
} |
|||
|
|||
view.displayExitMessage(); |
|||
view.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,28 @@ |
|||
import command.PerCapitaIncomeCommand; |
|||
import controller.CrawlerController; |
|||
import view.CliView; |
|||
import command.CommandInvoker; |
|||
|
|||
public class IncomeTest { |
|||
public static void main(String[] args) { |
|||
CliView view = new CliView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CommandInvoker invoker = new CommandInvoker(view); |
|||
|
|||
invoker.addCommand(new PerCapitaIncomeCommand(controller, view)); |
|||
|
|||
view.displayWelcome(); |
|||
|
|||
try { |
|||
view.displayDivider(); |
|||
invoker.executeCommand(0); |
|||
view.displayDivider(); |
|||
} catch (Exception e) { |
|||
view.displayErrorWithStackTrace(e); |
|||
view.displayDivider(); |
|||
} |
|||
|
|||
view.displayExitMessage(); |
|||
view.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
import command.Command; |
|||
import command.CommandInvoker; |
|||
import command.DoubanCommand; |
|||
import command.HeritageCommand; |
|||
import command.PerCapitaIncomeCommand; |
|||
import controller.CrawlerController; |
|||
import exception.CrawlerException; |
|||
import view.CliView; |
|||
|
|||
public class Main { |
|||
public static void main(String[] args) { |
|||
CliView view = new CliView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CommandInvoker invoker = new CommandInvoker(view); |
|||
|
|||
invoker.addCommand(new HeritageCommand(controller, view)); |
|||
invoker.addCommand(new DoubanCommand(controller, view)); |
|||
invoker.addCommand(new PerCapitaIncomeCommand(controller, view)); |
|||
|
|||
view.displayWelcome(); |
|||
|
|||
boolean running = true; |
|||
while (running) { |
|||
view.displayMenu(invoker.getCommands()); |
|||
int choice = view.getUserChoice(); |
|||
|
|||
if (choice == 0) { |
|||
running = false; |
|||
view.displayExitMessage(); |
|||
} else if (choice > 0 && choice <= invoker.getCommandCount()) { |
|||
try { |
|||
view.displayDivider(); |
|||
invoker.executeCommand(choice - 1); |
|||
view.displayDivider(); |
|||
} catch (CrawlerException e) { |
|||
view.displayErrorWithStackTrace(e); |
|||
view.displayDivider(); |
|||
} |
|||
} else { |
|||
view.displayInvalidChoice(); |
|||
} |
|||
} |
|||
|
|||
view.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,41 @@ |
|||
import java.net.InetAddress; |
|||
import java.net.HttpURLConnection; |
|||
import java.net.URL; |
|||
|
|||
/**
 * Stand-alone connectivity check: resolves a fixed list of hosts, probes their
 * reachability, then issues one HTTP GET and prints the response code.
 */
public class NetworkTest {
    /**
     * Runs the DNS/reachability probes followed by the HTTP probe.
     * All failures are printed; none are propagated.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        System.out.println("=== 网络连接测试 ===\n");

        String[] hosts = {
                "www.stats.gov.cn",
                "www.baidu.com",
                "www.ihchina.cn"
        };

        for (String host : hosts) {
            System.out.println("测试: " + host);
            try {
                InetAddress addr = InetAddress.getByName(host);
                System.out.println(" IP地址: " + addr.getHostAddress());
                // Waits up to 5000 ms for the host to answer.
                System.out.println(" 可达: " + addr.isReachable(5000));
            } catch (Exception e) {
                System.out.println(" DNS解析失败: " + e.getMessage());
            }
        }

        System.out.println("\n测试HTTP连接...");
        HttpURLConnection conn = null;
        try {
            URL url = new URL("https://www.stats.gov.cn/sj/zxfb/202402/t20240228_1947915.html");
            conn = (HttpURLConnection) url.openConnection();
            conn.setConnectTimeout(10000);
            conn.setReadTimeout(10000);
            conn.setRequestMethod("GET");
            conn.setRequestProperty("User-Agent", "Mozilla/5.0");

            System.out.println("响应码: " + conn.getResponseCode());
        } catch (Exception e) {
            System.out.println("连接失败: " + e.getClass().getName() + " - " + e.getMessage());
        } finally {
            // Release the connection even when the request itself failed.
            if (conn != null) {
                conn.disconnect();
            }
        }
    }
}
|||
@ -0,0 +1,34 @@ |
|||
import command.HeritageCommand; |
|||
import command.DoubanCommand; |
|||
import command.PerCapitaIncomeCommand; |
|||
import controller.CrawlerController; |
|||
import view.CliView; |
|||
import command.CommandInvoker; |
|||
|
|||
public class TestRunner { |
|||
public static void main(String[] args) { |
|||
CliView view = new CliView(); |
|||
CrawlerController controller = new CrawlerController(view); |
|||
CommandInvoker invoker = new CommandInvoker(view); |
|||
|
|||
invoker.addCommand(new HeritageCommand(controller, view)); |
|||
invoker.addCommand(new PerCapitaIncomeCommand(controller, view)); |
|||
invoker.addCommand(new DoubanCommand(controller, view)); |
|||
|
|||
view.displayWelcome(); |
|||
|
|||
for (int i = 0; i < invoker.getCommandCount(); i++) { |
|||
try { |
|||
view.displayDivider(); |
|||
invoker.executeCommand(i); |
|||
view.displayDivider(); |
|||
} catch (Exception e) { |
|||
view.displayErrorWithStackTrace(e); |
|||
view.displayDivider(); |
|||
} |
|||
} |
|||
|
|||
view.displayExitMessage(); |
|||
view.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,17 @@ |
|||
package command; |
|||
|
|||
import exception.CrawlerException; |
|||
|
|||
public abstract class Command { |
|||
protected String name; |
|||
|
|||
public Command(String name) { |
|||
this.name = name; |
|||
} |
|||
|
|||
public String getName() { |
|||
return name; |
|||
} |
|||
|
|||
public abstract void execute() throws CrawlerException; |
|||
} |
|||
@ -0,0 +1,43 @@ |
|||
package command; |
|||
|
|||
import exception.CrawlerException; |
|||
import view.CliView; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class CommandInvoker { |
|||
private final List<Command> commands; |
|||
private final CliView view; |
|||
|
|||
public CommandInvoker(CliView view) { |
|||
this.commands = new ArrayList<>(); |
|||
this.view = view; |
|||
} |
|||
|
|||
public void addCommand(Command command) { |
|||
commands.add(command); |
|||
} |
|||
|
|||
public void executeCommand(int index) throws CrawlerException { |
|||
if (index < 0 || index >= commands.size()) { |
|||
throw new CrawlerException("无效的命令索引: " + index); |
|||
} |
|||
Command command = commands.get(index); |
|||
command.execute(); |
|||
} |
|||
|
|||
public void executeAll() throws CrawlerException { |
|||
for (Command command : commands) { |
|||
command.execute(); |
|||
} |
|||
} |
|||
|
|||
public List<Command> getCommands() { |
|||
return new ArrayList<>(commands); |
|||
} |
|||
|
|||
public int getCommandCount() { |
|||
return commands.size(); |
|||
} |
|||
} |
|||
@ -0,0 +1,33 @@ |
|||
package command; |
|||
|
|||
import controller.CrawlerController; |
|||
import exception.CrawlerException; |
|||
import model.BookItem; |
|||
import strategy.CrawlStrategy; |
|||
import strategy.DoubanCrawlStrategy; |
|||
import view.CliView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class DoubanCommand extends Command { |
|||
private final CrawlerController controller; |
|||
private final CliView view; |
|||
|
|||
public DoubanCommand(CrawlerController controller, CliView view) { |
|||
super("豆瓣读书Top250爬虫"); |
|||
this.controller = controller; |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws CrawlerException { |
|||
view.displayMessage("开始执行: " + name); |
|||
view.displayMessage("正在爬取豆瓣读书Top250数据..."); |
|||
|
|||
CrawlStrategy<BookItem> strategy = new DoubanCrawlStrategy(); |
|||
List<BookItem> data = controller.executeCrawl(strategy); |
|||
|
|||
view.displayMessage("成功爬取 " + data.size() + " 条数据"); |
|||
view.displayMessage("数据已保存到: " + strategy.getOutputFileName()); |
|||
} |
|||
} |
|||
@ -0,0 +1,33 @@ |
|||
package command; |
|||
|
|||
import controller.CrawlerController; |
|||
import exception.CrawlerException; |
|||
import model.HeritageItem; |
|||
import strategy.CrawlStrategy; |
|||
import strategy.HeritageCrawlStrategy; |
|||
import view.CliView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class HeritageCommand extends Command { |
|||
private final CrawlerController controller; |
|||
private final CliView view; |
|||
|
|||
public HeritageCommand(CrawlerController controller, CliView view) { |
|||
super("中国非物质文化遗产名录爬虫"); |
|||
this.controller = controller; |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws CrawlerException { |
|||
view.displayMessage("开始执行: " + name); |
|||
view.displayMessage("正在爬取中国非物质文化遗产名录数据..."); |
|||
|
|||
CrawlStrategy<HeritageItem> strategy = new HeritageCrawlStrategy(); |
|||
List<HeritageItem> data = controller.executeCrawl(strategy); |
|||
|
|||
view.displayMessage("成功爬取 " + data.size() + " 条数据"); |
|||
view.displayMessage("数据已保存到: " + strategy.getOutputFileName()); |
|||
} |
|||
} |
|||
@ -0,0 +1,27 @@ |
|||
package command; |
|||
|
|||
import controller.CrawlerController; |
|||
import model.PerCapitaIncome; |
|||
import strategy.CrawlStrategy; |
|||
import strategy.PerCapitaIncomeCrawlStrategy; |
|||
import view.CliView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class PerCapitaIncomeCommand extends Command { |
|||
private CrawlerController controller; |
|||
private CliView view; |
|||
private CrawlStrategy<PerCapitaIncome> strategy; |
|||
|
|||
public PerCapitaIncomeCommand(CrawlerController controller, CliView view) { |
|||
super("全国居民人均可支配收入爬虫"); |
|||
this.controller = controller; |
|||
this.view = view; |
|||
this.strategy = new PerCapitaIncomeCrawlStrategy(); |
|||
} |
|||
|
|||
@Override |
|||
public void execute() throws exception.CrawlerException { |
|||
controller.executeCrawl(strategy); |
|||
} |
|||
} |
|||
@ -0,0 +1,25 @@ |
|||
package controller; |
|||
|
|||
import exception.CrawlerException; |
|||
import strategy.CrawlStrategy; |
|||
import view.CliView; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class CrawlerController { |
|||
private final CliView view; |
|||
|
|||
public CrawlerController(CliView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
public <T> List<T> executeCrawl(CrawlStrategy<T> strategy) throws CrawlerException { |
|||
List<T> data = strategy.crawl(); |
|||
strategy.saveToFile(data); |
|||
return data; |
|||
} |
|||
|
|||
public CliView getView() { |
|||
return view; |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package exception; |
|||
|
|||
/**
 * Checked base exception of the crawler; the more specific crawler exceptions extend it.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /** @param message description of the failure */
    public CrawlerException(String message) {
        super(message);
    }

    /** @param cause underlying exception */
    public CrawlerException(Throwable cause) {
        super(cause);
    }

    /** Creates an exception without message or cause. */
    public CrawlerException() {
        super();
    }
}
|||
@ -0,0 +1,19 @@ |
|||
package exception; |
|||
|
|||
public class FileWriteException extends CrawlerException { |
|||
public FileWriteException() { |
|||
super(); |
|||
} |
|||
|
|||
public FileWriteException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public FileWriteException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public FileWriteException(Throwable cause) { |
|||
super(cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package exception; |
|||
|
|||
public class HttpRequestException extends CrawlerException { |
|||
public HttpRequestException() { |
|||
super(); |
|||
} |
|||
|
|||
public HttpRequestException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public HttpRequestException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public HttpRequestException(Throwable cause) { |
|||
super(cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package exception; |
|||
|
|||
public class ParseException extends CrawlerException { |
|||
public ParseException() { |
|||
super(); |
|||
} |
|||
|
|||
public ParseException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public ParseException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
|
|||
public ParseException(Throwable cause) { |
|||
super(cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,83 @@ |
|||
package model; |
|||
|
|||
/**
 * Mutable value holder for one crawled book. All properties are plain strings.
 */
public class BookItem {
    private String name;
    private String author;
    private String publisher;
    private String publishYear;
    private String price;
    private String description;

    /** Creates an item with every property left {@code null}. */
    public BookItem() {
    }

    /** Creates an item with all six properties set. */
    public BookItem(String name, String author, String publisher, String publishYear,
                    String price, String description) {
        this.name = name;
        this.author = author;
        this.publisher = publisher;
        this.publishYear = publishYear;
        this.price = price;
        this.description = description;
    }

    /** @return the book name */
    public String getName() {
        return this.name;
    }

    /** @param name the book name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the author text */
    public String getAuthor() {
        return this.author;
    }

    /** @param author the author text */
    public void setAuthor(String author) {
        this.author = author;
    }

    /** @return the publisher */
    public String getPublisher() {
        return this.publisher;
    }

    /** @param publisher the publisher */
    public void setPublisher(String publisher) {
        this.publisher = publisher;
    }

    /** @return the publication year */
    public String getPublishYear() {
        return this.publishYear;
    }

    /** @param publishYear the publication year */
    public void setPublishYear(String publishYear) {
        this.publishYear = publishYear;
    }

    /** @return the price text */
    public String getPrice() {
        return this.price;
    }

    /** @param price the price text */
    public void setPrice(String price) {
        this.price = price;
    }

    /** @return the description */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return {@code BookItem{...}} with every property in single quotes */
    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder("BookItem{");
        out.append("name='").append(this.name).append('\'');
        out.append(", author='").append(this.author).append('\'');
        out.append(", publisher='").append(this.publisher).append('\'');
        out.append(", publishYear='").append(this.publishYear).append('\'');
        out.append(", price='").append(this.price).append('\'');
        out.append(", description='").append(this.description).append('\'');
        return out.append('}').toString();
    }
}
|||
@ -0,0 +1,60 @@ |
|||
package model; |
|||
|
|||
/**
 * Mutable value holder for one crawled heritage entry. All properties are plain strings.
 */
public class HeritageItem {
    private String name;
    private String category;
    private String region;
    private String description;

    /** Creates an item with every property left {@code null}. */
    public HeritageItem() {
    }

    /** Creates an item with all four properties set. */
    public HeritageItem(String name, String category, String region, String description) {
        this.name = name;
        this.category = category;
        this.region = region;
        this.description = description;
    }

    /** @return the entry name */
    public String getName() {
        return this.name;
    }

    /** @param name the entry name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the category */
    public String getCategory() {
        return this.category;
    }

    /** @param category the category */
    public void setCategory(String category) {
        this.category = category;
    }

    /** @return the region */
    public String getRegion() {
        return this.region;
    }

    /** @param region the region */
    public void setRegion(String region) {
        this.region = region;
    }

    /** @return the description */
    public String getDescription() {
        return this.description;
    }

    /** @param description the description */
    public void setDescription(String description) {
        this.description = description;
    }

    /** @return {@code HeritageItem{...}} with every property in single quotes */
    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder("HeritageItem{");
        out.append("name='").append(this.name).append('\'');
        out.append(", category='").append(this.category).append('\'');
        out.append(", region='").append(this.region).append('\'');
        out.append(", description='").append(this.description).append('\'');
        return out.append('}').toString();
    }
}
|||
@ -0,0 +1,60 @@ |
|||
package model; |
|||
|
|||
/**
 * Mutable value holder for one per-capita income record: region, year, income
 * amount and growth rate.
 */
public class PerCapitaIncome {
    private String province;
    private String year;
    private double perCapitaIncome;
    private double growthRate;

    /** Creates a record with null strings and zero numbers. */
    public PerCapitaIncome() {
    }

    /** Creates a record with all four properties set. */
    public PerCapitaIncome(String province, String year, double perCapitaIncome, double growthRate) {
        this.province = province;
        this.year = year;
        this.perCapitaIncome = perCapitaIncome;
        this.growthRate = growthRate;
    }

    /** @return the region label */
    public String getProvince() {
        return this.province;
    }

    /** @param province the region label */
    public void setProvince(String province) {
        this.province = province;
    }

    /** @return the year as text */
    public String getYear() {
        return this.year;
    }

    /** @param year the year as text */
    public void setYear(String year) {
        this.year = year;
    }

    /** @return the income amount */
    public double getPerCapitaIncome() {
        return this.perCapitaIncome;
    }

    /** @param perCapitaIncome the income amount */
    public void setPerCapitaIncome(double perCapitaIncome) {
        this.perCapitaIncome = perCapitaIncome;
    }

    /** @return the growth rate */
    public double getGrowthRate() {
        return this.growthRate;
    }

    /** @param growthRate the growth rate */
    public void setGrowthRate(double growthRate) {
        this.growthRate = growthRate;
    }

    /** @return {@code PerCapitaIncome{...}}; strings are single-quoted, numbers are not */
    @Override
    public String toString() {
        final StringBuilder out = new StringBuilder("PerCapitaIncome{");
        out.append("province='").append(this.province).append('\'');
        out.append(", year='").append(this.year).append('\'');
        out.append(", perCapitaIncome=").append(this.perCapitaIncome);
        out.append(", growthRate=").append(this.growthRate);
        return out.append('}').toString();
    }
}
|||
@ -0,0 +1,12 @@ |
|||
package strategy; |
|||
|
|||
import exception.CrawlerException; |
|||
|
|||
import java.util.List; |
|||
|
|||
public interface CrawlStrategy<T> { |
|||
List<T> crawl() throws CrawlerException; |
|||
String getOutputFileName(); |
|||
String[] getCsvHeaders(); |
|||
void saveToFile(List<T> data) throws CrawlerException; |
|||
} |
|||
@ -0,0 +1,181 @@ |
|||
package strategy; |
|||
|
|||
import exception.CrawlerException; |
|||
import exception.ParseException; |
|||
import model.BookItem; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import util.CsvWriterUtil; |
|||
import util.HttpUtil; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class DoubanCrawlStrategy implements CrawlStrategy<BookItem> { |
|||
private static final Logger logger = LoggerFactory.getLogger(DoubanCrawlStrategy.class); |
|||
private static final String BASE_URL = "https://book.douban.com/top250"; |
|||
private static final int TOTAL_PAGES = 10; |
|||
|
|||
@Override |
|||
public List<BookItem> crawl() throws CrawlerException { |
|||
List<BookItem> books = new ArrayList<>(); |
|||
logger.info("开始爬取豆瓣读书Top250数据..."); |
|||
|
|||
for (int page = 0; page < TOTAL_PAGES; page++) { |
|||
try { |
|||
String url = page == 0 ? BASE_URL : BASE_URL + "?start=" + (page * 25); |
|||
logger.debug("请求第 {} 页: {}", page + 1, url); |
|||
|
|||
Document doc = HttpUtil.getDocument(url); |
|||
|
|||
if (doc == null) { |
|||
logger.warn("获取第 {} 页失败", page + 1); |
|||
throw new ParseException("Failed to fetch Douban books page: " + (page + 1)); |
|||
} |
|||
|
|||
Elements bookElements = doc.select("table.item"); |
|||
if (bookElements.isEmpty()) { |
|||
bookElements = doc.select(".item"); |
|||
} |
|||
|
|||
for (Element bookElement : bookElements) { |
|||
try { |
|||
String name = extractName(bookElement); |
|||
String author = extractAuthor(bookElement); |
|||
String publisher = extractPublisher(bookElement); |
|||
String publishYear = extractPublishYear(bookElement); |
|||
String price = extractPrice(bookElement); |
|||
String description = extractDescription(bookElement); |
|||
|
|||
BookItem book = new BookItem(name, author, publisher, publishYear, |
|||
price, description); |
|||
books.add(book); |
|||
} catch (Exception e) { |
|||
continue; |
|||
} |
|||
} |
|||
|
|||
logger.debug("第 {} 页解析完成,累计 {} 条", page + 1, books.size()); |
|||
HttpUtil.randomDelay(); |
|||
} catch (CrawlerException e) { |
|||
logger.error("解析第 {} 页失败", page + 1, e); |
|||
throw new ParseException("Error parsing Douban books page " + (page + 1), e); |
|||
} |
|||
} |
|||
|
|||
logger.info("豆瓣读书Top250爬取完成,共 {} 条数据", books.size()); |
|||
return books; |
|||
} |
|||
|
|||
private String extractName(Element element) { |
|||
Element titleElement = element.selectFirst("td:nth-child(2) h2 a, .pl2 a, .title"); |
|||
return titleElement != null ? titleElement.text().trim() |
|||
.replaceAll("^\\s*", "").replaceAll("\\s*$", "") : "未知"; |
|||
} |
|||
|
|||
private String extractAuthor(Element element) { |
|||
Element authorElement = element.selectFirst("td:nth-child(2) .pl, .pub, .author"); |
|||
if (authorElement != null) { |
|||
String text = authorElement.text().trim(); |
|||
if (text.contains("/")) { |
|||
String[] parts = text.split("/"); |
|||
return parts[0].trim(); |
|||
} |
|||
return text; |
|||
} |
|||
return "未知"; |
|||
} |
|||
|
|||
private String extractPublisher(Element element) { |
|||
Element pubElement = element.selectFirst("td:nth-child(2) .pl, .pub, .publisher"); |
|||
if (pubElement != null) { |
|||
String text = pubElement.text().trim(); |
|||
String[] parts = text.split("/"); |
|||
if (parts.length >= 2) { |
|||
return parts[parts.length - 2].trim(); |
|||
} |
|||
return text; |
|||
} |
|||
return "未知"; |
|||
} |
|||
|
|||
private String extractPublishYear(Element element) { |
|||
Element yearElement = element.selectFirst("td:nth-child(2) .pl, .pub, .year"); |
|||
if (yearElement != null) { |
|||
String text = yearElement.text().trim(); |
|||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("\\d{4}"); |
|||
java.util.regex.Matcher matcher = pattern.matcher(text); |
|||
if (matcher.find()) { |
|||
return matcher.group(); |
|||
} |
|||
} |
|||
return "未知"; |
|||
} |
|||
|
|||
private String extractPrice(Element element) { |
|||
Element priceElement = element.selectFirst("td:nth-child(2) .pl, .pub, .price"); |
|||
if (priceElement != null) { |
|||
String text = priceElement.text().trim(); |
|||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("[0-9.]+\\s*元"); |
|||
java.util.regex.Matcher matcher = pattern.matcher(text); |
|||
if (matcher.find()) { |
|||
return matcher.group(); |
|||
} |
|||
} |
|||
return "未知"; |
|||
} |
|||
|
|||
private double extractRating(Element element) { |
|||
Element ratingElement = element.selectFirst("span.rating_nums, .rating_nums, .star"); |
|||
if (ratingElement != null) { |
|||
try { |
|||
String ratingStr = ratingElement.text().trim(); |
|||
return Double.parseDouble(ratingStr); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
return 0.0; |
|||
} |
|||
|
|||
private int extractRatingCount(Element element) { |
|||
Element countElement = element.selectFirst("span.inq, .rating_sum, .pl"); |
|||
if (countElement != null) { |
|||
try { |
|||
String text = countElement.text().trim(); |
|||
java.util.regex.Pattern pattern = java.util.regex.Pattern.compile("\\d+"); |
|||
java.util.regex.Matcher matcher = pattern.matcher(text); |
|||
if (matcher.find()) { |
|||
return Integer.parseInt(matcher.group()); |
|||
} |
|||
} catch (NumberFormatException e) { |
|||
return 0; |
|||
} |
|||
} |
|||
return 0; |
|||
} |
|||
|
|||
private String extractDescription(Element element) { |
|||
Element descElement = element.selectFirst("p.quote, .quote, .intro"); |
|||
return descElement != null ? descElement.text().trim() : "暂无简介"; |
|||
} |
|||
|
|||
@Override |
|||
public String getOutputFileName() { |
|||
return "douban_books.csv"; |
|||
} |
|||
|
|||
@Override |
|||
public String[] getCsvHeaders() { |
|||
return new String[]{"书籍名称", "作者/译者", "出版社", "出版年份", "定价", "豆瓣评分", "评价人数", "书籍简介"}; |
|||
} |
|||
|
|||
@Override |
|||
public void saveToFile(List<BookItem> data) throws CrawlerException { |
|||
CsvWriterUtil.writeBookItems(data, getOutputFileName()); |
|||
} |
|||
} |
|||
@ -0,0 +1,138 @@ |
|||
package strategy; |
|||
|
|||
import exception.CrawlerException; |
|||
import exception.ParseException; |
|||
import model.HeritageItem; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import util.CsvWriterUtil; |
|||
import util.HttpUtil; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class HeritageCrawlStrategy implements CrawlStrategy<HeritageItem> { |
|||
private static final Logger logger = LoggerFactory.getLogger(HeritageCrawlStrategy.class); |
|||
private static final String API_URL = "https://www.ihchina.cn/Article/Index/getProject.html?province=&rx_time=&type=&cate=&keywords=&category_id=16&limit=%d&p=%d"; |
|||
private static final int BATCH_SIZE = 500; |
|||
|
|||
@Override |
|||
public List<HeritageItem> crawl() throws CrawlerException { |
|||
List<HeritageItem> items = new ArrayList<>(); |
|||
logger.info("开始爬取中国非物质文化遗产数据..."); |
|||
|
|||
try { |
|||
String url = String.format(API_URL, BATCH_SIZE, 1); |
|||
logger.debug("请求API接口: {}", url); |
|||
|
|||
String jsonText = HttpUtil.getJsonContent(url); |
|||
logger.info("成功获取API响应数据"); |
|||
|
|||
String listContent = extractJsonArray(jsonText, "list"); |
|||
|
|||
if (listContent == null || listContent.isEmpty()) { |
|||
logger.warn("未找到数据列表"); |
|||
return items; |
|||
} |
|||
|
|||
Pattern itemPattern = Pattern.compile("\\{[^}]+\\}"); |
|||
Matcher itemMatcher = itemPattern.matcher(listContent); |
|||
|
|||
while (itemMatcher.find() && items.size() < 200) { |
|||
String itemStr = itemMatcher.group(); |
|||
|
|||
String name = cleanHtml(extractJsonValue(itemStr, "title")); |
|||
String category = cleanHtml(extractJsonValue(itemStr, "type")); |
|||
String region = cleanHtml(extractJsonValue(itemStr, "province")); |
|||
String description = cleanHtml(extractJsonValue(itemStr, "content")); |
|||
|
|||
if (name == null || name.isEmpty()) { |
|||
name = "未知"; |
|||
} |
|||
if (category == null) { |
|||
category = "未知"; |
|||
} |
|||
if (region == null) { |
|||
region = "未知"; |
|||
} |
|||
if (description == null || description.isEmpty()) { |
|||
description = "暂无简介"; |
|||
} |
|||
|
|||
HeritageItem heritageItem = new HeritageItem(name, category, region, description); |
|||
items.add(heritageItem); |
|||
} |
|||
|
|||
logger.info("成功解析 {} 条非遗项目数据", items.size()); |
|||
|
|||
} catch (CrawlerException e) { |
|||
logger.error("解析非遗API响应失败", e); |
|||
throw new ParseException("Error parsing heritage API response", e); |
|||
} |
|||
|
|||
return items; |
|||
} |
|||
|
|||
private String cleanHtml(String text) { |
|||
if (text == null) { |
|||
return null; |
|||
} |
|||
text = text.replaceAll("<", "<"); |
|||
text = text.replaceAll(">", ">"); |
|||
text = text.replaceAll(" ", " "); |
|||
text = text.replaceAll("<[^>]*>", ""); |
|||
text = text.replaceAll("\\\\/", "/"); |
|||
text = text.replace("\\r\\n", " "); |
|||
text = text.replace("\\n", " "); |
|||
text = text.replace("\\r", " "); |
|||
text = text.replaceAll("\\s+", " "); |
|||
return text.trim(); |
|||
} |
|||
|
|||
private String extractJsonArray(String text, String key) { |
|||
Pattern pattern = Pattern.compile("\"" + key + "\"\\s*:\\s*\\[([^\\]]+)\\]"); |
|||
Matcher matcher = pattern.matcher(text); |
|||
if (matcher.find()) { |
|||
return matcher.group(1); |
|||
} |
|||
return null; |
|||
} |
|||
|
|||
private String extractJsonValue(String jsonStr, String key) { |
|||
Pattern pattern = Pattern.compile("\"" + key + "\"\\s*:\\s*\"([^\"]*)\""); |
|||
Matcher matcher = pattern.matcher(jsonStr); |
|||
if (matcher.find()) { |
|||
String value = matcher.group(1); |
|||
return decodeUnicode(value); |
|||
} |
|||
return null; |
|||
} |
|||
|
|||
private String decodeUnicode(String str) { |
|||
Pattern pattern = Pattern.compile("\\\\u([0-9a-fA-F]{4})"); |
|||
Matcher matcher = pattern.matcher(str); |
|||
StringBuilder sb = new StringBuilder(); |
|||
while (matcher.find()) { |
|||
matcher.appendReplacement(sb, String.valueOf((char) Integer.parseInt(matcher.group(1), 16))); |
|||
} |
|||
matcher.appendTail(sb); |
|||
return sb.toString(); |
|||
} |
|||
|
|||
@Override |
|||
public String getOutputFileName() { |
|||
return "intangible_heritage.csv"; |
|||
} |
|||
|
|||
@Override |
|||
public String[] getCsvHeaders() { |
|||
return new String[]{"非遗项目名称", "所属类别", "申报地区/单位", "项目简介"}; |
|||
} |
|||
|
|||
@Override |
|||
public void saveToFile(List<HeritageItem> data) throws CrawlerException { |
|||
CsvWriterUtil.writeHeritageItems(data, getOutputFileName()); |
|||
} |
|||
} |
|||
@ -0,0 +1,103 @@ |
|||
package strategy; |
|||
|
|||
import exception.CrawlerException; |
|||
import model.PerCapitaIncome; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import util.CsvWriterUtil; |
|||
import util.HttpUtil; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class PerCapitaIncomeCrawlStrategy implements CrawlStrategy<PerCapitaIncome> { |
|||
private static final Logger logger = LoggerFactory.getLogger(PerCapitaIncomeCrawlStrategy.class); |
|||
|
|||
private static final String[] URLs = { |
|||
"https://www.stats.gov.cn/zwfwck/sjfb/202502/t20250228_1958817.html", // 2024年
|
|||
"https://www.stats.gov.cn/sj/zxfb/202401/t20240116_1946622.html", // 2023年
|
|||
"https://www.stats.gov.cn/sj/zxfb/202302/t20230203_1901715.html", // 2022年
|
|||
"https://www.stats.gov.cn/xxgk/sjfb/zxfb2020/202201/t20220117_1826436.html", // 2021年
|
|||
"https://www.stats.gov.cn/xxgk/jd/sjjd2020/202101/t20210119_1812623.html", // 2020年
|
|||
"https://www.stats.gov.cn/WZWSREL3NqL3p4ZmIvMjAyMzAyL3QyMDIzMDIwM18xOTAwNjAwLmh0bWw=?wzwschallenge=V1pXU19DT05GSVJNX1BSRUZJWF9MQUJFTDc1OTIyNzA=&wzwsinfos=eyJob3N0bmFtZSI6Ind3dy5zdGF0cy5nb3YuY24iLCJzY2hlbWUiOiJodHRwcyIsInZlcmlmeSI6IjMwODIxMDk5ZGQ1MjhjZWNiYzZiZDgxZjc0ODY5MTJhNmFmNWExYmMwZDFlODRlNjg3NTkwMjNhMzJmNjc5NjI3ZTc4MzNhYTQxN2MzNWIxMmE0NTU4YTc2OGZhMzMyMTA3ZDkzZSJ9" // 2019年
|
|||
}; |
|||
|
|||
private static final int[] YEARS = {2024, 2023, 2022, 2021, 2020, 2019}; |
|||
|
|||
@Override |
|||
public List<PerCapitaIncome> crawl() throws CrawlerException { |
|||
List<PerCapitaIncome> dataList = new ArrayList<>(); |
|||
logger.info("开始爬取全国居民人均可支配收入数据(2019-2024年)..."); |
|||
|
|||
for (int i = 0; i < URLs.length; i++) { |
|||
String url = URLs[i]; |
|||
int year = YEARS[i]; |
|||
logger.info("正在爬取 {} 年数据...", year); |
|||
|
|||
Document doc = HttpUtil.getDocument(url); |
|||
String text = doc.text(); |
|||
|
|||
PerCapitaIncome income = extractNationalIncome(text, String.valueOf(year)); |
|||
if (income == null) { |
|||
income = extractNationalIncomeAlt(text, String.valueOf(year)); |
|||
} |
|||
|
|||
if (income != null) { |
|||
dataList.add(income); |
|||
logger.info("成功爬取 {} 年数据: {}元, 增速: {}%", year, income.getPerCapitaIncome(), income.getGrowthRate()); |
|||
} |
|||
} |
|||
|
|||
logger.info("人均可支配收入数据爬取完成,共 {} 年数据", dataList.size()); |
|||
|
|||
return dataList; |
|||
} |
|||
|
|||
private PerCapitaIncome extractNationalIncome(String text, String year) { |
|||
Pattern pattern = Pattern.compile("全国居民人均可支配收入(\\d+)元,比上年名义增长([0-9.]+)%"); |
|||
Matcher matcher = pattern.matcher(text); |
|||
|
|||
if (matcher.find()) { |
|||
PerCapitaIncome income = new PerCapitaIncome(); |
|||
income.setYear(year); |
|||
income.setProvince("全国"); |
|||
income.setPerCapitaIncome(Double.parseDouble(matcher.group(1))); |
|||
income.setGrowthRate(Double.parseDouble(matcher.group(2))); |
|||
return income; |
|||
} |
|||
return null; |
|||
} |
|||
|
|||
private PerCapitaIncome extractNationalIncomeAlt(String text, String year) { |
|||
Pattern pattern = Pattern.compile("全年全国居民人均可支配收入(\\d+)元,比上年增长([0-9.]+)%"); |
|||
Matcher matcher = pattern.matcher(text); |
|||
|
|||
if (matcher.find()) { |
|||
PerCapitaIncome income = new PerCapitaIncome(); |
|||
income.setYear(year); |
|||
income.setProvince("全国"); |
|||
income.setPerCapitaIncome(Double.parseDouble(matcher.group(1))); |
|||
income.setGrowthRate(Double.parseDouble(matcher.group(2))); |
|||
return income; |
|||
} |
|||
return null; |
|||
} |
|||
|
|||
@Override |
|||
public String getOutputFileName() { |
|||
return "per_capita_income.csv"; |
|||
} |
|||
|
|||
@Override |
|||
public String[] getCsvHeaders() { |
|||
return new String[]{"地区/全国", "年份", "人均可支配收入(元)", "同比增速(%)"}; |
|||
} |
|||
|
|||
@Override |
|||
public void saveToFile(List<PerCapitaIncome> data) throws CrawlerException { |
|||
CsvWriterUtil.writePerCapitaIncome(data, getOutputFileName()); |
|||
} |
|||
} |
|||
@ -0,0 +1,120 @@ |
|||
package util; |
|||
|
|||
import exception.FileWriteException; |
|||
import model.BookItem; |
|||
import model.HeritageItem; |
|||
import model.PerCapitaIncome; |
|||
import org.apache.commons.csv.CSVFormat; |
|||
import org.apache.commons.csv.CSVPrinter; |
|||
|
|||
import java.io.BufferedWriter; |
|||
import java.io.IOException; |
|||
import java.nio.file.Files; |
|||
import java.nio.file.Paths; |
|||
import java.util.List; |
|||
|
|||
public class CsvWriterUtil { |
|||
|
|||
public static void writeHeritageItems(List<HeritageItem> items, String fileName) throws FileWriteException { |
|||
String[] headers = {"非遗项目名称", "所属类别", "申报地区/单位", "项目简介"}; |
|||
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName)); |
|||
CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT.builder() |
|||
.setHeader(headers) |
|||
.build())) { |
|||
|
|||
for (HeritageItem item : items) { |
|||
printer.printRecord( |
|||
escapeCsvField(item.getName()), |
|||
escapeCsvField(item.getCategory()), |
|||
escapeCsvField(item.getRegion()), |
|||
escapeCsvField(item.getDescription()) |
|||
); |
|||
} |
|||
printer.flush(); |
|||
} catch (IOException e) { |
|||
throw new FileWriteException("Failed to write CSV file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
public static void writeBookItems(List<BookItem> books, String fileName) throws FileWriteException { |
|||
String[] headers = {"书籍名称", "作者/译者", "出版社", "出版年份", "定价", "书籍简介"}; |
|||
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName)); |
|||
CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT.builder() |
|||
.setHeader(headers) |
|||
.build())) { |
|||
|
|||
for (BookItem item : books) { |
|||
printer.printRecord( |
|||
escapeCsvField(item.getName()), |
|||
escapeCsvField(item.getAuthor()), |
|||
escapeCsvField(item.getPublisher()), |
|||
escapeCsvField(item.getPublishYear()), |
|||
escapeCsvField(item.getPrice()), |
|||
escapeCsvField(item.getDescription()) |
|||
); |
|||
} |
|||
printer.flush(); |
|||
} catch (IOException e) { |
|||
throw new FileWriteException("Failed to write CSV file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
public static void writePerCapitaIncome(List<PerCapitaIncome> data, String fileName) throws FileWriteException { |
|||
String[] headers = {"地区/全国", "年份", "人均可支配收入(元)", "同比增速(%)"}; |
|||
try (BufferedWriter writer = Files.newBufferedWriter(Paths.get(fileName)); |
|||
CSVPrinter printer = new CSVPrinter(writer, CSVFormat.DEFAULT.builder() |
|||
.setHeader(headers) |
|||
.build())) { |
|||
|
|||
for (PerCapitaIncome item : data) { |
|||
printer.printRecord( |
|||
item.getProvince(), |
|||
item.getYear(), |
|||
formatNumber(item.getPerCapitaIncome()), |
|||
formatRate(item.getGrowthRate()) |
|||
); |
|||
} |
|||
printer.flush(); |
|||
} catch (IOException e) { |
|||
throw new FileWriteException("Failed to write CSV file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
private static String formatNumber(double value) { |
|||
return (value == Math.floor(value)) ? String.valueOf((int) value) : String.valueOf(value); |
|||
} |
|||
|
|||
private static String formatRate(double value) { |
|||
return String.valueOf(value); |
|||
} |
|||
|
|||
private static String escapeCsvField(String field) { |
|||
if (field == null) { |
|||
return ""; |
|||
} |
|||
field = cleanHtml(field); |
|||
field = field.trim().replaceAll("\\s+", " "); |
|||
if (field.contains(",") || field.contains("\"") || field.contains("\n")) { |
|||
return "\"" + field.replace("\"", "\"\"") + "\""; |
|||
} |
|||
return field; |
|||
} |
|||
|
|||
private static String cleanHtml(String text) { |
|||
if (text == null) return ""; |
|||
text = text.replaceAll("<[^>]*>", ""); |
|||
text = text.replaceAll(" ", " "); |
|||
text = text.replaceAll("<", "<"); |
|||
text = text.replaceAll(">", ">"); |
|||
text = text.replaceAll("&", "&"); |
|||
text = text.replaceAll(""", "\""); |
|||
text = text.replaceAll("'", "'"); |
|||
text = text.replaceAll("\\\\/", "/"); |
|||
text = text.replaceAll("\\s*\\\\r\\\\n\\s*", " "); |
|||
text = text.replaceAll("\\s*\\\\n\\s*", " "); |
|||
text = text.replaceAll("\\s*\\\\r\\s*", " "); |
|||
text = text.replaceAll("\r\n", "\n"); |
|||
text = text.replaceAll("\r", "\n"); |
|||
return text; |
|||
} |
|||
} |
|||
@ -0,0 +1,100 @@ |
|||
package util; |
|||
|
|||
import exception.HttpRequestException; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
|
|||
import javax.net.ssl.*; |
|||
import java.io.BufferedReader; |
|||
import java.io.IOException; |
|||
import java.io.InputStreamReader; |
|||
import java.net.HttpURLConnection; |
|||
import java.net.URL; |
|||
import java.security.cert.X509Certificate; |
|||
import java.util.Random; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
public class HttpUtil { |
|||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; |
|||
private static final int TIMEOUT_MS = 30000; |
|||
private static final Random RANDOM = new Random(); |
|||
|
|||
static { |
|||
try { |
|||
SSLContext sslContext = SSLContext.getInstance("TLS"); |
|||
sslContext.init(null, new TrustManager[]{new X509TrustManager() { |
|||
public X509Certificate[] getAcceptedIssuers() { |
|||
return new X509Certificate[0]; |
|||
} |
|||
public void checkClientTrusted(X509Certificate[] certs, String authType) { |
|||
} |
|||
public void checkServerTrusted(X509Certificate[] certs, String authType) { |
|||
} |
|||
}}, new java.security.SecureRandom()); |
|||
HttpsURLConnection.setDefaultSSLSocketFactory(sslContext.getSocketFactory()); |
|||
HttpsURLConnection.setDefaultHostnameVerifier(new HostnameVerifier() { |
|||
public boolean verify(String hostname, SSLSession session) { |
|||
return true; |
|||
} |
|||
}); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
public static Document getDocument(String url) throws HttpRequestException { |
|||
try { |
|||
Document doc = Jsoup.connect(url) |
|||
.userAgent(USER_AGENT) |
|||
.timeout(TIMEOUT_MS) |
|||
.maxBodySize(0) |
|||
.get(); |
|||
return doc; |
|||
} catch (IOException e) { |
|||
throw new HttpRequestException("Failed to fetch URL: " + url, e); |
|||
} |
|||
} |
|||
|
|||
public static String getJsonContent(String url) throws HttpRequestException { |
|||
try { |
|||
URL urlObj = new URL(url); |
|||
HttpURLConnection conn = (HttpURLConnection) urlObj.openConnection(); |
|||
conn.setRequestMethod("GET"); |
|||
conn.setRequestProperty("User-Agent", USER_AGENT); |
|||
conn.setConnectTimeout(TIMEOUT_MS); |
|||
conn.setReadTimeout(TIMEOUT_MS); |
|||
|
|||
int responseCode = conn.getResponseCode(); |
|||
if (responseCode != 200) { |
|||
throw new HttpRequestException("HTTP error code: " + responseCode + " for URL: " + url); |
|||
} |
|||
|
|||
BufferedReader reader = new BufferedReader(new InputStreamReader(conn.getInputStream(), "UTF-8")); |
|||
StringBuilder response = new StringBuilder(); |
|||
String line; |
|||
while ((line = reader.readLine()) != null) { |
|||
response.append(line); |
|||
} |
|||
reader.close(); |
|||
conn.disconnect(); |
|||
|
|||
return response.toString(); |
|||
} catch (IOException e) { |
|||
throw new HttpRequestException("Failed to fetch JSON from URL: " + url, e); |
|||
} |
|||
} |
|||
|
|||
public static void randomDelay() throws HttpRequestException { |
|||
int delayMillis = 1000 + RANDOM.nextInt(2000); |
|||
try { |
|||
TimeUnit.MILLISECONDS.sleep(delayMillis); |
|||
} catch (InterruptedException e) { |
|||
Thread.currentThread().interrupt(); |
|||
throw new HttpRequestException("Delay interrupted", e); |
|||
} |
|||
} |
|||
|
|||
public static String getUserAgent() { |
|||
return USER_AGENT; |
|||
} |
|||
} |
|||
@ -0,0 +1,77 @@ |
|||
package view; |
|||
|
|||
import command.Command; |
|||
|
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
public class CliView { |
|||
private final Scanner scanner; |
|||
|
|||
public CliView() { |
|||
this.scanner = new Scanner(System.in); |
|||
} |
|||
|
|||
public void displayWelcome() { |
|||
System.out.println("========================================"); |
|||
System.out.println(" Java 爬虫课程作业系统"); |
|||
System.out.println("========================================"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
public void displayMenu(List<Command> commands) { |
|||
System.out.println("请选择要执行的爬虫任务:"); |
|||
System.out.println(); |
|||
for (int i = 0; i < commands.size(); i++) { |
|||
System.out.println(" " + (i + 1) + ". " + commands.get(i).getName()); |
|||
} |
|||
System.out.println(" 0. 退出系统"); |
|||
System.out.println(); |
|||
System.out.print("请输入选项: "); |
|||
} |
|||
|
|||
public int getUserChoice() { |
|||
try { |
|||
String input = scanner.nextLine().trim(); |
|||
return Integer.parseInt(input); |
|||
} catch (NumberFormatException e) { |
|||
return -1; |
|||
} |
|||
} |
|||
|
|||
public void displayMessage(String message) { |
|||
System.out.println("[INFO] " + message); |
|||
} |
|||
|
|||
public void displayError(String message) { |
|||
System.out.println("[ERROR] " + message); |
|||
} |
|||
|
|||
public void displayErrorWithStackTrace(Exception e) { |
|||
System.out.println("[ERROR] " + e.getMessage()); |
|||
e.printStackTrace(); |
|||
} |
|||
|
|||
public void displaySuccess(String message) { |
|||
System.out.println("[SUCCESS] " + message); |
|||
} |
|||
|
|||
public void displayDivider() { |
|||
System.out.println("----------------------------------------"); |
|||
} |
|||
|
|||
public void displayExitMessage() { |
|||
System.out.println(); |
|||
System.out.println("感谢使用,再见!"); |
|||
} |
|||
|
|||
public void displayInvalidChoice() { |
|||
System.out.println("[WARNING] 无效的选项,请重新输入!"); |
|||
} |
|||
|
|||
public void close() { |
|||
if (scanner != null) { |
|||
scanner.close(); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>
    <!-- The log file is written to crawler.log under LOG_HOME; "." is resolved
         against the working directory of the running JVM. -->
    <property name="LOG_HOME" value="." />
    <property name="LOG_FILE" value="${LOG_HOME}/crawler.log" />

    <!-- Console output. The charset defaults to GBK and can be overridden without
         editing this file by defining CONSOLE_CHARSET, e.g. as a JVM system
         property: -DCONSOLE_CHARSET=UTF-8 (needed on UTF-8 terminals, where GBK
         bytes are displayed as garbled text). -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <charset>${CONSOLE_CHARSET:-GBK}</charset>
            <pattern>[%d{HH:mm:ss}] [%level] %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- File output: appended to LOG_FILE across runs, always encoded as UTF-8. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>${LOG_FILE}</file>
        <append>true</append>
        <encoder>
            <charset>UTF-8</charset>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Events at INFO level and above go to both the console and the log file. -->
    <root level="info">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>
</configuration>
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,26 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>
    <!-- The log file is written to crawler.log under LOG_HOME; "." is resolved
         against the working directory of the running JVM. -->
    <property name="LOG_HOME" value="." />
    <property name="LOG_FILE" value="${LOG_HOME}/crawler.log" />

    <!-- Console output. The charset defaults to GBK and can be overridden without
         editing this file by defining CONSOLE_CHARSET, e.g. as a JVM system
         property: -DCONSOLE_CHARSET=UTF-8 (needed on UTF-8 terminals, where GBK
         bytes are displayed as garbled text). -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <charset>${CONSOLE_CHARSET:-GBK}</charset>
            <pattern>[%d{HH:mm:ss}] [%level] %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- File output: appended to LOG_FILE across runs, always encoded as UTF-8. -->
    <appender name="FILE" class="ch.qos.logback.core.FileAppender">
        <file>${LOG_FILE}</file>
        <append>true</append>
        <encoder>
            <charset>UTF-8</charset>
            <pattern>%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n</pattern>
        </encoder>
    </appender>

    <!-- Events at INFO level and above go to both the console and the log file. -->
    <root level="info">
        <appender-ref ref="STDOUT" />
        <appender-ref ref="FILE" />
    </root>
</configuration>
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,30 @@ |
|||
command\DoubanCommand.class |
|||
NetworkTest.class |
|||
strategy\HeritageCrawlStrategy.class |
|||
command\Command.class |
|||
util\HttpUtil$1.class |
|||
IncomeTest.class |
|||
DebugHeritage.class |
|||
strategy\CrawlStrategy.class |
|||
Main.class |
|||
DoubanTest.class |
|||
HeritageTest.class |
|||
model\PerCapitaIncome.class |
|||
controller\CrawlerController.class |
|||
command\CommandInvoker.class |
|||
DebugJson.class |
|||
command\PerCapitaIncomeCommand.class |
|||
TestRunner.class |
|||
util\HttpUtil$2.class |
|||
exception\CrawlerException.class |
|||
exception\FileWriteException.class |
|||
strategy\PerCapitaIncomeCrawlStrategy.class |
|||
model\BookItem.class |
|||
exception\ParseException.class |
|||
view\CliView.class |
|||
command\HeritageCommand.class |
|||
strategy\DoubanCrawlStrategy.class |
|||
util\CsvWriterUtil.class |
|||
util\HttpUtil.class |
|||
exception\HttpRequestException.class |
|||
model\HeritageItem.class |
|||
@ -0,0 +1,28 @@ |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\Main.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\model\BookItem.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\exception\FileWriteException.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\command\PerCapitaIncomeCommand.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\DebugHeritage.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\exception\ParseException.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\HeritageTest.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\DebugJson.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\model\HeritageItem.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\exception\HttpRequestException.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\NetworkTest.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\strategy\HeritageCrawlStrategy.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\exception\CrawlerException.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\model\PerCapitaIncome.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\strategy\DoubanCrawlStrategy.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\command\DoubanCommand.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\command\HeritageCommand.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\util\HttpUtil.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\view\CliView.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\DoubanTest.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\strategy\PerCapitaIncomeCrawlStrategy.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\command\Command.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\IncomeTest.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\TestRunner.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\controller\CrawlerController.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\strategy\CrawlStrategy.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\command\CommandInvoker.java |
|||
E:\新建文件夹 (2)\java\爬虫项目\src\main\java\util\CsvWriterUtil.java |
|||
Loading…
Reference in new issue