Browse Source

feat: final project submission, all files organized in project folder

master
unknown 4 weeks ago
commit
8adb77d146
  1. 1
      .gitattributes
  2. 1
      .gitignore
  3. 3
      .vscode/settings.json
  4. BIN
      project/202506050104-裴思爽-期末实验报告.docx
  5. 325
      project/README.md
  6. 300
      project/class_diagram.puml
  7. 344
      project/class_diagram_ascii.txt
  8. BIN
      project/data/commodity.db
  9. 93
      project/dependency-reduced-pom.xml
  10. BIN
      project/logs/crawler.2026-05-17.log
  11. BIN
      project/logs/crawler.2026-05-21.log
  12. BIN
      project/logs/crawler.2026-05-23.log
  13. BIN
      project/logs/crawler.log
  14. BIN
      project/output/charts/correlation.png
  15. BIN
      project/output/charts/cycle.png
  16. BIN
      project/output/charts/price_trend.png
  17. 154
      project/output/charts/report.html
  18. BIN
      project/output/charts/sentiment.png
  19. BIN
      project/output/charts/volatility.png
  20. BIN
      project/output/excel/charts/correlation.png
  21. BIN
      project/output/excel/charts/cycle.png
  22. BIN
      project/output/excel/charts/price_trend.png
  23. 154
      project/output/excel/charts/report.html
  24. BIN
      project/output/excel/charts/sentiment.png
  25. BIN
      project/output/excel/charts/volatility.png
  26. BIN
      project/output/excel/commodity_data_20260521_111758.xlsx
  27. BIN
      project/output/excel/commodity_data_20260521_112305.xlsx
  28. BIN
      project/output/excel/commodity_data_20260521_113352.xlsx
  29. BIN
      project/output/excel/commodity_data_20260521_141200.xlsx
  30. BIN
      project/output/excel/commodity_data_20260521_152221.xlsx
  31. BIN
      project/output/excel/commodity_data_20260523_200312.xlsx
  32. 3226
      project/output/excel/commodity_data_20260523_201023.csv
  33. 23222
      project/output/excel/commodity_data_20260523_201858.json
  34. 30962
      project/output/excel/commodity_data_20260523_204554.json
  35. BIN
      project/output/excel/commodity_data_20260524_003053.xlsx
  36. 28300
      project/output/excel/commodity_data_20260524_003725.csv
  37. 28300
      project/output/excel/commodity_data_20260524_005518.csv
  38. 367889
      project/output/excel/commodity_data_20260524_005753.json
  39. BIN
      project/output/report/chinese_report_1779547683095.pdf
  40. BIN
      project/output/report/commodity_report_20260524_010334.pdf
  41. 152
      project/pom.xml
  42. 163
      project/src/main/java/com/example/crawler/CrawlMain.java
  43. 221
      project/src/main/java/com/example/crawler/InteractiveCLI.java
  44. 67
      project/src/main/java/com/example/crawler/TestPdfGenerator.java
  45. 36
      project/src/main/java/com/example/crawler/command/Command.java
  46. 139
      project/src/main/java/com/example/crawler/command/CommandInvoker.java
  47. 73
      project/src/main/java/com/example/crawler/command/CrawlCommand.java
  48. 31
      project/src/main/java/com/example/crawler/command/ExitCommand.java
  49. 101
      project/src/main/java/com/example/crawler/command/ExportDataCommand.java
  50. 70
      project/src/main/java/com/example/crawler/command/ExportExcelCommand.java
  51. 55
      project/src/main/java/com/example/crawler/command/GenerateChartCommand.java
  52. 84
      project/src/main/java/com/example/crawler/command/GenerateReportCommand.java
  53. 55
      project/src/main/java/com/example/crawler/command/MonitorCommand.java
  54. 87
      project/src/main/java/com/example/crawler/command/ViewDataCommand.java
  55. 79
      project/src/main/java/com/example/crawler/controller/CrawlerController.java
  56. 11
      project/src/main/java/com/example/crawler/exception/BaseCrawlException.java
  57. 11
      project/src/main/java/com/example/crawler/exception/DbException.java
  58. 11
      project/src/main/java/com/example/crawler/exception/NetworkException.java
  59. 11
      project/src/main/java/com/example/crawler/exception/ParamException.java
  60. 11
      project/src/main/java/com/example/crawler/exception/ParseException.java
  61. 27
      project/src/main/java/com/example/crawler/mapper/IndexDataMapper.java
  62. 27
      project/src/main/java/com/example/crawler/mapper/MarketDataMapper.java
  63. 27
      project/src/main/java/com/example/crawler/mapper/NewsDataMapper.java
  64. 115
      project/src/main/java/com/example/crawler/model/IndexData.java
  65. 125
      project/src/main/java/com/example/crawler/model/MarketData.java
  66. 93
      project/src/main/java/com/example/crawler/model/NewsData.java
  67. 196
      project/src/main/java/com/example/crawler/monitor/DataBroadcaster.java
  68. 107
      project/src/main/java/com/example/crawler/monitor/PriceSnapshot.java
  69. 103
      project/src/main/java/com/example/crawler/repository/IndexDataRepository.java
  70. 112
      project/src/main/java/com/example/crawler/repository/MarketDataRepository.java
  71. 112
      project/src/main/java/com/example/crawler/repository/NewsDataRepository.java
  72. 13
      project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java
  73. 28
      project/src/main/java/com/example/crawler/strategy/CrawlStrategyFactory.java
  74. 147
      project/src/main/java/com/example/crawler/strategy/EastMoneyCrawlStrategy.java
  75. 138
      project/src/main/java/com/example/crawler/strategy/JinTouCrawlStrategy.java
  76. 162
      project/src/main/java/com/example/crawler/strategy/TongHuaShunCrawlStrategy.java
  77. 52
      project/src/main/java/com/example/crawler/util/ConfigUtil.java
  78. 165
      project/src/main/java/com/example/crawler/util/DataValidator.java
  79. 85
      project/src/main/java/com/example/crawler/util/DateTypeHandler.java
  80. 57
      project/src/main/java/com/example/crawler/util/DateUtil.java
  81. 97
      project/src/main/java/com/example/crawler/util/ExcelExporter.java
  82. 69
      project/src/main/java/com/example/crawler/util/HttpUtil.java
  83. 159
      project/src/main/java/com/example/crawler/util/MyBatisUtil.java
  84. 380
      project/src/main/java/com/example/crawler/util/PdfReportGenerator.java
  85. 54
      project/src/main/java/com/example/crawler/util/ThreadPoolUtil.java
  86. 31
      project/src/main/java/com/example/crawler/util/UserAgentUtil.java
  87. 73
      project/src/main/java/com/example/crawler/util/exporter/CsvExporter.java
  88. 10
      project/src/main/java/com/example/crawler/util/exporter/DataExporter.java
  89. 30
      project/src/main/java/com/example/crawler/util/exporter/DataExporterFactory.java
  90. 41
      project/src/main/java/com/example/crawler/util/exporter/JsonExporter.java
  91. 361
      project/src/main/java/com/example/crawler/visualization/ChartGenerator.java
  92. 198
      project/src/main/java/com/example/crawler/visualization/HtmlReportGenerator.java
  93. 13
      project/src/main/resources/application.properties
  94. 2
      project/src/main/resources/h2-init.sql
  95. 29
      project/src/main/resources/logback.xml
  96. 46
      project/src/main/resources/mapper/IndexDataMapper.xml
  97. 46
      project/src/main/resources/mapper/MarketDataMapper.xml
  98. 46
      project/src/main/resources/mapper/NewsDataMapper.xml
  99. 28
      project/src/main/resources/mybatis-config.xml
  100. 44
      project/src/main/resources/schema.sql

1
.gitattributes

@ -0,0 +1 @@
*.log filter=lfs diff=lfs merge=lfs -text

1
.gitignore

@ -0,0 +1 @@
.log logs/target/ .vscode/

3
.vscode/settings.json

@ -0,0 +1,3 @@
{
"java.configuration.updateBuildConfiguration": "interactive"
}

BIN
project/202506050104-裴思爽-期末实验报告.docx

Binary file not shown.

325
project/README.md

@ -0,0 +1,325 @@
# 大宗商品爬虫系统
## 项目概述
本项目为Java语言开发的大宗商品数据爬虫与可视化分析系统,核心目标是通过一套统一框架,爬取金投网、东方财富网、同花顺财经3个不同网站的相关数据,实现海量数据采集、存储、分析与可视化。
## 技术架构
### 分层架构
- **控制层(Controller)**: CrawlerController,统一调度爬虫策略
- **模型层(Model)**: 封装业务实体(行情、指数、舆情数据)
- **视图层(View)**: 基于JFreeChart实现可视化图表,支持HTML报告
- **策略层(Strategy)**: 统一爬虫策略接口,各站点实现具体策略
- **工厂层(Factory)**: 爬虫策略工厂,动态创建策略实例
- **仓储层(Repository)**: 数据持久化操作封装
- **命令层(Command)**: 基于Command模式实现命令管理
- **监控层(Monitor)**: WebSocket实时数据广播
### 设计模式
- **策略模式**: 站点爬取逻辑解耦
- **工厂模式**: 策略实例创建与管理
- **MVC模式**: 分层架构
- **命令模式**: 命令封装与执行
- **仓储模式**: 数据访问层抽象
### 技术栈
- Java 1.8+
- OkHttp3 (网络请求)
- Jsoup (网页解析)
- SQLite + MyBatis (数据持久化)
- JFreeChart (可视化图表)
- Apache POI (Excel导出)
- Gson (JSON处理)
- Apache PDFBox (PDF报告,中文黑体支持)
- Java-WebSocket (实时监控)
- SLF4J + Logback (日志)
## 快速开始
### 环境要求
- JDK 1.8+
- Maven 3.6+
- Windows系统(PDF报告生成需要黑体字体)
### 构建项目
```bash
cd commodity-crawler
mvn clean package -DskipTests
```
### 运行爬虫
```bash
# 启动交互式菜单(推荐)
java -jar target/commodity-crawler-1.0.0.jar
# 爬取所有站点(默认30页)
java -jar target/commodity-crawler-1.0.0.jar -s all -p 30
# 爬取指定站点
java -jar target/commodity-crawler-1.0.0.jar -s jintou -p 5
# 爬取并生成分析图表
java -jar target/commodity-crawler-1.0.0.jar -s all -p 30 -a
# 爬取并导出CSV
java -jar target/commodity-crawler-1.0.0.jar -s all -e csv
# 生成PDF分析报告
java -jar target/commodity-crawler-1.0.0.jar --report
# 爬取+导出+图表+报告+监控
java -jar target/commodity-crawler-1.0.0.jar -s all -e json -a -r -m
```
### 命令行参数
| 参数 | 说明 | 默认值 |
|------|------|--------|
| -s, --site | 指定爬取站点 (jintou/eastmoney/tonghuashun/all) | all |
| -p, --pages | 指定爬取页数 | 30 |
| -a, --analyze | 执行数据分析并生成可视化图表 | false |
| -e, --export [格式] | 导出数据 (excel/csv/json,默认excel) | - |
| -r, --report | 生成PDF分析报告 | false |
| -m, --monitor | 启动WebSocket实时监控服务 | false |
| -h, --help | 显示帮助信息 | - |
## 功能特性
### 1. 数据爬取
- 支持金投网、东方财富网、同花顺财经三个站点
- 模拟数据生成(避免真实网络请求)
- 支持多线程并发爬取
- 完善的重试机制
- **默认爬取30页数据**
### 2. 数据导出
- **Excel格式** (.xlsx) - 支持格式化表格输出,Apache POI实现
- **CSV格式** (.csv) - 支持UTF-8编码,Excel兼容
- **JSON格式** (.json) - 结构化数据输出,Gson处理
导出目录:`./output/excel/`
### 3. 可视化分析
- 价格趋势对比分析(多折线图)
- 波动特征分析(柱状图)
- 相关性分析(散点图)
- 舆情联动分析
图表输出目录:`./output/charts/`
### 4. PDF报告生成
- 自动生成专业分析报告(中文)
- 黑体(simhei.ttf)字体支持
- 包含封面、目录、市场概览、数据表格
- **报告内容**:市场概览、价格趋势分析、波动率分析、相关性分析、情绪分析、数据统计表
- 报告输出目录:`./output/report/`
### 5. 实时监控大屏
- WebSocket实时数据推送
- 多商品实时价格监控面板
- 支持ECharts可视化
- 市场情绪分析
- 监控页面:`src/main/resources/webapp/monitor.html`
## 爬取站点
### 1. 金投网 (jintou)
- 爬取内容:黄金、白银、原油历史行情
- 数据字段:交易日期、品种、开盘价、收盘价、最高价、最低价、成交量、涨跌幅
### 2. 东方财富网 (eastmoney)
- 爬取内容:大宗商品板块指数、相关概念股行情
- 数据字段:指数名称、日期、指数值、涨跌幅、概念股名称、股价、换手率
### 3. 同花顺财经 (tonghuashun)
- 爬取内容:大宗商品相关财经新闻、市场评论
- 数据字段:新闻标题、内容、发布时间、关联商品、舆情倾向
## 异常处理
系统设计了完整的异常处理层次:
- **BaseCrawlException**: 自定义异常父类
- **NetworkException**: 网络异常(支持重试机制)
- **ParseException**: 网页解析异常
- **DbException**: 数据库异常
- **ParamException**: 参数异常
### 重试机制
- 重试次数:可配置(默认3次)
- 重试间隔:递增间隔(1s、3s、5s)
- 续爬能力:断网后自动重试未完成任务
## 配置说明
配置文件:`src/main/resources/application.properties`
```properties
# 数据库配置
db.driver=org.sqlite.JDBC
db.url=jdbc:sqlite:./data/example_db.sqlite
# 爬虫配置
# 默认爬取页数(已从10调整为30)
crawl.page.count=30
# 重试次数
crawl.retry.count=3
# 初始重试间隔(ms)
crawl.retry.delay.initial=1000
# 重试间隔倍数
crawl.retry.delay.multiplier=2
# 请求间隔(ms)
crawl.request.interval=2000
# 线程池配置
thread.pool.core.size=5
thread.pool.max.size=10
# 输出配置
output.chart.dir=./output/charts/
output.excel.dir=./output/excel/
output.report.dir=./output/report/
output.log.dir=./logs/
# WebSocket配置
websocket.port=8080
```
## 项目结构
```
commodity-crawler/
├── src/main/java/com/example/crawler/
│ ├── CrawlMain.java # 主启动类
│ ├── InteractiveCLI.java # 交互式命令行界面
│ ├── command/ # 命令模式实现
│ │ ├── Command.java # 命令接口
│ │ ├── CommandInvoker.java # 命令调用者
│ │ ├── CrawlCommand.java # 爬取命令
│ │ ├── ExportDataCommand.java # 数据导出命令
│ │ ├── GenerateChartCommand.java # 图表生成命令
│ │ ├── GenerateReportCommand.java # PDF报告命令
│ │ ├── MonitorCommand.java # 实时监控命令
│ │ └── ViewDataCommand.java # 数据查看命令
│ ├── controller/
│ │ └── CrawlerController.java # 爬虫控制器
│ ├── exception/ # 异常类
│ ├── mapper/ # MyBatis Mapper
│ ├── model/ # 数据模型
│ │ ├── MarketData.java # 行情数据模型
│ │ ├── IndexData.java # 指数数据模型
│ │ └── NewsData.java # 舆情数据模型
│ ├── monitor/ # 实时监控模块
│ │ ├── DataBroadcaster.java # WebSocket服务器
│ │ └── PriceSnapshot.java # 价格快照
│ ├── repository/ # 仓储层
│ │ ├── MarketDataRepository.java
│ │ ├── IndexDataRepository.java
│ │ └── NewsDataRepository.java
│ ├── strategy/ # 策略层
│ │ ├── CrawlStrategy.java # 爬虫策略接口
│ │ ├── CrawlStrategyFactory.java
│ │ ├── JinTouCrawlStrategy.java
│ │ ├── EastMoneyCrawlStrategy.java
│ │ └── TongHuaShunCrawlStrategy.java
│ ├── util/ # 工具类
│ │ ├── exporter/ # 数据导出器
│ │ │ ├── DataExporter.java
│ │ │ ├── CsvExporter.java
│ │ │ ├── JsonExporter.java
│ │ │ └── DataExporterFactory.java
│ │ ├── ExcelExporter.java # Excel导出
│ │ ├── PdfReportGenerator.java # PDF报告生成器(中文支持)
│ │ ├── DateTypeHandler.java # MyBatis日期类型处理器
│ │ └── MyBatisUtil.java # MyBatis工具类
│ └── visualization/ # 可视化模块
│ ├── ChartGenerator.java # 图表生成器
│ └── HtmlReportGenerator.java # HTML报告生成器
├── src/main/resources/
│ ├── application.properties # 配置文件
│ ├── logback.xml # 日志配置
│ ├── mybatis-config.xml # MyBatis配置(含驼峰映射)
│ ├── schema.sql # 数据库初始化脚本
│ ├── mapper/ # MyBatis XML配置
│ │ ├── MarketDataMapper.xml
│ │ ├── IndexDataMapper.xml
│ │ └── NewsDataMapper.xml
│ └── webapp/ # Web资源
│ ├── echarts.min.js # ECharts库(本地化,支持离线)
│ └── monitor.html # 监控大屏页面(ECharts可视化)
└── pom.xml # Maven配置
```
## 核心特性说明
### MyBatis字段映射
系统已配置`mapUnderscoreToCamelCase=true`,自动将数据库下划线命名字段映射到Java驼峰命名:
- `index_name` → `indexName`
- `index_value` → `indexValue`
- `change_rate` → `changeRate`
- `stock_name` → `stockName`
- `stock_price` → `stockPrice`
- `turnover_rate` → `turnoverRate`
### 自定义日期类型处理器
`DateTypeHandler`支持:
- Unix时间戳(毫秒,13位)
- Unix时间戳(秒,10位)
- 日期字符串格式
- MySQL TIMESTAMP类型
### PDF报告中文支持
- 使用黑体(simhei.ttf)字体
- 支持中文完整显示
- 包含8页完整报告内容
### 实时监控WebSocket
- 端口:8080
- 推送频率:每2秒更新
- 数据格式:JSON
- 监控页面自动连接
### 完整离线支持
系统支持**完整离线运行**,所有核心功能均不依赖网络:
- ✅ 数据爬取(使用模拟数据生成)
- ✅ 数据存储(SQLite本地数据库)
- ✅ 数据导出(Excel/CSV/JSON本地文件)
- ✅ 图表生成(JFreeChart本地生成)
- ✅ PDF报告(本地字体和PDFBox生成)
- ✅ 实时监控页面(ECharts库本地化)
## 扩展说明
### 新增爬虫站点
1. 实现 `CrawlStrategy` 接口
2. 在 `CrawlStrategyFactory` 中添加分支
3. 除工厂类外,无需修改其他原有代码
### 新增导出格式
1. 实现 `DataExporter` 接口
2. 在 `DataExporterFactory` 中注册
3. 自动集成到导出命令
### 新增分析维度
1. 在 `ChartGenerator` 中添加新方法
2. 实现图表生成逻辑
## 输出文件说明
| 类型 | 目录 | 说明 |
|------|------|------|
| 图表 | `./output/charts/` | 价格趋势、波动率、相关性、舆情分析图 |
| 数据 | `./output/excel/` | Excel、CSV、JSON格式导出文件 |
| 报告 | `./output/report/` | 中文PDF分析报告 |
| 数据库 | `./data/` | SQLite数据库文件 |
| 日志 | `./logs/` | 系统运行日志 |
## 注意事项
1. 请确保遵守目标网站的 robots.txt 协议
2. 合理设置请求间隔,避免触发反爬机制
3. 首次运行会自动创建SQLite数据库文件
4. PDF报告需要Windows系统黑体字体支持
5. 建议定期清理输出目录中的历史文件
6. 系统支持完整离线运行,所有核心功能均可在断网环境下正常使用
## License
MIT License

300
project/class_diagram.puml

@ -0,0 +1,300 @@
@startuml 大宗商品爬虫系统类图
title 大宗商品数据爬虫与可视化分析系统 - 类图
skinparam backgroundColor #FEFEFE
skinparam classAttributeIconSize 0
' ========== 命令模式 ==========
package "command <<命令模式>>" #LightBlue {
interface Command {
+ execute()
+ getName() : String
+ getDescription() : String
+ isUndoable() : boolean
+ undo()
}
class CommandInvoker {
- commandMap : Map<String, Command>
- commandHistory : Deque<Command>
+ registerCommand(key : String, command : Command)
+ executeCommand(key : String)
+ undo()
}
class CrawlCommand {
- controller : CrawlerController
- site : String
- pageCount : int
+ execute()
+ getName() : String
+ getDescription() : String
}
class ExportDataCommand {
- controller : CrawlerController
- format : String
+ execute()
+ getName() : String
+ getDescription() : String
}
class GenerateChartCommand {
- controller : CrawlerController
+ execute()
+ getName() : String
+ getDescription() : String
}
class GenerateReportCommand {
- controller : CrawlerController
+ execute()
+ getName() : String
+ getDescription() : String
}
class MonitorCommand {
- broadcaster : DataBroadcaster
+ execute()
+ getName() : String
+ getDescription() : String
}
class ViewDataCommand {
- indexRepo : IndexDataRepository
- marketRepo : MarketDataRepository
+ execute()
+ getName() : String
+ getDescription() : String
}
class ExitCommand {
+ execute()
+ getName() : String
+ getDescription() : String
}
Command <|.. CrawlCommand
Command <|.. ExportDataCommand
Command <|.. GenerateChartCommand
Command <|.. GenerateReportCommand
Command <|.. MonitorCommand
Command <|.. ViewDataCommand
Command <|.. ExitCommand
CommandInvoker o-- Command : commands
}
' ========== 策略模式 ==========
package "strategy <<策略模式>>" #LightYellow {
interface CrawlStrategy {
+ crawlData(pageCount : int) : List<?>
+ saveData(dataList : List<?>) : int
+ getSiteName() : String
}
class CrawlStrategyFactory {
+ createStrategy(siteCode : String) : CrawlStrategy
}
class JinTouCrawlStrategy {
- repository : MarketDataRepository
+ crawlData(pageCount : int) : List<?>
+ saveData(dataList : List<?>) : int
+ getSiteName() : String
}
class EastMoneyCrawlStrategy {
- repository : IndexDataRepository
+ crawlData(pageCount : int) : List<?>
+ saveData(dataList : List<?>) : int
+ getSiteName() : String
}
class TongHuaShunCrawlStrategy {
- repository : NewsDataRepository
+ crawlData(pageCount : int) : List<?>
+ saveData(dataList : List<?>) : int
+ getSiteName() : String
}
CrawlStrategy <|.. JinTouCrawlStrategy
CrawlStrategy <|.. EastMoneyCrawlStrategy
CrawlStrategy <|.. TongHuaShunCrawlStrategy
CrawlStrategyFactory ..> CrawlStrategy : creates
}
' ========== 核心控制器 ==========
package "controller <<控制层>>" #LightGreen {
class CrawlerController {
+ crawl(siteCode : String, pageCount : int) : int
+ crawlAll(pageCount : int) : int
}
CrawlerController --> CrawlStrategyFactory : uses
}
' ========== 交互入口 ==========
package "cli <<表示层>>" #LightPink {
class InteractiveCLI {
- invoker : CommandInvoker
+ runInteractiveMode()
+ runCommandMode(args : String[])
+ main(args : String[])
}
InteractiveCLI --> CommandInvoker : uses
CommandInvoker --> CrawlerController : executes
}
' ========== 仓储层 ==========
package "repository <<仓储层>>" #LightGray {
class MarketDataRepository {
- sqlSessionFactory : SqlSessionFactory
+ save(data : MarketData) : int
+ batchSave(dataList : List<MarketData>) : int
+ findAll() : List<MarketData>
+ findByVariety(variety : String) : List<MarketData>
+ count() : int
}
class IndexDataRepository {
- sqlSessionFactory : SqlSessionFactory
+ save(data : IndexData) : int
+ batchSave(dataList : List<IndexData>) : int
+ findAll() : List<IndexData>
+ findByIndexName(indexName : String) : List<IndexData>
+ count() : int
}
class NewsDataRepository {
- sqlSessionFactory : SqlSessionFactory
+ save(data : NewsData) : int
+ batchSave(dataList : List<NewsData>) : int
+ findAll() : List<NewsData>
+ findByCommodity(commodity : String) : List<NewsData>
+ count() : int
}
}
' ========== 数据模型 ==========
package "model <<模型层>>" #White {
class MarketData {
- id : Long
- variety : String
- tradeDate : Date
- openPrice : BigDecimal
- closePrice : BigDecimal
- highPrice : BigDecimal
- lowPrice : BigDecimal
- volume : BigDecimal
- changeRate : BigDecimal
- createTime : Date
- source : String
}
class IndexData {
- id : Long
- indexName : String
- date : Date
- indexValue : BigDecimal
- changeRate : BigDecimal
- stockName : String
- stockPrice : BigDecimal
- turnoverRate : BigDecimal
- createTime : Date
- source : String
}
class NewsData {
- id : Long
- title : String
- content : String
- publishTime : Date
- relatedCommodity : String
- sentiment : String
- createTime : Date
- source : String
}
class PriceSnapshot {
- commodityName : String
- currentPrice : double
- changePercent : double
- timestamp : long
}
}
' ========== 工具类 ==========
package "util <<工具类>>" #Lavender {
class ChartGenerator {
+ generatePriceTrendChart(dataList : List<IndexData>) : BufferedImage
+ generateVolatilityChart(dataList : List<IndexData>) : BufferedImage
+ generateCorrelationChart(dataList : List<IndexData>) : BufferedImage
+ generateSentimentChart(dataList : List<NewsData>) : BufferedImage
}
class PdfReportGenerator {
- chineseFont : PDType0Font
+ generateReport(dataList : List<IndexData>, chartImages : Map<String, BufferedImage>, outputPath : String) : String
}
class ExcelExporter {
+ export(data : List<MarketData>, outputPath : String)
+ getFormat() : String
+ getFileExtension() : String
}
interface DataExporter {
+ export(data : List<MarketData>, outputPath : String)
+ getFormat() : String
+ getFileExtension() : String
}
class DataExporterFactory {
+ createExporter(format : String) : DataExporter
}
DataExporter <|.. ExcelExporter
DataExporter <|.. CsvExporter
DataExporter <|.. JsonExporter
}
package "monitor <<监控层>>" #LightCoral {
class DataBroadcaster {
- serverSocket : ServerSocket
- connections : Map<WebSocket, Player>
- scheduler : ScheduledExecutorService
+ start(port : int)
+ stop()
+ broadcast(message : String)
}
}
' ========== 异常 ==========
package "exception <<异常>>" #MistyRose {
class BaseCrawlException {
- errorCode : String
- errorMessage : String
- cause : Throwable
}
class NetworkException {
}
class ParseException {
}
class DbException {
}
class ParamException {
}
BaseCrawlException <|-- NetworkException
BaseCrawlException <|-- ParseException
BaseCrawlException <|-- DbException
BaseCrawlException <|-- ParamException
}
@enduml

344
project/class_diagram_ascii.txt

@ -0,0 +1,344 @@
┌─────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ 大宗商品数据爬虫与可视化分析系统 - UML类图 │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
一、命令模式 (Command Pattern)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌──────────────────────┐
│ <<interface>> │
│ Command │
├──────────────────────┤
│ + execute() │
│ + getName() │
│ + getDescription() │
│ + isUndoable() │
│ + undo() │
└──────────┬───────────┘
┌──────────────┬───────────────┼───────────────┬──────────────┬───────────────┬──────────────┐
│ │ │ │ │ │ │
▼ ▼ ▼ ▼ ▼ ▼ ▼
┌──────────────────┐ ┌────────────┐ ┌───────────┐ ┌───────────────┐ ┌────────────┐ ┌───────────┐ ┌───────────┐
│ CrawlCommand │ │ExportData │ │Generate │ │ GenerateReport│ │ Monitor │ │ ViewData │ │ExitCommand│
│ │ │ Command │ │ChartCommand│ │ Command │ │ Command │ │ Command │ │ │
├──────────────────┤ ├───────────┤ ├───────────┤ ├───────────────┤ ├────────────┤ ├───────────┤ ├───────────┤
│-controller │ │-format │ │ │ │ │ │-broadcaster│ │-indexRepo │ │ │
│-site │ │-controller│ │ │ │ │ │ │ │-marketRepo│ │ │
│-pageCount │ │ │ │ │ │ │ │ │ │ │ │ │
│-savedCount │ │ │ │ │ │ │ │ │ │ │ │ │
├──────────────────┤ ├───────────┤ ├───────────┤ ├───────────────┤ ├────────────┤ ├───────────┤ ├───────────┤
│+execute() │ │+execute() │ │+execute() │ │+execute() │ │+execute() │ │+execute() │ │+execute() │
│+getName() │ │+getName() │ │+getName() │ │+getName() │ │+getName() │ │+getName() │ │+getName() │
│+getDescription() │ │+getDesc() │ │+getDesc() │ │+getDescription │ │+getDesc() │ │+getDesc() │ │+getDesc() │
└──────────────────┘ └───────────┘ └───────────┘ └───────────────┘ └────────────┘ └───────────┘ └───────────┘
│ │ │ │ │ │ │
└──────────────────┴────────────┴──────────────┴────────────────┴──────────────┴──────────────┘
┌────────────────────────────┐
│ CommandInvoker │
├────────────────────────────┤
│ - commandMap │
│ - commandHistory │
├────────────────────────────┤
│ + registerCommand(key,cmd) │
│ + executeCommand(key) │
│ + undo() │
└────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
二、策略模式 (Strategy Pattern)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌──────────────────────────────┐
│ <<interface>> │
│ CrawlStrategy │
├──────────────────────────────┤
│ + crawlData(pageCount) │
│ + saveData(dataList) │
│ + getSiteName() │
└──────────────┬───────────────┘
┌────────────────────────────┬┴─────────────────────────────┐
│ │ │
▼ ▼ ▼
┌────────────────────────────────┐ ┌────────────────────────────────┐ ┌────────────────────────────────┐
│ JinTouCrawlStrategy │ │ EastMoneyCrawlStrategy │ │ TongHuaShunCrawlStrategy │
│ (金投网) │ │ (东方财富网) │ │ (同花顺) │
├────────────────────────────────┤ ├────────────────────────────────┤ ├────────────────────────────────┤
│ - repository : MarketDataRepo │ │ - repository : IndexDataRepo │ │ - repository : NewsDataRepo │
├────────────────────────────────┤ ├────────────────────────────────┤ ├────────────────────────────────┤
│ + crawlData(pageCount) │ │ + crawlData(pageCount) │ │ + crawlData(pageCount) │
│ + saveData(dataList) │ │ + saveData(dataList) │ │ + saveData(dataList) │
│ + getSiteName() : "金投网" │ │ + getSiteName() : "东方财富网" │ │ + getSiteName() : "同花顺财经" │
└────────────────────────────────┘ └────────────────────────────────┘ └────────────────────────────────┘
│ │ │
└────────────────────────────┴───────────────────────────────┘
┌─────────────────────────────────┐
│ CrawlStrategyFactory │
├─────────────────────────────────┤
│ + createStrategy(siteCode) │
│ : CrawlStrategy │
└─────────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
三、模型类 (Model Classes)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌─────────────────────────────────┐ ┌─────────────────────────────────┐ ┌─────────────────────────────────┐
│ MarketData │ │ IndexData │ │ NewsData │
│ 行情数据 │ │ 指数数据 │ │ 舆情数据 │
├─────────────────────────────────┤ ├─────────────────────────────────┤ ├─────────────────────────────────┤
│ - id : Long │ │ - id : Long │ │ - id : Long │
│ - variety : String │ │ - indexName : String │ │ - title : String │
│ - tradeDate : Date │ │ - date : Date │ │ - content : String │
│ - openPrice : BigDecimal │ │ - indexValue : BigDecimal │ │ - publishTime : Date │
│ - closePrice : BigDecimal │ │ - changeRate : BigDecimal │ │ - relatedCommodity : String │
│ - highPrice : BigDecimal │ │ - stockName : String │ │ - sentiment : String │
│ - lowPrice : BigDecimal │ │ - stockPrice : BigDecimal │ │ - createTime : Date │
│ - volume : BigDecimal │ │ - turnoverRate : BigDecimal │ │ - source : String │
│ - changeRate : BigDecimal │ │ - createTime : Date │ └─────────────────────────────────┘
│ - createTime : Date │ │ - source : String │
│ - source : String │ └─────────────────────────────────┘
└─────────────────────────────────┘
┌─────────────────────────────────┐
│ PriceSnapshot │
│ 价格快照 │
├─────────────────────────────────┤
│ - commodityName : String │
│ - currentPrice : double │
│ - changePercent : double │
│ - timestamp : long │
└─────────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
四、仓储层 (Repository Layer)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌──────────────────────────────────────┐ ┌──────────────────────────────────────┐ ┌──────────────────────────────────────┐
│ MarketDataRepository │ │ IndexDataRepository │ │ NewsDataRepository │
│ 市场数据仓储 │ │ 指数数据仓储 │ │ 舆情数据仓储 │
├──────────────────────────────────────┤ ├──────────────────────────────────────┤ ├──────────────────────────────────────┤
│ - sqlSessionFactory │ │ - sqlSessionFactory │ │ - sqlSessionFactory │
├──────────────────────────────────────┤ ├──────────────────────────────────────┤ ├──────────────────────────────────────┤
│ + save(data) : int │ │ + save(data) : int │ │ + save(data) : int │
│ + batchSave(dataList) : int │ │ + batchSave(dataList) : int │ │ + batchSave(dataList) : int │
│ + findAll() : List~MarketData~ │ │ + findAll() : List~IndexData~ │ │ + findAll() : List~NewsData~ │
│ + findByVariety(variety) │ │ + findByIndexName(indexName) │ │ + findByCommodity(commodity) │
│ + count() : int │ │ + count() : int │ │ + count() : int │
└──────────────────────────────────────┘ └──────────────────────────────────────┘ └──────────────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
五、工具类 (Utility Classes)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌──────────────────────────────────────┐ ┌──────────────────────────────────────┐
│ ChartGenerator │ │ PdfReportGenerator │
│ 图表生成器 │ │ PDF报告生成器 │
├──────────────────────────────────────┤ ├──────────────────────────────────────┤
│ + generatePriceTrendChart() │ │ - chineseFont : PDType0Font │
│ + generateVolatilityChart() │ ├──────────────────────────────────────┤
│ + generateCorrelationChart() │ │ + generateReport() │
│ + generateSentimentChart() │ │ + generateCoverPage() │
└──────────────────────────────────────┘ │ + generateDataTable() │
└──────────────────────────────────────┘
┌──────────────────────────────────────┐
│ <<interface>> │
│ DataExporter │
├──────────────────────────────────────┤
│ + export(data, outputPath) │
│ + getFormat() : String │
│ + getFileExtension() : String │
└──────────────┬───────────────────────┘
┌────────────────────────────┴────────────────────────────┐
│ │ │
▼ ▼ ▼
┌───────────────┐ ┌───────────────┐ ┌───────────────┐
│ ExcelExporter │ │ CsvExporter │ │ JsonExporter │
├───────────────┤ ├───────────────┤ ├───────────────┤
│ + export() │ │ + export() │ │ + export() │
│ + getFormat() │ │ + getFormat() │ │ + getFormat() │
└───────────────┘ └───────────────┘ └───────────────┘
┌───────────────────────────┐
│ DataExporterFactory │
├───────────────────────────┤
│ + createExporter(format) │
└───────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
六、监控模块 (Monitor Module)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌──────────────────────────────────────┐
│ DataBroadcaster │
│ 数据广播器 │
├──────────────────────────────────────┤
│ - serverSocket : ServerSocket │
│ - connections : Map~WebSocket,~ │
│ - scheduler : ScheduledExecutorService│
├──────────────────────────────────────┤
│ + start(port) │
│ + stop() │
│ + broadcast(message) │
│ + onOpen(ws, handshake) │
│ + onClose(ws, code, reason) │
│ + onMessage(ws, message) │
└──────────────────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
七、异常类层次 (Exception Hierarchy)
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌─────────────────────────────────────────────┐
│ BaseCrawlException │
│ (爬虫异常基类) │
├─────────────────────────────────────────────┤
│ - errorCode : String │
│ - errorMessage : String │
│ - cause : Throwable │
└──────────────────────┬──────────────────────┘
┌──────────────────────────────┬┴───────────────────────────────┐
│ │ │
▼ ▼ ▼
┌─────────────────────────┐ ┌─────────────────────────┐ ┌─────────────────────────┐
│ NetworkException │ │ ParseException │ │ DbException │
│ (网络异常) │ │ (解析异常) │ │ (数据库异常) │
├─────────────────────────┤ ├─────────────────────────┤ ├─────────────────────────┤
│ 支持重试机制 │ │ 网页解析失败 │ │ SQL执行失败 │
└─────────────────────────┘ └─────────────────────────┘ └─────────────────────────┘
┌─────────────────────────┐
│ ParamException │
│ (参数异常) │
├─────────────────────────┤
│ 参数校验失败 │
└─────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
八、核心调用关系
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌─────────────────┐
│ InteractiveCLI │ ◄────── 程序入口
└────────┬────────┘
│ uses
┌─────────────────┐
│ CommandInvoker │ ◄────── 命令调用者
└────────┬────────┘
│ executes
┌─────────────────┐ uses ┌─────────────────────────┐
│ CrawlCommand │──────────────────►│ CrawlerController │
└─────────────────┘ └───────────┬─────────────┘
│ uses
┌───────────────────────────┐
│ CrawlStrategyFactory │
└─────────────┬─────────────┘
│ creates
┌───────────────────────────┐
│ CrawlStrategy │
│ (接口) │
└─────────────┬─────────────┘
┌────────────────────────┬┴────────────────────────┐
│ │ │
▼ ▼ ▼
┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐
│ JinTouCrawl │ │ EastMoneyCrawl │ │TongHuaShunCrawl │
│ Strategy │ │ Strategy │ │ Strategy │
└────────┬─────────┘ └────────┬─────────┘ └────────┬─────────┘
│ │ │
▼ ▼ ▼
┌──────────────────┐ ┌──────────────────┐ ┌──────────────────┐
│MarketDataRepository│ │IndexDataRepository│ │NewsDataRepository│
└────────┬─────────┘ └────────┬─────────┘ └────────┬─────────┘
│ │ │
└──────────────────────┴──────────────────────┘
┌─────────────────────────┐
│ SQLite Database │
│ (MyBatis) │
└─────────────────────────┘
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
图例
═══════════════════════════════════════════════════════════════════════════════════════════════════════════
┌─────────────┐
│ Class │ 类
├─────────────┤
│ - field │ -: private
│ + method() │ +: public
└─────────────┘
┌─────────────────────┐
│ <<interface>> │ 接口
│ Interface │
└─────────────────────┘
│ │
│ implements │ 实现关系
▼ │
┌─────────┐ │
│ Concrete│ │
└─────────┘ │
│ │
◄─────────────── │ 依赖关系
┌───┐ ┌───┐
│ A │────►│ B │ 关联关系
└───┘ └───┘
┌───┐ ┌───┐
│ A │◄───►│ B │ 双向关联
└───┘ └───┘
┌───┐
│ A │─────│ 聚合关系 (空心菱形)
└───┘ │
┌─────┐
│ B │
└─────┘
┌───┐
│ A │─────◆ 组合关系 (实心菱形)
└───┘
┌─────┐
│ B │
└─────┘
──────────────► 依赖/使用关系
──────────────▶ 指向
@enduml

BIN
project/data/commodity.db

Binary file not shown.

93
project/dependency-reduced-pom.xml

@ -0,0 +1,93 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>commodity-crawler</artifactId>
<name>Commodity Crawler System</name>
<version>1.0.0</version>
<description>大宗商品数据爬虫与可视化分析系统</description>
<build>
<plugins>
<plugin>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.11.0</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<artifactId>maven-jar-plugin</artifactId>
<version>3.3.0</version>
<configuration>
<archive>
<manifest>
<mainClass>com.example.crawler.CrawlMain</mainClass>
</manifest>
</archive>
</configuration>
</plugin>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<transformers>
<transformer>
<mainClass>com.example.crawler.CrawlMain</mainClass>
</transformer>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>
<dependencies>
<dependency>
<groupId>mysql</groupId>
<artifactId>mysql-connector-java</artifactId>
<version>8.0.33</version>
<scope>compile</scope>
</dependency>
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.30</version>
<scope>provided</scope>
</dependency>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>4.13.2</version>
<scope>test</scope>
<exclusions>
<exclusion>
<artifactId>hamcrest-core</artifactId>
<groupId>org.hamcrest</groupId>
</exclusion>
</exclusions>
</dependency>
</dependencies>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<maven.compiler.target>1.8</maven.compiler.target>
<java.version>1.8</java.version>
<maven.compiler.source>1.8</maven.compiler.source>
</properties>
</project>

BIN
project/logs/crawler.2026-05-17.log (Stored with Git LFS)

Binary file not shown.

BIN
project/logs/crawler.2026-05-21.log (Stored with Git LFS)

Binary file not shown.

BIN
project/logs/crawler.2026-05-23.log (Stored with Git LFS)

Binary file not shown.

BIN
project/logs/crawler.log (Stored with Git LFS)

Binary file not shown.

BIN
project/output/charts/correlation.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
project/output/charts/cycle.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

BIN
project/output/charts/price_trend.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

154
project/output/charts/report.html

@ -0,0 +1,154 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>大宗商品分析报告</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: 'Microsoft YaHei', 'SimHei', Arial, sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
min-height: 100vh;
padding: 20px;
color: #fff;
}
.container { max-width: 1400px; margin: 0 auto; }
h1 {
text-align: center;
font-size: 2.5em;
margin-bottom: 10px;
background: linear-gradient(90deg, #f39c12, #e74c3c, #9b59b6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-shadow: 0 0 30px rgba(243, 156, 18, 0.3);
}
.subtitle {
text-align: center;
color: #888;
margin-bottom: 40px;
}
.charts-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(600px, 1fr));
gap: 30px;
}
.chart-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 20px;
padding: 25px;
box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
transition: transform 0.3s ease, box-shadow 0.3s ease;
}
.chart-card:hover {
transform: translateY(-10px);
box-shadow: 0 30px 80px rgba(0, 0, 0, 0.4);
}
.chart-card h2 {
color: #333;
font-size: 1.4em;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 3px solid;
border-image: linear-gradient(90deg, #f39c12, #e74c3c) 1;
}
.chart-card img {
width: 100%;
height: auto;
border-radius: 10px;
}
.chart-card.full-width {
grid-column: 1 / -1;
}
.legend {
display: flex;
justify-content: center;
gap: 30px;
margin-top: 15px;
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: 8px;
font-size: 0.9em;
color: #555;
}
.legend-color {
width: 20px;
height: 4px;
border-radius: 2px;
}
.gold { background: #ff8c00; }
.silver { background: #c0c0c0; }
.oil { background: #006400; }
.up { background: #006400; }
.down { background: #ff0000; }
footer {
text-align: center;
margin-top: 50px;
padding: 20px;
color: #666;
}
@media (max-width: 768px) {
.charts-grid { grid-template-columns: 1fr; }
h1 { font-size: 1.8em; }
}
</style>
</head>
<body>
<div class="container">
<h1>📊 大宗商品分析报告</h1>
<p class="subtitle"> Commodity Market Analysis Report</p>
<div class="charts-grid">
<div class="chart-card">
<h2>📈 价格趋势对比</h2>
<img src="price_trend.png" alt="价格趋势对比">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color silver"></span>白银</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>📊 波动特征分析</h2>
<img src="volatility.png" alt="波动特征分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color silver"></span>白银</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>🔗 相关性分析</h2>
<img src="correlation.png" alt="相关性分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>🗓️ 季节性周期分析</h2>
<img src="cycle.png" alt="季节性周期分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card full-width">
<h2>💬 舆情联动分析</h2>
<img src="sentiment.png" alt="舆情联动分析">
<div class="legend">
<div class="legend-item"><span class="legend-color oil"></span>涨跌幅</div>
<div class="legend-item"><span class="legend-color gold"></span>利好新闻数</div>
<div class="legend-item"><span class="legend-color down"></span>利空新闻数</div>
</div>
</div>
</div>
<footer>
<p>报告生成时间: 2026-05-24T01:36:20.015565800</p>
<p>大宗商品爬虫系统 © 2026</p>
</footer>
</div>
</body>
</html>

BIN
project/output/charts/sentiment.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

BIN
project/output/charts/volatility.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

BIN
project/output/excel/charts/correlation.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

BIN
project/output/excel/charts/cycle.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 46 KiB

BIN
project/output/excel/charts/price_trend.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 61 KiB

154
project/output/excel/charts/report.html

@ -0,0 +1,154 @@
<!DOCTYPE html>
<html lang="zh-CN">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>大宗商品分析报告</title>
<style>
* { margin: 0; padding: 0; box-sizing: border-box; }
body {
font-family: 'Microsoft YaHei', 'SimHei', Arial, sans-serif;
background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);
min-height: 100vh;
padding: 20px;
color: #fff;
}
.container { max-width: 1400px; margin: 0 auto; }
h1 {
text-align: center;
font-size: 2.5em;
margin-bottom: 10px;
background: linear-gradient(90deg, #f39c12, #e74c3c, #9b59b6);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
text-shadow: 0 0 30px rgba(243, 156, 18, 0.3);
}
.subtitle {
text-align: center;
color: #888;
margin-bottom: 40px;
}
.charts-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(600px, 1fr));
gap: 30px;
}
.chart-card {
background: rgba(255, 255, 255, 0.95);
border-radius: 20px;
padding: 25px;
box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);
transition: transform 0.3s ease, box-shadow 0.3s ease;
}
.chart-card:hover {
transform: translateY(-10px);
box-shadow: 0 30px 80px rgba(0, 0, 0, 0.4);
}
.chart-card h2 {
color: #333;
font-size: 1.4em;
margin-bottom: 20px;
padding-bottom: 10px;
border-bottom: 3px solid;
border-image: linear-gradient(90deg, #f39c12, #e74c3c) 1;
}
.chart-card img {
width: 100%;
height: auto;
border-radius: 10px;
}
.chart-card.full-width {
grid-column: 1 / -1;
}
.legend {
display: flex;
justify-content: center;
gap: 30px;
margin-top: 15px;
flex-wrap: wrap;
}
.legend-item {
display: flex;
align-items: center;
gap: 8px;
font-size: 0.9em;
color: #555;
}
.legend-color {
width: 20px;
height: 4px;
border-radius: 2px;
}
.gold { background: #ff8c00; }
.silver { background: #c0c0c0; }
.oil { background: #006400; }
.up { background: #006400; }
.down { background: #ff0000; }
footer {
text-align: center;
margin-top: 50px;
padding: 20px;
color: #666;
}
@media (max-width: 768px) {
.charts-grid { grid-template-columns: 1fr; }
h1 { font-size: 1.8em; }
}
</style>
</head>
<body>
<div class="container">
<h1>📊 大宗商品分析报告</h1>
<p class="subtitle"> Commodity Market Analysis Report</p>
<div class="charts-grid">
<div class="chart-card">
<h2>📈 价格趋势对比</h2>
<img src="price_trend.png" alt="价格趋势对比">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color silver"></span>白银</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>📊 波动特征分析</h2>
<img src="volatility.png" alt="波动特征分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color silver"></span>白银</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>🔗 相关性分析</h2>
<img src="correlation.png" alt="相关性分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card">
<h2>🗓️ 季节性周期分析</h2>
<img src="cycle.png" alt="季节性周期分析">
<div class="legend">
<div class="legend-item"><span class="legend-color gold"></span>黄金</div>
<div class="legend-item"><span class="legend-color oil"></span>原油</div>
</div>
</div>
<div class="chart-card full-width">
<h2>💬 舆情联动分析</h2>
<img src="sentiment.png" alt="舆情联动分析">
<div class="legend">
<div class="legend-item"><span class="legend-color oil"></span>涨跌幅</div>
<div class="legend-item"><span class="legend-color gold"></span>利好新闻数</div>
<div class="legend-item"><span class="legend-color down"></span>利空新闻数</div>
</div>
</div>
</div>
<footer>
<p>报告生成时间: 2026-05-23T20:46:20.700038600</p>
<p>大宗商品爬虫系统 © 2026</p>
</footer>
</div>
</body>
</html>

BIN
project/output/excel/charts/sentiment.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 100 KiB

BIN
project/output/excel/charts/volatility.png

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

BIN
project/output/excel/commodity_data_20260521_111758.xlsx

Binary file not shown.

BIN
project/output/excel/commodity_data_20260521_112305.xlsx

Binary file not shown.

BIN
project/output/excel/commodity_data_20260521_113352.xlsx

Binary file not shown.

BIN
project/output/excel/commodity_data_20260521_141200.xlsx

Binary file not shown.

BIN
project/output/excel/commodity_data_20260521_152221.xlsx

Binary file not shown.

BIN
project/output/excel/commodity_data_20260523_200312.xlsx

Binary file not shown.

3226
project/output/excel/commodity_data_20260523_201023.csv

File diff suppressed because it is too large

23222
project/output/excel/commodity_data_20260523_201858.json

File diff suppressed because it is too large

30962
project/output/excel/commodity_data_20260523_204554.json

File diff suppressed because it is too large

BIN
project/output/excel/commodity_data_20260524_003053.xlsx

Binary file not shown.

28300
project/output/excel/commodity_data_20260524_003725.csv

File diff suppressed because it is too large

28300
project/output/excel/commodity_data_20260524_005518.csv

File diff suppressed because it is too large

367889
project/output/excel/commodity_data_20260524_005753.json

File diff suppressed because it is too large

BIN
project/output/report/chinese_report_1779547683095.pdf

Binary file not shown.

BIN
project/output/report/commodity_report_20260524_010334.pdf

Binary file not shown.

152
project/pom.xml

@ -0,0 +1,152 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.example</groupId>
<artifactId>commodity-crawler</artifactId>
<version>1.0.0</version>
<packaging>jar</packaging>
<name>Commodity Crawler System</name>
<description>大宗商品爬虫系统 - 支持多网站数据爬取与可视化分析</description>
<properties>
<maven.compiler.source>1.8</maven.compiler.source>
<maven.compiler.target>1.8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
</properties>
<dependencies>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.9</version>
</dependency>
<dependency>
<!-- NOTE(review): the logback 1.4.x line is documented as needing Java 11 or newer at runtime, while this POM compiles for 1.8 (see maven.compiler.source / maven.compiler.target). Confirm the intended runtime JDK; the 1.3.x line is the one published for Java 8. -->
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.4.14</version>
</dependency>
<dependency>
<groupId>com.squareup.okhttp3</groupId>
<artifactId>okhttp</artifactId>
<version>4.12.0</version>
</dependency>
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
<dependency>
<groupId>org.mybatis</groupId>
<artifactId>mybatis</artifactId>
<version>3.5.15</version>
</dependency>
<dependency>
<!-- NOTE(review): mybatis-spring 3.0.x is documented as targeting Java 17+ and Spring Framework 6, but this POM compiles for 1.8 and declares no Spring dependency. Confirm this artifact is actually used; plain MyBatis (the org.mybatis:mybatis dependency above) does not need it. -->
<groupId>org.mybatis</groupId>
<artifactId>mybatis-spring</artifactId>
<version>3.0.3</version>
</dependency>
<dependency>
<groupId>com.h2database</groupId>
<artifactId>h2</artifactId>
<version>2.2.224</version>
</dependency>
<dependency>
<groupId>org.jfree</groupId>
<artifactId>jfreechart</artifactId>
<version>1.5.4</version>
</dependency>
<dependency>
<groupId>commons-cli</groupId>
<artifactId>commons-cli</artifactId>
<version>1.6.0</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>5.2.5</version>
</dependency>
<dependency>
<groupId>org.xerial</groupId>
<artifactId>sqlite-jdbc</artifactId>
<version>3.45.1.0</version>
</dependency>
<dependency>
<groupId>com.google.code.gson</groupId>
<artifactId>gson</artifactId>
<version>2.10.1</version>
</dependency>
<dependency>
<groupId>org.apache.pdfbox</groupId>
<artifactId>pdfbox</artifactId>
<version>3.0.1</version>
</dependency>
<dependency>
<groupId>org.java-websocket</groupId>
<artifactId>Java-WebSocket</artifactId>
<version>1.5.4</version>
</dependency>
</dependencies>
<build>
<finalName>commodity-crawler-${project.version}</finalName>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.12.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
</configuration>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-shade-plugin</artifactId>
<version>3.5.1</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>false</createDependencyReducedPom>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>com.example.crawler.InteractiveCLI</mainClass>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheLicenseResourceTransformer"/>
<transformer implementation="org.apache.maven.plugins.shade.resource.ApacheNoticeResourceTransformer"/>
</transformers>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/*.SF</exclude>
<exclude>META-INF/*.DSA</exclude>
<exclude>META-INF/*.RSA</exclude>
<exclude>META-INF/license/**</exclude>
<exclude>META-INF/*.txt</exclude>
<exclude>META-INF/*.json</exclude>
</excludes>
</filter>
</filters>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
<resources>
<resource>
<directory>src/main/resources</directory>
<filtering>true</filtering>
</resource>
</resources>
</build>
</project>

163
project/src/main/java/com/example/crawler/CrawlMain.java

@ -0,0 +1,163 @@
package com.example.crawler;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.exception.ParamException;
import com.example.crawler.visualization.ChartGenerator;
import com.example.crawler.visualization.HtmlReportGenerator;
import com.example.crawler.util.ConfigUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
/**
 * Non-interactive command-line entry point of the crawler.
 *
 * <p>Parses the options, crawls one site or all sites through
 * {@link CrawlerController}, and optionally renders the charts and the HTML
 * report. Failures are reported on stderr and reflected in the process exit
 * status so that calling scripts can detect them.
 */
public class CrawlMain {
    private static final Logger logger = LoggerFactory.getLogger(CrawlMain.class);

    /** Exit status used when crawling or report generation fails. */
    private static final int EXIT_FAILURE = 1;
    /** Exit status used when the command-line arguments are invalid. */
    private static final int EXIT_BAD_ARGS = 2;

    /**
     * Runs one crawl (and, with {@code -a}, the visual analysis).
     *
     * @param args command-line options, see {@link #printUsage()}
     */
    public static void main(String[] args) {
        System.out.println("========================================");
        System.out.println(" 大宗商品爬虫系统 v1.0.0");
        System.out.println("========================================");
        ensureDirectories();
        try {
            CrawlCommand command = parseCommand(args);
            int totalSaved = runCrawl(command);
            System.out.println("[INFO] 爬取完成,共保存 " + totalSaved + " 条数据");
            if (command.isAnalyze()) {
                generateVisualizations();
            }
            System.out.println("\n========================================");
            System.out.println(" 任务执行完成");
            System.out.println("========================================");
        } catch (ParamException e) {
            System.err.println("[ERROR] 参数错误: " + e.getMessage());
            printUsage();
            // Previously the JVM still exited with status 0 here.
            System.exit(EXIT_BAD_ARGS);
        } catch (Exception e) {
            logger.error("系统运行异常", e);
            System.err.println("[ERROR] 系统运行异常: " + e.getMessage());
            System.exit(EXIT_FAILURE);
        }
    }

    /**
     * Crawls the site selected by {@code command}; the special site name
     * "all" (case-insensitive) crawls every site.
     *
     * @return the count reported by the controller
     */
    private static int runCrawl(CrawlCommand command) throws Exception {
        CrawlerController controller = new CrawlerController();
        if ("all".equalsIgnoreCase(command.getSite())) {
            System.out.println("\n[INFO] 开始爬取所有站点数据...");
            return controller.crawlAll(command.getPageCount());
        }
        System.out.println("\n[INFO] 开始爬取 " + command.getSite() + " 数据...");
        return controller.crawl(command.getSite(), command.getPageCount());
    }

    /** Generates all charts followed by the HTML report. */
    private static void generateVisualizations() throws Exception {
        System.out.println("\n[INFO] 开始生成可视化分析图表...");
        ChartGenerator chartGenerator = new ChartGenerator();
        chartGenerator.generateAllCharts();
        HtmlReportGenerator htmlReportGenerator = new HtmlReportGenerator();
        htmlReportGenerator.generateHtmlReport();
        System.out.println("[INFO] 可视化图表生成完成,输出目录: " +
                ConfigUtil.getString("output.chart.dir", "./output/charts/"));
    }

    /**
     * Translates the raw arguments into a {@link CrawlCommand}.
     *
     * <p>Defaults: site "all", page count from the {@code crawl.page.count}
     * setting (fallback 10), no analysis. {@code -h}/{@code --help} prints the
     * usage text and terminates the JVM with status 0.
     *
     * @throws ParamException if an option is unknown, a value is missing, or
     *                        the page count is not a positive integer
     */
    private static CrawlCommand parseCommand(String[] args) throws ParamException {
        CrawlCommand command = new CrawlCommand();
        command.setSite("all");
        command.setPageCount(ConfigUtil.getInt("crawl.page.count", 10));
        command.setAnalyze(false);
        for (int i = 0; i < args.length; i++) {
            // Locale.ROOT keeps option matching independent of the default
            // locale (e.g. Turkish lower-casing of 'I').
            String option = args[i].toLowerCase(java.util.Locale.ROOT);
            switch (option) {
                case "-s":
                case "--site":
                    if (i + 1 >= args.length) {
                        throw new ParamException("缺少站点参数值");
                    }
                    command.setSite(args[++i]);
                    break;
                case "-p":
                case "--pages":
                    if (i + 1 >= args.length) {
                        throw new ParamException("缺少页数参数值");
                    }
                    try {
                        command.setPageCount(Integer.parseInt(args[++i]));
                    } catch (NumberFormatException e) {
                        throw new ParamException("页数必须为数字");
                    }
                    break;
                case "-a":
                case "--analyze":
                    command.setAnalyze(true);
                    break;
                case "-h":
                case "--help":
                    printUsage();
                    System.exit(0);
                    break;
                default:
                    throw new ParamException("未知参数: " + args[i]);
            }
        }
        if (command.getPageCount() <= 0) {
            throw new ParamException("页数必须大于0");
        }
        return command;
    }

    /** Prints the option summary and a few example invocations to stdout. */
    private static void printUsage() {
        System.out.println("\n用法: java -jar commodity-crawler.jar [选项]");
        System.out.println("\n选项:");
        System.out.println(" -s, --site <站点> 指定爬取站点 (jintou/eastmoney/tonghuashun/all)");
        System.out.println(" 默认值: all");
        System.out.println(" -p, --pages <数量> 指定爬取页数");
        System.out.println(" 默认值: 10");
        System.out.println(" -a, --analyze 执行数据分析并生成可视化图表");
        System.out.println(" -h, --help 显示帮助信息");
        System.out.println("\n示例:");
        System.out.println(" java -jar commodity-crawler.jar -s jintou -p 5");
        System.out.println(" java -jar commodity-crawler.jar -s all -p 10 -a");
        System.out.println(" java -jar commodity-crawler.jar -a");
    }

    /** Creates the configured log and chart output directories if missing. */
    private static void ensureDirectories() {
        String logDir = ConfigUtil.getString("output.log.dir", "./logs/");
        String chartDir = ConfigUtil.getString("output.chart.dir", "./output/charts/");
        new File(logDir).mkdirs();
        new File(chartDir).mkdirs();
    }

    /** Mutable holder for the parsed command-line options. */
    private static class CrawlCommand {
        private String site;
        private int pageCount;
        private boolean analyze;

        public String getSite() {
            return site;
        }

        public void setSite(String site) {
            this.site = site;
        }

        public int getPageCount() {
            return pageCount;
        }

        public void setPageCount(int pageCount) {
            this.pageCount = pageCount;
        }

        public boolean isAnalyze() {
            return analyze;
        }

        public void setAnalyze(boolean analyze) {
            this.analyze = analyze;
        }
    }
}

221
project/src/main/java/com/example/crawler/InteractiveCLI.java

@ -0,0 +1,221 @@
package com.example.crawler;
import com.example.crawler.command.*;
import com.example.crawler.controller.CrawlerController;
import com.example.crawler.monitor.DataBroadcaster;
import com.example.crawler.repository.IndexDataRepository;
import com.example.crawler.repository.MarketDataRepository;
import com.example.crawler.util.ConfigUtil;
import com.example.crawler.util.MyBatisUtil;
import org.apache.ibatis.session.SqlSessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.util.Scanner;
/**
 * Console front end of the crawler.
 *
 * <p>Without arguments it shows a numbered menu and dispatches each choice
 * through a {@link CommandInvoker}; with arguments it runs the requested
 * actions once and returns.
 */
public class InteractiveCLI {
    private static final Logger logger = LoggerFactory.getLogger(InteractiveCLI.class);
    private static final Scanner scanner = new Scanner(System.in);

    /** Menu key that is bound to the exit command. */
    private static final String EXIT_KEY = "10";

    private final CommandInvoker invoker;
    private CrawlerController controller;
    private MarketDataRepository marketDataRepository;
    private IndexDataRepository indexDataRepository;
    private DataBroadcaster broadcaster;

    /**
     * Builds the controller and repositories and registers the menu commands.
     * Terminates the JVM with status 1 if initialization fails.
     */
    public InteractiveCLI() {
        this.invoker = new CommandInvoker();
        this.broadcaster = new DataBroadcaster();
        try {
            SqlSessionFactory sqlSessionFactory = MyBatisUtil.getSqlSessionFactory();
            this.controller = new CrawlerController();
            this.marketDataRepository = new MarketDataRepository(sqlSessionFactory);
            this.indexDataRepository = new IndexDataRepository(sqlSessionFactory);
        } catch (Exception e) {
            logger.error("初始化失败: {}", e.getMessage(), e);
            System.err.println("[ERROR] 系统初始化失败: " + e.getMessage());
            System.exit(1);
        }
        initializeCommands();
    }

    /** Binds menu keys "1" to "10" to their commands. */
    private void initializeCommands() {
        invoker.registerCommand("1", new CrawlCommand(controller, "jintou", getPageCount()));
        invoker.registerCommand("2", new CrawlCommand(controller, "eastmoney", getPageCount()));
        invoker.registerCommand("3", new CrawlCommand(controller, "tonghuashun", getPageCount()));
        invoker.registerCommand("4", new CrawlCommand(controller, "all", getPageCount()));
        invoker.registerCommand("5", new ExportDataCommand(marketDataRepository));
        invoker.registerCommand("6", new ViewDataCommand(marketDataRepository));
        invoker.registerCommand("7", new GenerateChartCommand());
        invoker.registerCommand("8", new GenerateReportCommand(indexDataRepository));
        invoker.registerCommand("9", new MonitorCommand(broadcaster));
        invoker.registerCommand(EXIT_KEY, new ExitCommand());
        logger.info("命令初始化完成,共注册 {} 个命令", invoker.getCommandCount());
    }

    /** Returns the {@code crawl.page.count} setting, falling back to 10. */
    private int getPageCount() {
        return ConfigUtil.getInt("crawl.page.count", 10);
    }

    /**
     * Runs the interactive menu loop until the exit option is chosen or
     * standard input is closed.
     */
    public void start() {
        ensureDirectories();
        System.out.println("╔════════════════════════════════════════════════════════════╗");
        System.out.println("║ 大宗商品爬虫系统 v2.0.0 - 交互式模式 ║");
        System.out.println("║ (支持多格式导出/PDF报告/实时监控) ║");
        System.out.println("╚════════════════════════════════════════════════════════════╝");
        while (true) {
            printMainMenu();
            System.out.print("请输入您的选择 (1-10): ");
            // nextLine() throws NoSuchElementException once stdin is exhausted
            // (piped input, Ctrl-D); leave the loop cleanly instead.
            if (!scanner.hasNextLine()) {
                System.out.println();
                logger.info("标准输入已关闭,退出交互模式");
                broadcaster.stop();
                break;
            }
            String choice = scanner.nextLine().trim();
            if (!invoker.isValidKey(choice)) {
                System.out.println("[ERROR] 无效的选择,请输入 1-10 之间的数字");
                continue;
            }
            if (EXIT_KEY.equals(choice)) {
                // Stop the broadcaster before the exit command runs.
                broadcaster.stop();
                invoker.executeCommand(choice);
                break;
            }
            invoker.executeCommand(choice);
        }
    }

    /** Prints the numbered main menu. */
    private void printMainMenu() {
        System.out.println("\n────────────────────────────────────────────────────────────");
        System.out.println(" 主菜单");
        System.out.println("────────────────────────────────────────────────────────────");
        System.out.println(" 1. [爬取金投网数据] 爬取金投网的大宗商品数据");
        System.out.println(" 2. [爬取东方财富网数据] 爬取东方财富网的大宗商品数据");
        System.out.println(" 3. [爬取同花顺财经数据] 爬取同花顺财经的大宗商品数据");
        System.out.println(" 4. [爬取所有站点数据] 爬取所有站点的大宗商品数据");
        System.out.println(" 5. [数据导出] 导出数据为Excel/CSV/JSON格式");
        System.out.println(" 6. [查看数据] 查看数据库中的历史数据统计");
        System.out.println(" 7. [生成图表] 生成价格趋势等可视化图表");
        System.out.println(" 8. [生成PDF报告] 生成专业的PDF格式分析报告");
        System.out.println(" 9. [实时监控大屏] 启动WebSocket实时监控服务");
        System.out.println(" 10. [退出系统] 退出大宗商品爬虫系统");
        System.out.println("────────────────────────────────────────────────────────────");
    }

    /** Creates the output and data directories used by the commands. */
    private void ensureDirectories() {
        String[] dirs = {"./output", "./output/charts", "./output/excel", "./output/report", "./data"};
        for (String dir : dirs) {
            File file = new File(dir);
            if (!file.exists()) {
                file.mkdirs();
            }
        }
    }

    /**
     * Program entry point: one-shot mode when arguments are given, otherwise
     * the interactive menu.
     */
    public static void main(String[] args) {
        InteractiveCLI cli = new InteractiveCLI();
        if (args.length > 0) {
            cli.runCommandMode(args);
        } else {
            cli.start();
        }
    }

    /**
     * Executes the actions requested on the command line: always a crawl,
     * then export, charts, PDF report and monitor as selected.
     *
     * <p>Invalid arguments are reported together with the usage text; any
     * other failure is logged and printed as a runtime error.
     */
    private void runCommandMode(String[] args) {
        try {
            CliOptions options = parseOptions(args, getPageCount());
            if (options.helpRequested) {
                printUsage();
                return;
            }
            // The site always has a value (default "all"), so a crawl is
            // performed on every invocation.
            Command crawlCommand = new CrawlCommand(controller, options.site, options.pageCount);
            crawlCommand.execute();
            if (options.exportFormat != null) {
                // NOTE(review): the parsed format is not forwarded; the
                // constructor used here only receives the repository. Confirm
                // how ExportDataCommand chooses its output format.
                Command exportCommand = new ExportDataCommand(marketDataRepository);
                exportCommand.execute();
            }
            if (options.analyze) {
                Command chartCommand = new GenerateChartCommand();
                chartCommand.execute();
            }
            if (options.generateReport) {
                Command reportCommand = new GenerateReportCommand(indexDataRepository);
                reportCommand.execute();
            }
            if (options.startMonitor) {
                Command monitorCommand = new MonitorCommand(broadcaster);
                monitorCommand.execute();
            }
        } catch (UsageException e) {
            System.err.println("[ERROR] 参数错误: " + e.getMessage());
            printUsage();
        } catch (Exception e) {
            logger.error("运行异常", e);
            System.err.println("[ERROR] 运行异常: " + e.getMessage());
        }
    }

    /**
     * Parses the one-shot mode arguments.
     *
     * <p>Parsing stops at the first {@code -h}/{@code --help}. Unknown
     * arguments are skipped with a warning.
     *
     * @param args             raw command-line arguments
     * @param defaultPageCount page count used when {@code -p} is absent
     * @return the parsed options
     * @throws UsageException if an option value is missing or the page count
     *                        is not a positive integer
     */
    private static CliOptions parseOptions(String[] args, int defaultPageCount) {
        CliOptions options = new CliOptions();
        options.pageCount = defaultPageCount;
        for (int i = 0; i < args.length; i++) {
            // Locale.ROOT keeps option matching independent of the default locale.
            switch (args[i].toLowerCase(java.util.Locale.ROOT)) {
                case "-s":
                case "--site":
                    options.site = requireValue(args, ++i, "缺少站点参数值");
                    break;
                case "-p":
                case "--pages": {
                    String raw = requireValue(args, ++i, "缺少页数参数值");
                    try {
                        options.pageCount = Integer.parseInt(raw);
                    } catch (NumberFormatException e) {
                        throw new UsageException("页数必须为数字");
                    }
                    break;
                }
                case "-a":
                case "--analyze":
                    options.analyze = true;
                    break;
                case "-e":
                case "--export":
                    // The format is optional; a following option means "use the default".
                    if (i + 1 < args.length && !args[i + 1].startsWith("-")) {
                        options.exportFormat = args[++i];
                    } else {
                        options.exportFormat = "excel";
                    }
                    break;
                case "-r":
                case "--report":
                    options.generateReport = true;
                    break;
                case "-m":
                case "--monitor":
                    options.startMonitor = true;
                    break;
                case "-h":
                case "--help":
                    options.helpRequested = true;
                    return options;
                default:
                    // Still ignored, as before, but no longer silently.
                    logger.warn("忽略未知参数: {}", args[i]);
                    System.err.println("[WARN] 忽略未知参数: " + args[i]);
                    break;
            }
        }
        if (options.pageCount <= 0) {
            throw new UsageException("页数必须大于0");
        }
        return options;
    }

    /**
     * Returns {@code args[index]}, or fails with {@code message} when the
     * option was the last argument and therefore has no value.
     */
    private static String requireValue(String[] args, int index, String message) {
        if (index >= args.length) {
            throw new UsageException(message);
        }
        return args[index];
    }

    /** Prints the one-shot mode option summary and examples. */
    private void printUsage() {
        System.out.println("用法: java -jar commodity-crawler.jar [选项]");
        System.out.println("选项:");
        System.out.println(" -s, --site <站点> 指定爬取站点 (jintou/eastmoney/tonghuashun/all)");
        System.out.println(" -p, --pages <页数> 指定爬取页数 (默认10)");
        System.out.println(" -e, --export [格式] 导出数据 (excel/csv/json,默认excel)");
        System.out.println(" -a, --analyze 生成可视化图表");
        System.out.println(" -r, --report 生成PDF分析报告");
        System.out.println(" -m, --monitor 启动实时监控大屏服务");
        System.out.println(" -h, --help 显示帮助信息");
        System.out.println("\n示例:");
        System.out.println(" java -jar commodity-crawler.jar # 启动交互式菜单");
        System.out.println(" java -jar commodity-crawler.jar -s all -p 5 # 爬取所有站点5页");
        System.out.println(" java -jar commodity-crawler.jar -s all -e csv # 爬取并导出CSV");
        System.out.println(" java -jar commodity-crawler.jar -s all -e json -a # 爬取+JSON导出+图表");
        System.out.println(" java -jar commodity-crawler.jar -s all -r -m # 爬取+报告+监控");
    }

    /** Values collected from the one-shot mode command line. */
    private static final class CliOptions {
        String site = "all";
        int pageCount;
        boolean analyze;
        String exportFormat;
        boolean generateReport;
        boolean startMonitor;
        boolean helpRequested;
    }

    /** Signals invalid command-line input, as opposed to a runtime failure. */
    private static final class UsageException extends RuntimeException {
        private static final long serialVersionUID = 1L;

        UsageException(String message) {
            super(message);
        }
    }
}

67
project/src/main/java/com/example/crawler/TestPdfGenerator.java

@ -0,0 +1,67 @@
package com.example.crawler;
import com.example.crawler.model.IndexData;
import com.example.crawler.util.PdfReportGenerator;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
/**
 * Manual smoke test for {@link PdfReportGenerator}: builds three synthetic
 * index series and renders them into a PDF under {@code ./output/report/}.
 */
public class TestPdfGenerator {
    /** Number of synthetic samples generated per index. */
    private static final int SAMPLES_PER_SERIES = 10;

    public static void main(String[] args) {
        try {
            System.out.println("开始测试中文PDF生成...");
            List<IndexData> testData = new ArrayList<>();
            addSeries(testData, "黄金现货", "金投网", "2300", "10", "0.5");
            addSeries(testData, "白银现货", "东方财富", "28", "0.5", "-0.3");
            addSeries(testData, "原油期货", "同花顺", "78", "1", "0.2");
            System.out.println("创建了 " + testData.size() + " 条测试数据");

            String outputDir = "./output/report/";
            new java.io.File(outputDir).mkdirs();
            String outputPath = outputDir + "chinese_report_" + System.currentTimeMillis() + ".pdf";
            System.out.println("正在生成中文PDF: " + outputPath);

            PdfReportGenerator pdfGenerator = new PdfReportGenerator();
            String result = pdfGenerator.generateReport(testData, new java.util.HashMap<>(), outputPath);
            System.out.println("中文PDF生成成功!");
            System.out.println("文件位置: " + new java.io.File(result).getAbsolutePath());
        } catch (Exception e) {
            System.err.println("PDF生成失败: " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Appends {@link #SAMPLES_PER_SERIES} samples of one index to {@code target}.
     *
     * <p>Sample {@code i} has value {@code baseValue + i * valueStep} and change
     * rate {@code i * rateStep}. The numbers are passed as decimal strings and
     * combined with {@link BigDecimal} arithmetic, because
     * {@code new BigDecimal(double)} stores the binary approximation (0.2
     * becomes 0.2000000000000000111...).
     *
     * @param target    list receiving the generated samples
     * @param indexName index name set on every sample
     * @param source    data source name set on every sample
     * @param baseValue value of the first sample, as a decimal string
     * @param valueStep value increment per sample, as a decimal string
     * @param rateStep  change-rate increment per sample, as a decimal string
     */
    private static void addSeries(List<IndexData> target, String indexName, String source,
                                  String baseValue, String valueStep, String rateStep) {
        BigDecimal base = new BigDecimal(baseValue);
        BigDecimal valueIncrement = new BigDecimal(valueStep);
        BigDecimal rateIncrement = new BigDecimal(rateStep);
        for (int i = 0; i < SAMPLES_PER_SERIES; i++) {
            BigDecimal step = BigDecimal.valueOf((long) i);
            IndexData data = new IndexData();
            data.setIndexName(indexName);
            data.setIndexValue(base.add(valueIncrement.multiply(step)));
            data.setChangeRate(rateIncrement.multiply(step));
            data.setSource(source);
            data.setDate(new Date());
            target.add(data);
        }
    }
}

36
project/src/main/java/com/example/crawler/command/Command.java

@ -0,0 +1,36 @@
package com.example.crawler.command;
/**
 * Core interface of the command pattern used by the CLI.
 *
 * <p>Every menu action implements {@link #execute()} and describes itself via
 * {@link #getName()} and {@link #getDescription()}. Undo support is opt-in
 * through the two default methods.
 */
public interface Command {

    /** Performs the action. */
    void execute();

    /** @return the command name shown in the menu */
    String getName();

    /** @return a description of the command */
    String getDescription();

    /**
     * Tells whether {@link #undo()} is supported.
     *
     * @return {@code false} unless an implementation overrides it
     */
    default boolean isUndoable() {
        return false;
    }

    /**
     * Reverts the action (optional operation).
     *
     * @throws UnsupportedOperationException unless an implementation overrides it
     */
    default void undo() {
        throw new UnsupportedOperationException("该命令不支持撤销操作");
    }
}

139
project/src/main/java/com/example/crawler/command/CommandInvoker.java

@ -0,0 +1,139 @@
package com.example.crawler.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
/**
 * Invoker of the command pattern.
 *
 * <p>Keeps the menu-key to {@link Command} mapping in registration order,
 * executes commands by key, and remembers the most recent undoable commands
 * so the last one can be reverted.
 */
public class CommandInvoker {
    private static final Logger logger = LoggerFactory.getLogger(CommandInvoker.class);

    /** Upper bound on the number of commands kept for undo. */
    private static final int MAX_HISTORY_SIZE = 10;

    /** Menu key -> command, in registration order. */
    private final Map<String, Command> commandMap;

    /** Executed undoable commands, most recent first. */
    private final Deque<Command> commandHistory;

    public CommandInvoker() {
        this.commandMap = new LinkedHashMap<>();
        this.commandHistory = new ArrayDeque<>();
    }

    /**
     * Registers {@code command} under {@code key}, replacing any previous
     * command with the same key.
     *
     * @param key     menu option key
     * @param command command to run for that key
     * @throws NullPointerException if {@code command} is null
     */
    public void registerCommand(String key, Command command) {
        // Validate before touching the map: previously a null command was
        // stored first and only then failed on getName(), leaving a key that
        // isValidKey() accepted but executeCommand() rejected.
        Objects.requireNonNull(command, "command must not be null");
        commandMap.put(key, command);
        logger.debug("注册命令: key={}, command={}", key, command.getName());
    }

    /**
     * Executes the command registered under {@code key}.
     *
     * <p>An unknown key and any exception thrown by the command are reported
     * on the console and in the log; nothing is propagated to the caller.
     *
     * @param key menu option key
     */
    public void executeCommand(String key) {
        Command command = commandMap.get(key);
        if (command == null) {
            System.out.println("[ERROR] 无效的选项: " + key);
            logger.warn("尝试执行未注册的命令: key={}", key);
            return;
        }
        try {
            command.execute();
            // Only commands that can be reverted are worth remembering.
            if (command.isUndoable()) {
                addToHistory(command);
            }
            logger.info("命令执行成功: key={}, command={}", key, command.getName());
        } catch (Exception e) {
            logger.error("命令执行失败: key={}, command={}", key, command.getName(), e);
            System.err.println("[ERROR] 命令执行失败: " + e.getMessage());
        }
    }

    /**
     * Reverts the most recently executed undoable command, if there is one.
     * The command is removed from the history even when its undo fails.
     */
    public void undoLastCommand() {
        if (commandHistory.isEmpty()) {
            System.out.println("[INFO] 没有可撤销的命令");
            return;
        }
        Command lastCommand = commandHistory.pop();
        try {
            lastCommand.undo();
            logger.info("命令撤销成功: command={}", lastCommand.getName());
        } catch (Exception e) {
            logger.error("命令撤销失败", e);
            System.err.println("[ERROR] 撤销失败: " + e.getMessage());
        }
    }

    /** Prints every registered command as a numbered menu. */
    public void showMenu() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 主菜单");
        System.out.println("──────────────────────────────────────────────────────────");
        for (Map.Entry<String, Command> entry : commandMap.entrySet()) {
            Command command = entry.getValue();
            System.out.println(String.format(" %s. [%s] %s",
                    entry.getKey(), command.getName(), command.getDescription()));
        }
        System.out.println("──────────────────────────────────────────────────────────");
    }

    /**
     * @return a copy of the registered keys, iterating in registration order
     */
    public Set<String> getAvailableKeys() {
        return new LinkedHashSet<>(commandMap.keySet());
    }

    /**
     * @return {@code true} if a command is registered under {@code key}
     */
    public boolean isValidKey(String key) {
        return commandMap.containsKey(key);
    }

    /**
     * @return the number of registered commands
     */
    public int getCommandCount() {
        return commandMap.size();
    }

    /**
     * Pushes {@code command} onto the history, dropping the oldest entry once
     * {@link #MAX_HISTORY_SIZE} is reached.
     */
    private void addToHistory(Command command) {
        if (commandHistory.size() >= MAX_HISTORY_SIZE) {
            commandHistory.removeLast();
        }
        commandHistory.push(command);
    }

    /**
     * @return the number of commands currently available for undo
     */
    public int getHistorySize() {
        return commandHistory.size();
    }
}

73
project/src/main/java/com/example/crawler/command/CrawlCommand.java

@ -0,0 +1,73 @@
package com.example.crawler.command;
import com.example.crawler.controller.CrawlerController;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Concrete command that crawls one site, or every site when the site code is "all".
 */
public class CrawlCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);

    private final CrawlerController controller;
    /** Site code to crawl; "all" (case-insensitive) means every site. */
    private final String site;
    private final int pageCount;
    /** Number of records saved by the most recent {@link #execute()} call. */
    private int savedCount;

    /**
     * @param controller controller that performs the crawl
     * @param site       site code, or "all" for every site
     * @param pageCount  number of pages to crawl, passed through to the controller
     */
    public CrawlCommand(CrawlerController controller, String site, int pageCount) {
        this.controller = controller;
        this.site = site;
        this.pageCount = pageCount;
    }

    /**
     * Runs the crawl and reports the number of saved records. Failures are logged and
     * printed; they are not propagated to the caller.
     */
    @Override
    public void execute() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" " + getName());
        System.out.println("──────────────────────────────────────────────────────────");
        // Start from zero so a failed run does not keep reporting the previous run's count.
        savedCount = 0;
        try {
            if ("all".equalsIgnoreCase(site)) {
                System.out.println("[INFO] 开始爬取所有站点数据...");
                savedCount = controller.crawlAll(pageCount);
            } else {
                System.out.println("[INFO] 开始爬取 " + getSiteDisplayName(site) + " 数据...");
                savedCount = controller.crawl(site, pageCount);
            }
            System.out.println("[INFO] 爬取完成,共保存 " + savedCount + " 条数据");
            logger.info("爬取命令执行成功: site={}, pages={}, saved={}", site, pageCount, savedCount);
        } catch (Exception e) {
            logger.error("爬取命令执行失败", e);
            System.err.println("[ERROR] 爬取失败: " + e.getMessage());
        }
    }

    @Override
    public String getName() {
        return "all".equalsIgnoreCase(site) ? "爬取所有站点数据" : "爬取" + getSiteDisplayName(site) + "数据";
    }

    @Override
    public String getDescription() {
        return "爬取" + ("all".equalsIgnoreCase(site) ? "所有站点" : getSiteDisplayName(site)) + "的大宗商品数据";
    }

    /**
     * @return number of records saved by the last execution (0 if it failed or never ran)
     */
    public int getSavedCount() {
        return savedCount;
    }

    /**
     * Maps a site code to its display name; unknown codes are returned unchanged.
     */
    private String getSiteDisplayName(String site) {
        // Locale.ROOT keeps the match independent of the JVM default locale
        // (e.g. Turkish lower-cases 'I' to a dotless 'ı', which would miss "jintou").
        switch (site.toLowerCase(java.util.Locale.ROOT)) {
            case "jintou":
                return "金投网";
            case "eastmoney":
                return "东方财富网";
            case "tonghuashun":
                return "同花顺财经";
            default:
                return site;
        }
    }
}

31
project/src/main/java/com/example/crawler/command/ExitCommand.java

@ -0,0 +1,31 @@
package com.example.crawler.command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Concrete command that says goodbye and terminates the JVM.
 */
public class ExitCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);

    /** Horizontal rule printed above and below the farewell message. */
    private static final String RULE = "──────────────────────────────────────────────────────────";

    @Override
    public String getName() {
        return "退出系统";
    }

    @Override
    public String getDescription() {
        return "退出大宗商品爬虫系统";
    }

    /** Prints the farewell banner, logs the shutdown and exits with status 0. */
    @Override
    public void execute() {
        System.out.println("\n" + RULE);
        System.out.println(" 感谢使用大宗商品爬虫系统,再见!");
        System.out.println(RULE);
        logger.info("系统正常退出");
        System.exit(0);
    }
}

101
project/src/main/java/com/example/crawler/command/ExportDataCommand.java

@ -0,0 +1,101 @@
package com.example.crawler.command;
import com.example.crawler.model.MarketData;
import com.example.crawler.repository.MarketDataRepository;
import com.example.crawler.util.exporter.DataExporter;
import com.example.crawler.util.exporter.DataExporterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Scanner;
/**
 * Interactive command that exports all stored market data to Excel, CSV or JSON.
 * The user picks the format from a sub-menu read from standard input.
 */
public class ExportDataCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExportDataCommand.class);
    private static final Scanner scanner = new Scanner(System.in);

    private final MarketDataRepository repository;
    /** Path of the file targeted by the most recent export attempt; null before the first one. */
    private String outputPath;

    public ExportDataCommand(MarketDataRepository repository) {
        this.repository = repository;
    }

    /**
     * Shows the format menu, validates the choice and only then loads and exports the data.
     * All failures (including a closed standard input) are logged and printed, not propagated.
     */
    @Override
    public void execute() {
        printFormatMenu();
        try {
            // Reading inside the try so that a closed/exhausted stdin is reported like any other failure.
            String choice = scanner.nextLine().trim();
            if ("4".equals(choice)) {
                return;
            }
            String format = formatForChoice(choice);
            if (format == null) {
                System.out.println("[ERROR] 无效的选择,请输入 1-4 之间的数字");
                return;
            }

            // Query the database only after the choice is known to be an export request.
            List<MarketData> allData = repository.findAll();
            if (allData.isEmpty()) {
                System.out.println("[WARNING] 数据库中没有数据!请先爬取数据。");
                return;
            }

            DataExporter exporter = DataExporterFactory.getExporter(format);
            String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
            String extension = exporter.getFileExtension();
            outputPath = "./output/excel/commodity_data_" + timestamp + extension;
            ensureParentDirectory(outputPath);
            exporter.export(allData, outputPath);

            File file = new File(outputPath);
            System.out.println("[SUCCESS] " + format.toUpperCase() + "文件导出成功!");
            System.out.println("[INFO] 文件位置: " + file.getAbsolutePath());
            System.out.println("[INFO] 共导出 " + allData.size() + " 条数据");
            logger.info("数据导出成功: format={}, path={}, count={}", format, outputPath, allData.size());
        } catch (Exception e) {
            logger.error("数据导出失败", e);
            System.err.println("[ERROR] 数据导出失败: " + e.getMessage());
        }
    }

    /** Prints the export sub-menu and the input prompt. */
    private static void printFormatMenu() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 数据导出");
        System.out.println("──────────────────────────────────────────────────────────");
        System.out.println("支持的格式: Excel (.xlsx), CSV (.csv), JSON (.json)");
        System.out.println("──────────────────────────────────────────────────────────");
        System.out.println(" 1. [Excel] 导出为 Excel 格式 (.xlsx)");
        System.out.println(" 2. [CSV] 导出为 CSV 格式 (.csv)");
        System.out.println(" 3. [JSON] 导出为 JSON 格式 (.json)");
        System.out.println(" 4. [返回] 返回主菜单");
        System.out.println("──────────────────────────────────────────────────────────");
        System.out.print("请输入您的选择 (1-4): ");
    }

    /**
     * @return the exporter format key for menu choices "1".."3", or {@code null} for anything else
     */
    private static String formatForChoice(String choice) {
        switch (choice) {
            case "1":
                return "excel";
            case "2":
                return "csv";
            case "3":
                return "json";
            default:
                return null;
        }
    }

    /**
     * Creates the directory that will contain {@code path} if it does not exist yet.
     *
     * @throws IllegalStateException if the directory is missing and cannot be created
     */
    private static void ensureParentDirectory(String path) {
        File parent = new File(path).getParentFile();
        // mkdirs() returns false when the directory already exists, hence the second check.
        if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
            throw new IllegalStateException("无法创建输出目录: " + parent.getAbsolutePath());
        }
    }

    @Override
    public String getName() {
        return "数据导出";
    }

    @Override
    public String getDescription() {
        return "将数据库中的历史数据导出为Excel/CSV/JSON文件";
    }

    /**
     * @return path targeted by the most recent export attempt, or {@code null} if none was made
     */
    public String getOutputPath() {
        return outputPath;
    }
}

70
project/src/main/java/com/example/crawler/command/ExportExcelCommand.java

@ -0,0 +1,70 @@
package com.example.crawler.command;
import com.example.crawler.model.MarketData;
import com.example.crawler.repository.MarketDataRepository;
import com.example.crawler.util.exporter.DataExporter;
import com.example.crawler.util.exporter.DataExporterFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
/**
 * Command that exports all stored market data to a timestamped Excel file
 * under {@code ./output/excel/}.
 */
public class ExportExcelCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExportExcelCommand.class);

    private final MarketDataRepository repository;
    /** Path of the file targeted by the most recent export attempt; null before the first one. */
    private String outputPath;

    public ExportExcelCommand(MarketDataRepository repository) {
        this.repository = repository;
    }

    /**
     * Loads every record and writes it through the "excel" exporter.
     * Failures are logged and printed, not propagated.
     */
    @Override
    public void execute() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 导出数据到Excel");
        System.out.println("──────────────────────────────────────────────────────────");
        try {
            List<MarketData> allData = repository.findAll();
            if (allData.isEmpty()) {
                System.out.println("[WARNING] 数据库中没有数据!请先爬取数据。");
                return;
            }
            DataExporter exporter = DataExporterFactory.getExporter("excel");
            String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
            outputPath = "./output/excel/commodity_data_" + timestamp + ".xlsx";

            // Make sure the target directory exists, as GenerateReportCommand does for its output.
            File file = new File(outputPath);
            File parent = file.getParentFile();
            // mkdirs() returns false when the directory already exists, hence the second check.
            if (parent != null && !parent.mkdirs() && !parent.isDirectory()) {
                throw new IllegalStateException("无法创建输出目录: " + parent.getAbsolutePath());
            }

            exporter.export(allData, outputPath);
            System.out.println("[SUCCESS] Excel文件导出成功!");
            System.out.println("[INFO] 文件位置: " + file.getAbsolutePath());
            System.out.println("[INFO] 共导出 " + allData.size() + " 条数据");
            logger.info("Excel导出成功: path={}, count={}", outputPath, allData.size());
        } catch (Exception e) {
            logger.error("Excel导出失败", e);
            System.err.println("[ERROR] Excel导出失败: " + e.getMessage());
        }
    }

    @Override
    public String getName() {
        return "导出Excel";
    }

    @Override
    public String getDescription() {
        return "将数据库中的历史数据导出为Excel文件";
    }

    /**
     * @return path targeted by the most recent export attempt, or {@code null} if none was made
     */
    public String getOutputPath() {
        return outputPath;
    }
}

55
project/src/main/java/com/example/crawler/command/GenerateChartCommand.java

@ -0,0 +1,55 @@
package com.example.crawler.command;
import com.example.crawler.visualization.ChartGenerator;
import com.example.crawler.visualization.HtmlReportGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Concrete command that produces the visualization charts and the HTML report.
 */
public class GenerateChartCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(GenerateChartCommand.class);

    private final ChartGenerator chartGenerator;
    private final HtmlReportGenerator htmlReportGenerator;

    /** Creates the command with its own chart and HTML report generators. */
    public GenerateChartCommand() {
        chartGenerator = new ChartGenerator();
        htmlReportGenerator = new HtmlReportGenerator();
    }

    @Override
    public String getName() {
        return "生成图表";
    }

    @Override
    public String getDescription() {
        return "生成价格趋势、波动特征、相关性等可视化图表";
    }

    /**
     * Generates all charts first and the HTML report second, then prints where to find them.
     * Any failure is logged and printed instead of being propagated.
     */
    @Override
    public void execute() {
        printHeader();
        try {
            System.out.println("[INFO] 开始生成可视化分析图表...");
            chartGenerator.generateAllCharts();
            htmlReportGenerator.generateHtmlReport();
            System.out.println("[SUCCESS] 可视化图表生成完成!");
            System.out.println("[INFO] 图表位置: ./output/charts/");
            System.out.println("[INFO] 报告位置: ./output/report.html");
            logger.info("图表生成命令执行成功");
        } catch (Exception failure) {
            logger.error("图表生成失败", failure);
            System.err.println("[ERROR] 图表生成失败: " + failure.getMessage());
        }
    }

    /** Prints the three-line banner of this command. */
    private static void printHeader() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 生成可视化图表");
        System.out.println("──────────────────────────────────────────────────────────");
    }
}

84
project/src/main/java/com/example/crawler/command/GenerateReportCommand.java

@ -0,0 +1,84 @@
package com.example.crawler.command;
import com.example.crawler.model.IndexData;
import com.example.crawler.repository.IndexDataRepository;
import com.example.crawler.util.PdfReportGenerator;
import com.example.crawler.visualization.ChartGenerator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Command that renders the analysis charts and then builds a timestamped PDF report
 * under {@code ./output/report/}.
 */
public class GenerateReportCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(GenerateReportCommand.class);

    private final IndexDataRepository repository;
    /** Path requested for the most recent report; null until a report generation was attempted. */
    private String outputPath;

    public GenerateReportCommand(IndexDataRepository repository) {
        this.repository = repository;
    }

    /**
     * Generates four charts and the PDF report from all stored index data.
     * Failures are logged (with stack trace) and summarized on stderr; they are not propagated.
     */
    @Override
    public void execute() {
        System.out.println("\n========================================");
        System.out.println(" Generate PDF Analysis Report");
        System.out.println("========================================");
        try {
            List<IndexData> allData = repository.findAll();
            if (allData.isEmpty()) {
                System.out.println("[WARNING] No data in database! Please crawl data first.");
                return;
            }

            System.out.println("[INFO] Generating charts...");
            ChartGenerator chartGenerator = new ChartGenerator();
            chartGenerator.generatePriceTrendChart();
            chartGenerator.generateVolatilityChart();
            chartGenerator.generateCorrelationChart();
            chartGenerator.generateSentimentChart();
            System.out.println("[INFO] Charts saved to output/charts/ directory");

            String timestamp = new SimpleDateFormat("yyyyMMdd_HHmmss").format(new Date());
            String outputDir = "./output/report/";
            File reportDir = new File(outputDir);
            // mkdirs() returns false when the directory already exists, hence the second check.
            if (!reportDir.mkdirs() && !reportDir.isDirectory()) {
                throw new IllegalStateException("Cannot create report directory: " + reportDir.getAbsolutePath());
            }
            outputPath = outputDir + "commodity_report_" + timestamp + ".pdf";

            System.out.println("[INFO] Generating PDF report...");
            PdfReportGenerator pdfGenerator = new PdfReportGenerator();
            // NOTE(review): the second argument is always an empty map here — confirm that is intended.
            String reportPath = pdfGenerator.generateReport(allData, new HashMap<>(), outputPath);

            File file = new File(reportPath);
            System.out.println("[SUCCESS] PDF report generated successfully!");
            System.out.println("[INFO] File location: " + file.getAbsolutePath());
            System.out.println("[INFO] Report contains " + allData.size() + " records");
            logger.info("PDF Report generated: path={}, dataCount={}", reportPath, allData.size());
        } catch (Exception e) {
            // The logger already records the full stack trace; no separate printStackTrace().
            logger.error("PDF Report generation failed", e);
            System.err.println("[ERROR] PDF report generation failed: " + e.getMessage());
        }
    }

    @Override
    public String getName() {
        return "Generate PDF Report";
    }

    @Override
    public String getDescription() {
        return "Generate professional PDF market analysis report";
    }

    /**
     * @return path requested for the most recent report, or {@code null} if none was attempted
     */
    public String getOutputPath() {
        return outputPath;
    }
}

55
project/src/main/java/com/example/crawler/command/MonitorCommand.java

@ -0,0 +1,55 @@
package com.example.crawler.command;
import com.example.crawler.monitor.DataBroadcaster;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Command that starts the real-time monitoring broadcaster on a fixed port,
 * or reports its status when it is already running.
 */
public class MonitorCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(MonitorCommand.class);

    /** Port handed to {@link DataBroadcaster#start(int)} and shown in the console hints. */
    private static final int DEFAULT_PORT = 8080;

    private final DataBroadcaster broadcaster;

    public MonitorCommand(DataBroadcaster broadcaster) {
        this.broadcaster = broadcaster;
    }

    @Override
    public String getName() {
        return "实时监控";
    }

    @Override
    public String getDescription() {
        return "启动WebSocket实时监控大屏服务";
    }

    /** Prints the banner, then either reports the running service or starts it. */
    @Override
    public void execute() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 实时监控大屏服务");
        System.out.println("──────────────────────────────────────────────────────────");
        if (broadcaster.isRunning()) {
            describeRunningService();
        } else {
            launchService();
        }
    }

    /** Prints access hints and the current connection count of the running service. */
    private void describeRunningService() {
        System.out.println("[INFO] 监控服务已在运行中...");
        System.out.println("[INFO] 监控大屏访问地址: http://localhost:" + DEFAULT_PORT + "/monitor.html");
        System.out.println("[INFO] 当前连接数: " + broadcaster.getConnectionCount());
        System.out.println("[TIP] 在浏览器中打开 monitor.html 文件即可查看实时监控大屏");
    }

    /** Starts the broadcaster; a failure is logged and printed rather than propagated. */
    private void launchService() {
        try {
            broadcaster.start(DEFAULT_PORT);
            System.out.println("[SUCCESS] 实时监控服务启动成功!");
            System.out.println("[INFO] WebSocket服务端口: " + DEFAULT_PORT);
            System.out.println("[INFO] 监控大屏页面位置: src/main/resources/webapp/monitor.html");
            System.out.println("[TIP] 访问 http://localhost:" + DEFAULT_PORT + " 打开监控大屏");
            System.out.println("[INFO] 按 Ctrl+C 可停止监控服务");
            logger.info("实时监控服务已启动,端口: {}", DEFAULT_PORT);
        } catch (Exception startFailure) {
            logger.error("启动监控服务失败", startFailure);
            System.err.println("[ERROR] 启动监控服务失败: " + startFailure.getMessage());
        }
    }
}

87
project/src/main/java/com/example/crawler/command/ViewDataCommand.java

@ -0,0 +1,87 @@
package com.example.crawler.command;
import com.example.crawler.model.MarketData;
import com.example.crawler.repository.MarketDataRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * Concrete command that prints statistics about the stored market data:
 * total count, counts per variety and per source, and the first five records.
 */
public class ViewDataCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ViewDataCommand.class);

    private final MarketDataRepository repository;

    public ViewDataCommand(MarketDataRepository repository) {
        this.repository = repository;
    }

    /**
     * Loads all records and prints the statistics. Failures are logged and printed,
     * not propagated.
     */
    @Override
    public void execute() {
        System.out.println("\n──────────────────────────────────────────────────────────");
        System.out.println(" 查看数据库历史数据");
        System.out.println("──────────────────────────────────────────────────────────");
        try {
            List<MarketData> allData = repository.findAll();
            if (allData.isEmpty()) {
                System.out.println("[WARNING] 数据库中没有数据!请先爬取数据。");
                return;
            }

            // groupingBy rejects null keys with a NullPointerException, so records with a
            // missing variety/source are grouped under the text "null" instead of aborting
            // the whole view. TreeMap makes the printed order stable (sorted by key).
            Map<String, Long> varietyCount = allData.stream()
                    .collect(Collectors.groupingBy(
                            data -> String.valueOf(data.getVariety()),
                            java.util.TreeMap::new,
                            Collectors.counting()));
            Map<String, Long> sourceCount = allData.stream()
                    .collect(Collectors.groupingBy(
                            data -> String.valueOf(data.getSource()),
                            java.util.TreeMap::new,
                            Collectors.counting()));

            System.out.println("\n【数据统计】");
            System.out.println("总数据量: " + allData.size() + " 条");
            System.out.println("\n【按品种统计】");
            varietyCount.forEach((variety, count) ->
                    System.out.println(" " + variety + ": " + count + " 条")
            );
            System.out.println("\n【按来源统计】");
            sourceCount.forEach((source, count) ->
                    System.out.println(" " + source + ": " + count + " 条")
            );

            // Shows the first five records in the order returned by the repository.
            // NOTE(review): the heading says "latest"; that holds only if findAll() returns newest first — confirm.
            System.out.println("\n【最新5条数据】");
            allData.stream()
                    .limit(5)
                    .forEach(data -> System.out.println(
                            " " + data.getTradeDate() + " | " +
                                    data.getVariety() + " | " +
                                    "收盘价: " + data.getClosePrice() + " | " +
                                    "来源: " + data.getSource()
                    ));
            logger.info("查看数据命令执行成功: total={}", allData.size());
        } catch (Exception e) {
            logger.error("查看数据失败", e);
            System.err.println("[ERROR] 查看数据失败: " + e.getMessage());
        }
    }

    @Override
    public String getName() {
        return "查看数据";
    }

    @Override
    public String getDescription() {
        return "查看数据库中的历史数据统计";
    }
}

79
project/src/main/java/com/example/crawler/controller/CrawlerController.java

@ -0,0 +1,79 @@
package com.example.crawler.controller;
import com.example.crawler.exception.BaseCrawlException;
import com.example.crawler.exception.NetworkException;
import com.example.crawler.strategy.CrawlStrategy;
import com.example.crawler.strategy.CrawlStrategyFactory;
import com.example.crawler.util.ConfigUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * Drives crawl strategies: runs one site with retry on network errors, or all known sites in turn.
 */
public class CrawlerController {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);

    /**
     * Crawls a single site and stores the result.
     * <p>
     * A {@link NetworkException} triggers a retry with exponential back-off
     * ({@code initialDelay * multiplier^attempt}); any other exception ends the crawl at once.
     * Exceptions are logged, never propagated.
     *
     * @param siteCode  site code understood by {@link CrawlStrategyFactory}
     * @param pageCount number of pages to crawl
     * @return number of records saved, or 0 when the crawl failed
     */
    public int crawl(String siteCode, int pageCount) {
        int totalSaved = 0;
        // A misconfigured count of 0 or less would otherwise skip the crawl silently.
        int retryCount = Math.max(1, ConfigUtil.getInt("crawl.retry.count", 3));
        long initialDelay = ConfigUtil.getLong("crawl.retry.delay.initial", 1000);
        long delayMultiplier = ConfigUtil.getLong("crawl.retry.delay.multiplier", 2);

        for (int retry = 0; retry < retryCount; retry++) {
            try {
                CrawlStrategy strategy = CrawlStrategyFactory.createStrategy(siteCode);
                logger.info("开始执行{}爬虫任务,爬取{}页", strategy.getSiteName(), pageCount);
                List<?> dataList = strategy.crawlData(pageCount);
                logger.info("爬取完成,获取{}条数据", dataList.size());
                int saved = strategy.saveData(dataList);
                totalSaved += saved;
                logger.info("数据保存完成,成功保存{}条", saved);
                return totalSaved;
            } catch (NetworkException e) {
                // Trailing throwable argument: SLF4J logs it as the exception with its stack trace.
                logger.error("网络异常: {},第{}次重试", e.getMessage(), retry + 1, e);
                if (retry < retryCount - 1) {
                    long delay = initialDelay * (long) Math.pow(delayMultiplier, retry);
                    logger.info("等待{}毫秒后重试...", delay);
                    try {
                        Thread.sleep(delay);
                    } catch (InterruptedException ie) {
                        // Restore the interrupt flag and give up instead of retrying.
                        Thread.currentThread().interrupt();
                        logger.warn("等待重试被中断");
                        return totalSaved;
                    }
                } else {
                    logger.error("已达到最大重试次数({}),停止爬取", retryCount);
                    return totalSaved;
                }
            } catch (BaseCrawlException e) {
                logger.error("爬虫任务异常: {}", e.getMessage(), e);
                return totalSaved;
            } catch (Exception e) {
                logger.error("未知异常: ", e);
                return totalSaved;
            }
        }
        return totalSaved;
    }

    /**
     * Crawls every known site one after another; a failing site contributes 0 and does not
     * stop the remaining sites.
     *
     * @param pageCount number of pages to crawl per site
     * @return total number of records saved across all sites
     */
    public int crawlAll(int pageCount) {
        int totalSaved = 0;
        String[] sites = {"jintou", "eastmoney", "tonghuashun"};
        for (String site : sites) {
            logger.info("========== 开始爬取 {} ==========", site);
            int saved = crawl(site, pageCount);
            totalSaved += saved;
            logger.info("========== {}爬取完成,累计保存{}条 ==========", site, totalSaved);
        }
        return totalSaved;
    }
}

11
project/src/main/java/com/example/crawler/exception/BaseCrawlException.java

@ -0,0 +1,11 @@
package com.example.crawler.exception;
/**
 * Checked base exception of the crawler; the other exception types in this package extend it.
 */
public class BaseCrawlException extends Exception {

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public BaseCrawlException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message detail message
     */
    public BaseCrawlException(String message) {
        super(message);
    }
}

11
project/src/main/java/com/example/crawler/exception/DbException.java

@ -0,0 +1,11 @@
package com.example.crawler.exception;
/**
 * Crawler exception for database failures.
 * NOTE(review): meaning taken from the class name; the throw sites are outside this file.
 */
public class DbException extends BaseCrawlException {

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public DbException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message detail message
     */
    public DbException(String message) {
        super(message);
    }
}

11
project/src/main/java/com/example/crawler/exception/NetworkException.java

@ -0,0 +1,11 @@
package com.example.crawler.exception;
/**
 * Crawler exception for network failures.
 * NOTE(review): meaning taken from the class name; the throw sites are outside this file.
 */
public class NetworkException extends BaseCrawlException {

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message detail message
     */
    public NetworkException(String message) {
        super(message);
    }
}

11
project/src/main/java/com/example/crawler/exception/ParamException.java

@ -0,0 +1,11 @@
package com.example.crawler.exception;
/**
 * Crawler exception for invalid parameters.
 * NOTE(review): meaning taken from the class name; the throw sites are outside this file.
 */
public class ParamException extends BaseCrawlException {

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public ParamException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message detail message
     */
    public ParamException(String message) {
        super(message);
    }
}

11
project/src/main/java/com/example/crawler/exception/ParseException.java

@ -0,0 +1,11 @@
package com.example.crawler.exception;
/**
 * Crawler exception for parsing failures. Unrelated to {@code java.text.ParseException}:
 * this one extends {@link BaseCrawlException}.
 * NOTE(review): meaning taken from the class name; the throw sites are outside this file.
 */
public class ParseException extends BaseCrawlException {

    /**
     * @param message detail message
     * @param cause   underlying cause
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message detail message
     */
    public ParseException(String message) {
        super(message);
    }
}

27
project/src/main/java/com/example/crawler/mapper/IndexDataMapper.java

@ -0,0 +1,27 @@
package com.example.crawler.mapper;
import com.example.crawler.model.IndexData;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.Date;
import java.util.List;
/**
 * MyBatis mapper for {@link IndexData} records.
 * <p>
 * NOTE(review): the SQL bound to these methods is defined outside this file, so the
 * grouping below follows the method names only — confirm against the mapper statements.
 */
@Mapper
public interface IndexDataMapper {

    // ---- write statements (int result; presumably the affected-row count) ----

    int insert(IndexData data);

    int batchInsert(List<IndexData> dataList);

    int deleteAll();

    // ---- read statements ----

    int count();

    List<IndexData> selectAll();

    List<IndexData> selectByIndexName(@Param("indexName") String indexName);

    List<IndexData> selectByDateRange(@Param("startDate") Date startDate, @Param("endDate") Date endDate);

    IndexData selectByDateAndIndex(@Param("date") Date date, @Param("indexName") String indexName);
}

27
project/src/main/java/com/example/crawler/mapper/MarketDataMapper.java

@ -0,0 +1,27 @@
package com.example.crawler.mapper;
import com.example.crawler.model.MarketData;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.Date;
import java.util.List;
/**
 * MyBatis mapper for {@link MarketData} records.
 * <p>
 * NOTE(review): the SQL bound to these methods is defined outside this file, so the
 * grouping below follows the method names only — confirm against the mapper statements.
 */
@Mapper
public interface MarketDataMapper {

    // ---- write statements (int result; presumably the affected-row count) ----

    int insert(MarketData data);

    int batchInsert(List<MarketData> dataList);

    int deleteAll();

    // ---- read statements ----

    int countByVariety(@Param("variety") String variety);

    List<MarketData> selectAll();

    List<MarketData> selectByVariety(@Param("variety") String variety);

    List<MarketData> selectByDateRange(@Param("startDate") Date startDate, @Param("endDate") Date endDate);

    MarketData selectByDateAndVariety(@Param("tradeDate") Date tradeDate, @Param("variety") String variety);
}

27
project/src/main/java/com/example/crawler/mapper/NewsDataMapper.java

@ -0,0 +1,27 @@
package com.example.crawler.mapper;
import com.example.crawler.model.NewsData;
import org.apache.ibatis.annotations.Mapper;
import org.apache.ibatis.annotations.Param;
import java.util.Date;
import java.util.List;
/**
 * MyBatis mapper for {@link NewsData} records.
 * <p>
 * NOTE(review): the SQL bound to these methods is defined outside this file, so the
 * grouping below follows the method names only — confirm against the mapper statements.
 */
@Mapper
public interface NewsDataMapper {

    // ---- write statements (int result; presumably the affected-row count) ----

    int insert(NewsData data);

    int batchInsert(List<NewsData> dataList);

    int deleteAll();

    // ---- read statements ----

    int countBySentiment(@Param("sentiment") String sentiment);

    List<NewsData> selectAll();

    List<NewsData> selectByCommodity(@Param("commodity") String commodity);

    List<NewsData> selectByDateRange(@Param("startDate") Date startDate, @Param("endDate") Date endDate);

    NewsData selectByTitleAndTime(@Param("title") String title, @Param("publishTime") Date publishTime);
}

115
project/src/main/java/com/example/crawler/model/IndexData.java

@ -0,0 +1,115 @@
package com.example.crawler.model;
import java.math.BigDecimal;
import java.util.Date;
public class IndexData {
private Long id;
private String indexName;
private Date date;
private BigDecimal indexValue;
private BigDecimal changeRate;
private String stockName;
private BigDecimal stockPrice;
private BigDecimal turnoverRate;
private Date createTime;
private String source;
public IndexData() {
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getIndexName() {
return indexName;
}
public void setIndexName(String indexName) {
this.indexName = indexName;
}
public Date getDate() {
return date;
}
public void setDate(Date date) {
this.date = date;
}
public BigDecimal getIndexValue() {
return indexValue;
}
public void setIndexValue(BigDecimal indexValue) {
this.indexValue = indexValue;
}
public BigDecimal getChangeRate() {
return changeRate;
}
public void setChangeRate(BigDecimal changeRate) {
this.changeRate = changeRate;
}
public String getStockName() {
return stockName;
}
public void setStockName(String stockName) {
this.stockName = stockName;
}
public BigDecimal getStockPrice() {
return stockPrice;
}
public void setStockPrice(BigDecimal stockPrice) {
this.stockPrice = stockPrice;
}
public BigDecimal getTurnoverRate() {
return turnoverRate;
}
public void setTurnoverRate(BigDecimal turnoverRate) {
this.turnoverRate = turnoverRate;
}
public Date getCreateTime() {
return createTime;
}
public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
@Override
public String toString() {
return "IndexData{" +
"id=" + id +
", indexName='" + indexName + '\'' +
", date=" + date +
", indexValue=" + indexValue +
", changeRate=" + changeRate +
", stockName='" + stockName + '\'' +
", stockPrice=" + stockPrice +
", turnoverRate=" + turnoverRate +
", source='" + source + '\'' +
'}';
}
}

125
project/src/main/java/com/example/crawler/model/MarketData.java

@ -0,0 +1,125 @@
package com.example.crawler.model;
import java.math.BigDecimal;
import java.util.Date;
public class MarketData {
private Long id;
private String variety;
private Date tradeDate;
private BigDecimal openPrice;
private BigDecimal closePrice;
private BigDecimal highPrice;
private BigDecimal lowPrice;
private BigDecimal volume;
private BigDecimal changeRate;
private Date createTime;
private String source;
public MarketData() {
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getVariety() {
return variety;
}
public void setVariety(String variety) {
this.variety = variety;
}
public Date getTradeDate() {
return tradeDate;
}
public void setTradeDate(Date tradeDate) {
this.tradeDate = tradeDate;
}
public BigDecimal getOpenPrice() {
return openPrice;
}
public void setOpenPrice(BigDecimal openPrice) {
this.openPrice = openPrice;
}
public BigDecimal getClosePrice() {
return closePrice;
}
public void setClosePrice(BigDecimal closePrice) {
this.closePrice = closePrice;
}
public BigDecimal getHighPrice() {
return highPrice;
}
public void setHighPrice(BigDecimal highPrice) {
this.highPrice = highPrice;
}
public BigDecimal getLowPrice() {
return lowPrice;
}
public void setLowPrice(BigDecimal lowPrice) {
this.lowPrice = lowPrice;
}
public BigDecimal getVolume() {
return volume;
}
public void setVolume(BigDecimal volume) {
this.volume = volume;
}
public BigDecimal getChangeRate() {
return changeRate;
}
public void setChangeRate(BigDecimal changeRate) {
this.changeRate = changeRate;
}
public Date getCreateTime() {
return createTime;
}
public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
@Override
public String toString() {
return "MarketData{" +
"id=" + id +
", variety='" + variety + '\'' +
", tradeDate=" + tradeDate +
", openPrice=" + openPrice +
", closePrice=" + closePrice +
", highPrice=" + highPrice +
", lowPrice=" + lowPrice +
", volume=" + volume +
", changeRate=" + changeRate +
", source='" + source + '\'' +
'}';
}
}

93
project/src/main/java/com/example/crawler/model/NewsData.java

@ -0,0 +1,93 @@
package com.example.crawler.model;
import java.util.Date;
public class NewsData {
private Long id;
private String title;
private String content;
private Date publishTime;
private String relatedCommodity;
private String sentiment;
private Date createTime;
private String source;
public NewsData() {
}
public Long getId() {
return id;
}
public void setId(Long id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public Date getPublishTime() {
return publishTime;
}
public void setPublishTime(Date publishTime) {
this.publishTime = publishTime;
}
public String getRelatedCommodity() {
return relatedCommodity;
}
public void setRelatedCommodity(String relatedCommodity) {
this.relatedCommodity = relatedCommodity;
}
public String getSentiment() {
return sentiment;
}
public void setSentiment(String sentiment) {
this.sentiment = sentiment;
}
public Date getCreateTime() {
return createTime;
}
public void setCreateTime(Date createTime) {
this.createTime = createTime;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
@Override
public String toString() {
return "NewsData{" +
"id=" + id +
", title='" + title + '\'' +
", publishTime=" + publishTime +
", relatedCommodity='" + relatedCommodity + '\'' +
", sentiment='" + sentiment + '\'' +
", source='" + source + '\'' +
'}';
}
}

196
project/src/main/java/com/example/crawler/monitor/DataBroadcaster.java

@ -0,0 +1,196 @@
package com.example.crawler.monitor;
import com.google.gson.Gson;
import org.java_websocket.WebSocket;
import org.java_websocket.handshake.ClientHandshake;
import org.java_websocket.server.WebSocketServer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.InetSocketAddress;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
public class DataBroadcaster {
private static final Logger logger = LoggerFactory.getLogger(DataBroadcaster.class);
private static final Gson gson = new Gson();
private WebSocketServer webSocketServer;
private final Map<WebSocket, Boolean> connections = new ConcurrentHashMap<>();
private final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
private volatile boolean running = false;
private int port = 8080;
private double goldPrice = 2320.50;
private double silverPrice = 28.50;
private double oilPrice = 78.50;
/**
 * Starts the WebSocket server on the given port and schedules the periodic broadcasts:
 * a heartbeat every 30 seconds and a price update every 2 seconds.
 * Does nothing except log a warning when the service is already marked as running.
 *
 * @param port TCP port the WebSocket server listens on
 */
public void start(int port) {
    this.port = port;
    if (running) {
        logger.warn("WebSocket服务器已在运行");
        return;
    }
    running = true;
    webSocketServer = new WebSocketServer(new InetSocketAddress(port)) {
        @Override
        public void onOpen(WebSocket conn, ClientHandshake handshake) {
            connections.put(conn, true);
            logger.info("客户端连接: {}, 当前连接数: {}", conn.getRemoteSocketAddress(), connections.size());
        }

        @Override
        public void onClose(WebSocket conn, int code, String reason, boolean remote) {
            connections.remove(conn);
            logger.info("客户端断开: {}, 剩余连接数: {}", conn.getRemoteSocketAddress(), connections.size());
        }

        @Override
        public void onMessage(WebSocket conn, String message) {
            logger.debug("收到消息: {}", message);
        }

        @Override
        public void onError(WebSocket conn, Exception ex) {
            // conn is null when the error does not belong to one specific client
            // (for example the server socket cannot be bound). ConcurrentHashMap.remove(null)
            // would throw NullPointerException and hide the real error.
            if (conn != null) {
                connections.remove(conn);
            }
            logger.error("WebSocket错误", ex);
        }

        @Override
        public void onStart() {
            logger.info("WebSocket服务器启动成功,端口: {}", port);
        }
    };
    webSocketServer.start();

    // Heartbeat: first run after 30 s, then every 30 s.
    scheduler.scheduleAtFixedRate(() -> {
        if (running) {
            broadcastHeartbeat();
        }
    }, 30, 30, TimeUnit.SECONDS);
    // Price updates: first run after 2 s, then every 2 s.
    scheduler.scheduleAtFixedRate(() -> {
        if (running) {
            sendPriceUpdates();
        }
    }, 2, 2, TimeUnit.SECONDS);
    logger.info("实时监控WebSocket服务已启动,监听端口: {}", port);
}
/**
 * Marks the service as stopped, shuts the scheduler down and stops the WebSocket server
 * (if one was created) with a 1000 ms timeout, then forgets all tracked connections.
 * If the stop is interrupted, the interrupt flag is restored and the error is logged.
 */
public void stop() {
    running = false;
    scheduler.shutdown();
    WebSocketServer server = webSocketServer;
    try {
        if (server != null) {
            server.stop(1000);
        }
        connections.clear();
        logger.info("WebSocket服务器已停止");
    } catch (InterruptedException interrupted) {
        Thread.currentThread().interrupt();
        logger.error("停止WebSocket服务器时被中断", interrupted);
    }
}
/**
 * Wraps the snapshots in a {@code PRICE_UPDATE} message, logs the JSON and
 * sends it to all tracked connections. Does nothing when the service is not
 * running or no client is connected.
 *
 * @param snapshots price snapshots to publish
 */
public void broadcastPriceUpdate(List<PriceSnapshot> snapshots) {
    boolean hasAudience = running && !connections.isEmpty();
    if (hasAudience) {
        BroadcastMessage envelope = new BroadcastMessage("PRICE_UPDATE", snapshots);
        String payload = gson.toJson(envelope);
        logger.info("推送价格更新: {}", payload);
        broadcast(payload);
    }
}
/**
 * Sends an {@code ALERT} message carrying the variety and alert text to all
 * tracked connections. Does nothing when the service is not running.
 *
 * @param variety commodity the alert refers to; may be null
 * @param message alert text; may be null
 */
public void broadcastAlert(String variety, String message) {
    if (!running) {
        return;
    }
    // Map.of rejects null values with a NullPointerException and has an
    // unspecified iteration order; a LinkedHashMap tolerates nulls and keeps
    // the keys in insertion order.
    Map<String, Object> payload = new java.util.LinkedHashMap<>();
    payload.put("variety", variety);
    payload.put("message", message);
    String json = gson.toJson(new BroadcastMessage("ALERT", payload));
    broadcast(json);
}
/**
 * Sends a {@code HEARTBEAT} message whose data is the current epoch time in
 * milliseconds.
 */
private void broadcastHeartbeat() {
    long now = System.currentTimeMillis();
    BroadcastMessage beat = new BroadcastMessage("HEARTBEAT", now);
    broadcast(gson.toJson(beat));
}
/**
 * Advances the three simulated prices (gold, silver, crude oil) by one random
 * step each and broadcasts the resulting snapshots.
 */
private void sendPriceUpdates() {
    List<PriceSnapshot> snapshots = new ArrayList<>();

    double previousGold = goldPrice;
    goldPrice = nextSimulatedPrice(previousGold, 10);
    snapshots.add(simulatedSnapshot("黄金", previousGold, goldPrice));

    double previousSilver = silverPrice;
    silverPrice = nextSimulatedPrice(previousSilver, 0.5);
    snapshots.add(simulatedSnapshot("白银", previousSilver, silverPrice));

    double previousOil = oilPrice;
    oilPrice = nextSimulatedPrice(previousOil, 1);
    snapshots.add(simulatedSnapshot("原油", previousOil, oilPrice));

    broadcastPriceUpdate(snapshots);
}

/**
 * Moves a price by a uniform random step in [-swing/2, +swing/2), never letting
 * it fall below 0.01. Without the floor the random walk could reach zero or a
 * negative value, which would make the next change rate infinite or NaN.
 */
private static double nextSimulatedPrice(double previous, double swing) {
    double next = previous + (Math.random() - 0.5) * swing;
    return Math.max(next, 0.01);
}

/** Builds a snapshot whose change rate is the percentage move from previous to current. */
private static PriceSnapshot simulatedSnapshot(String variety, double previous, double current) {
    double changeRate = (current - previous) / previous * 100;
    return new PriceSnapshot(variety, current, changeRate, System.currentTimeMillis());
}
/**
 * Sends the message to every tracked connection that is open. Connections
 * that are closed, or whose send throws, are removed from the map afterwards
 * and reported with a warning.
 *
 * @param message text frame to send
 */
private void broadcast(String message) {
    List<WebSocket> stale = new ArrayList<>();
    for (WebSocket client : connections.keySet()) {
        boolean delivered = false;
        try {
            if (client.isOpen()) {
                client.send(message);
                delivered = true;
            }
        } catch (Exception e) {
            logger.error("发送消息失败", e);
        }
        if (!delivered) {
            stale.add(client);
        }
    }
    for (WebSocket client : stale) {
        connections.remove(client);
        logger.warn("移除无效连接: {}", client.getRemoteSocketAddress());
    }
}
/** Returns the number of connections currently held in the connection map. */
public int getConnectionCount() {
return connections.size();
}
/** Returns the running flag: true after start() and until stop() is called. */
public boolean isRunning() {
return running;
}
/**
 * Envelope serialized to JSON for every outgoing frame: a type tag plus an
 * arbitrary payload object.
 */
public static class BroadcastMessage {
    private String type;
    private Object data;

    /**
     * @param messageType tag identifying the kind of message (e.g. "HEARTBEAT")
     * @param payload     object carried by the message; may be null
     */
    public BroadcastMessage(String messageType, Object payload) {
        type = messageType;
        data = payload;
    }

    /** @return the payload object */
    public Object getData() {
        return this.data;
    }

    /** @return the type tag */
    public String getType() {
        return this.type;
    }
}
}

107
project/src/main/java/com/example/crawler/monitor/PriceSnapshot.java

@ -0,0 +1,107 @@
package com.example.crawler.monitor;
import java.math.BigDecimal;
import java.util.Date;
public class PriceSnapshot {
private String variety;
private BigDecimal currentPrice;
private BigDecimal changeRate;
private BigDecimal openPrice;
private BigDecimal highPrice;
private BigDecimal lowPrice;
private String source;
private Date timestamp;
public PriceSnapshot() {
this.timestamp = new Date();
}
public PriceSnapshot(String variety, BigDecimal currentPrice, BigDecimal changeRate) {
this.variety = variety;
this.currentPrice = currentPrice;
this.changeRate = changeRate;
this.timestamp = new Date();
}
public PriceSnapshot(String variety, double currentPrice, double changeRate, long timestamp) {
this.variety = variety;
this.currentPrice = BigDecimal.valueOf(currentPrice);
this.changeRate = BigDecimal.valueOf(changeRate);
this.timestamp = new Date(timestamp);
}
public String getVariety() {
return variety;
}
public void setVariety(String variety) {
this.variety = variety;
}
public BigDecimal getCurrentPrice() {
return currentPrice;
}
public void setCurrentPrice(BigDecimal currentPrice) {
this.currentPrice = currentPrice;
}
public BigDecimal getChangeRate() {
return changeRate;
}
public void setChangeRate(BigDecimal changeRate) {
this.changeRate = changeRate;
}
public BigDecimal getOpenPrice() {
return openPrice;
}
public void setOpenPrice(BigDecimal openPrice) {
this.openPrice = openPrice;
}
public BigDecimal getHighPrice() {
return highPrice;
}
public void setHighPrice(BigDecimal highPrice) {
this.highPrice = highPrice;
}
public BigDecimal getLowPrice() {
return lowPrice;
}
public void setLowPrice(BigDecimal lowPrice) {
this.lowPrice = lowPrice;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
public Date getTimestamp() {
return timestamp;
}
public void setTimestamp(Date timestamp) {
this.timestamp = timestamp;
}
@Override
public String toString() {
return "PriceSnapshot{" +
"variety='" + variety + '\'' +
", currentPrice=" + currentPrice +
", changeRate=" + changeRate +
", timestamp=" + timestamp +
'}';
}
}

103
project/src/main/java/com/example/crawler/repository/IndexDataRepository.java

@ -0,0 +1,103 @@
package com.example.crawler.repository;
import com.example.crawler.exception.DbException;
import com.example.crawler.mapper.IndexDataMapper;
import com.example.crawler.model.IndexData;
import com.example.crawler.util.DataValidator;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * MyBatis-backed repository for {@link IndexData}. Every operation opens its
 * own auto-commit session and wraps any failure in {@link DbException}.
 */
public class IndexDataRepository {
    private static final Logger logger = LoggerFactory.getLogger(IndexDataRepository.class);
    private final SqlSessionFactory sqlSessionFactory;

    /**
     * @param sqlSessionFactory factory used to open a session per operation
     */
    public IndexDataRepository(SqlSessionFactory sqlSessionFactory) {
        this.sqlSessionFactory = sqlSessionFactory;
    }

    /**
     * Inserts one record unless it fails validation or a record with the same
     * date and index name already exists.
     *
     * @param data record to insert
     * @return number of rows inserted; 0 when the record was skipped
     * @throws DbException if the lookup or the insert fails
     */
    public int save(IndexData data) throws DbException {
        if (!DataValidator.validateIndexData(data)) {
            logger.warn("IndexData数据校验失败,跳过入库: {}", data);
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            IndexDataMapper mapper = session.getMapper(IndexDataMapper.class);
            // Check for duplicates on this session's mapper rather than via
            // exists(), which would open a second session while this one is
            // still held and double-wrap its DbException.
            if (mapper.selectByDateAndIndex(data.getDate(), data.getIndexName()) != null) {
                logger.debug("数据已存在,跳过: {}", data);
                return 0;
            }
            int result = mapper.insert(data);
            if (result > 0) {
                logger.debug("成功插入1条IndexData数据");
            }
            return result;
        } catch (Exception e) {
            throw new DbException("插入IndexData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * Inserts all records that pass validation in a single batch statement.
     * Unlike {@link #save(IndexData)} this does not check for existing rows.
     *
     * @param dataList records to insert; null or empty yields 0
     * @return value returned by the mapper's batch insert
     * @throws DbException if the batch insert fails
     */
    public int batchSave(List<IndexData> dataList) throws DbException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        List<IndexData> validDataList = new ArrayList<>();
        for (IndexData data : dataList) {
            if (DataValidator.validateIndexData(data)) {
                validDataList.add(data);
            } else {
                logger.warn("IndexData数据校验失败,过滤: {}", data);
            }
        }
        if (validDataList.isEmpty()) {
            logger.warn("所有数据均校验失败,跳过批量入库");
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            IndexDataMapper mapper = session.getMapper(IndexDataMapper.class);
            int result = mapper.batchInsert(validDataList);
            logger.info("成功批量插入{}条IndexData数据", result);
            return result;
        } catch (Exception e) {
            throw new DbException("批量插入IndexData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param data record whose date and index name are looked up
     * @return true when a record with the same date and index name exists
     * @throws DbException if the lookup fails (including when data is null)
     */
    public boolean exists(IndexData data) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            IndexDataMapper mapper = session.getMapper(IndexDataMapper.class);
            IndexData existing = mapper.selectByDateAndIndex(data.getDate(), data.getIndexName());
            return existing != null;
        } catch (Exception e) {
            throw new DbException("检查IndexData数据是否存在失败: " + e.getMessage(), e);
        }
    }

    /**
     * @return every stored record as returned by the mapper
     * @throws DbException if the query fails
     */
    public List<IndexData> findAll() throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            IndexDataMapper mapper = session.getMapper(IndexDataMapper.class);
            return mapper.selectAll();
        } catch (Exception e) {
            throw new DbException("查询所有IndexData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @return record count reported by the mapper
     * @throws DbException if the query fails
     */
    public int count() throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            IndexDataMapper mapper = session.getMapper(IndexDataMapper.class);
            return mapper.count();
        } catch (Exception e) {
            throw new DbException("统计IndexData数据数量失败: " + e.getMessage(), e);
        }
    }
}

112
project/src/main/java/com/example/crawler/repository/MarketDataRepository.java

@ -0,0 +1,112 @@
package com.example.crawler.repository;
import com.example.crawler.exception.DbException;
import com.example.crawler.mapper.MarketDataMapper;
import com.example.crawler.model.MarketData;
import com.example.crawler.util.DataValidator;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * MyBatis-backed repository for {@link MarketData}. Every operation opens its
 * own auto-commit session and wraps any failure in {@link DbException}.
 */
public class MarketDataRepository {
    private static final Logger logger = LoggerFactory.getLogger(MarketDataRepository.class);
    private final SqlSessionFactory sqlSessionFactory;

    /**
     * @param sqlSessionFactory factory used to open a session per operation
     */
    public MarketDataRepository(SqlSessionFactory sqlSessionFactory) {
        this.sqlSessionFactory = sqlSessionFactory;
    }

    /**
     * Inserts one record unless it fails validation or a record with the same
     * trade date and variety already exists.
     *
     * @param data record to insert
     * @return number of rows inserted; 0 when the record was skipped
     * @throws DbException if the lookup or the insert fails
     */
    public int save(MarketData data) throws DbException {
        if (!DataValidator.validateMarketData(data)) {
            logger.warn("MarketData数据校验失败,跳过入库: {}", data);
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            // Check for duplicates on this session's mapper rather than via
            // exists(), which would open a second session while this one is
            // still held and double-wrap its DbException.
            if (mapper.selectByDateAndVariety(data.getTradeDate(), data.getVariety()) != null) {
                logger.debug("数据已存在,跳过: {}", data);
                return 0;
            }
            int result = mapper.insert(data);
            if (result > 0) {
                logger.debug("成功插入1条MarketData数据");
            }
            return result;
        } catch (Exception e) {
            throw new DbException("插入MarketData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * Inserts all records that pass validation in a single batch statement.
     * Unlike {@link #save(MarketData)} this does not check for existing rows.
     *
     * @param dataList records to insert; null or empty yields 0
     * @return value returned by the mapper's batch insert
     * @throws DbException if the batch insert fails
     */
    public int batchSave(List<MarketData> dataList) throws DbException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        List<MarketData> validDataList = new ArrayList<>();
        for (MarketData data : dataList) {
            if (DataValidator.validateMarketData(data)) {
                validDataList.add(data);
            } else {
                logger.warn("MarketData数据校验失败,过滤: {}", data);
            }
        }
        if (validDataList.isEmpty()) {
            logger.warn("所有数据均校验失败,跳过批量入库");
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            int result = mapper.batchInsert(validDataList);
            logger.info("成功批量插入{}条MarketData数据", result);
            return result;
        } catch (Exception e) {
            throw new DbException("批量插入MarketData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param data record whose trade date and variety are looked up
     * @return true when a record with the same trade date and variety exists
     * @throws DbException if the lookup fails (including when data is null)
     */
    public boolean exists(MarketData data) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            MarketData existing = mapper.selectByDateAndVariety(data.getTradeDate(), data.getVariety());
            return existing != null;
        } catch (Exception e) {
            throw new DbException("检查MarketData数据是否存在失败: " + e.getMessage(), e);
        }
    }

    /**
     * @return every stored record as returned by the mapper
     * @throws DbException if the query fails
     */
    public List<MarketData> findAll() throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            return mapper.selectAll();
        } catch (Exception e) {
            throw new DbException("查询所有MarketData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param variety variety to filter by
     * @return records the mapper returns for that variety
     * @throws DbException if the query fails
     */
    public List<MarketData> findByVariety(String variety) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            return mapper.selectByVariety(variety);
        } catch (Exception e) {
            throw new DbException("按品种查询MarketData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param variety variety to count
     * @return record count the mapper reports for that variety
     * @throws DbException if the query fails
     */
    public int countByVariety(String variety) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            MarketDataMapper mapper = session.getMapper(MarketDataMapper.class);
            return mapper.countByVariety(variety);
        } catch (Exception e) {
            throw new DbException("统计MarketData数据数量失败: " + e.getMessage(), e);
        }
    }
}

112
project/src/main/java/com/example/crawler/repository/NewsDataRepository.java

@ -0,0 +1,112 @@
package com.example.crawler.repository;
import com.example.crawler.exception.DbException;
import com.example.crawler.mapper.NewsDataMapper;
import com.example.crawler.model.NewsData;
import com.example.crawler.util.DataValidator;
import org.apache.ibatis.session.SqlSession;
import org.apache.ibatis.session.SqlSessionFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * MyBatis-backed repository for {@link NewsData}. Every operation opens its
 * own auto-commit session and wraps any failure in {@link DbException}.
 */
public class NewsDataRepository {
    private static final Logger logger = LoggerFactory.getLogger(NewsDataRepository.class);
    private final SqlSessionFactory sqlSessionFactory;

    /**
     * @param sqlSessionFactory factory used to open a session per operation
     */
    public NewsDataRepository(SqlSessionFactory sqlSessionFactory) {
        this.sqlSessionFactory = sqlSessionFactory;
    }

    /**
     * Inserts one record unless it fails validation or a record with the same
     * title and publish time already exists.
     *
     * @param data record to insert
     * @return number of rows inserted; 0 when the record was skipped
     * @throws DbException if the lookup or the insert fails
     */
    public int save(NewsData data) throws DbException {
        if (!DataValidator.validateNewsData(data)) {
            logger.warn("NewsData数据校验失败,跳过入库: {}", data);
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            // Check for duplicates on this session's mapper rather than via
            // exists(), which would open a second session while this one is
            // still held and double-wrap its DbException.
            if (mapper.selectByTitleAndTime(data.getTitle(), data.getPublishTime()) != null) {
                logger.debug("数据已存在,跳过: {}", data);
                return 0;
            }
            int result = mapper.insert(data);
            if (result > 0) {
                logger.debug("成功插入1条NewsData数据");
            }
            return result;
        } catch (Exception e) {
            throw new DbException("插入NewsData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * Inserts all records that pass validation in a single batch statement.
     * Unlike {@link #save(NewsData)} this does not check for existing rows.
     *
     * @param dataList records to insert; null or empty yields 0
     * @return value returned by the mapper's batch insert
     * @throws DbException if the batch insert fails
     */
    public int batchSave(List<NewsData> dataList) throws DbException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        List<NewsData> validDataList = new ArrayList<>();
        for (NewsData data : dataList) {
            if (DataValidator.validateNewsData(data)) {
                validDataList.add(data);
            } else {
                logger.warn("NewsData数据校验失败,过滤: {}", data);
            }
        }
        if (validDataList.isEmpty()) {
            logger.warn("所有数据均校验失败,跳过批量入库");
            return 0;
        }
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            int result = mapper.batchInsert(validDataList);
            logger.info("成功批量插入{}条NewsData数据", result);
            return result;
        } catch (Exception e) {
            throw new DbException("批量插入NewsData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param data record whose title and publish time are looked up
     * @return true when a record with the same title and publish time exists
     * @throws DbException if the lookup fails (including when data is null)
     */
    public boolean exists(NewsData data) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            NewsData existing = mapper.selectByTitleAndTime(data.getTitle(), data.getPublishTime());
            return existing != null;
        } catch (Exception e) {
            throw new DbException("检查NewsData数据是否存在失败: " + e.getMessage(), e);
        }
    }

    /**
     * @return every stored record as returned by the mapper
     * @throws DbException if the query fails
     */
    public List<NewsData> findAll() throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            return mapper.selectAll();
        } catch (Exception e) {
            throw new DbException("查询所有NewsData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param commodity commodity to filter by
     * @return records the mapper returns for that commodity
     * @throws DbException if the query fails
     */
    public List<NewsData> findByCommodity(String commodity) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            return mapper.selectByCommodity(commodity);
        } catch (Exception e) {
            throw new DbException("按商品查询NewsData数据失败: " + e.getMessage(), e);
        }
    }

    /**
     * @param sentiment sentiment label to count
     * @return record count the mapper reports for that sentiment
     * @throws DbException if the query fails
     */
    public int countBySentiment(String sentiment) throws DbException {
        try (SqlSession session = sqlSessionFactory.openSession(true)) {
            NewsDataMapper mapper = session.getMapper(NewsDataMapper.class);
            return mapper.countBySentiment(sentiment);
        } catch (Exception e) {
            throw new DbException("统计NewsData数据数量失败: " + e.getMessage(), e);
        }
    }
}

13
project/src/main/java/com/example/crawler/strategy/CrawlStrategy.java

@ -0,0 +1,13 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.BaseCrawlException;
import java.util.List;
/**
 * Strategy for one data source: produces a list of records, persists a list
 * of records, and reports the source's display name.
 */
public interface CrawlStrategy {
/**
 * Collects records from the site.
 *
 * @param pageCount number of pages requested; how it maps to a record count is up to the implementation
 * @return the collected records; the element type depends on the implementation
 * @throws BaseCrawlException if crawling fails
 */
List<?> crawlData(int pageCount) throws BaseCrawlException;
/**
 * Persists records previously returned by {@link #crawlData(int)}.
 *
 * @param dataList records to store
 * @return number of records stored
 * @throws BaseCrawlException if saving fails
 */
int saveData(List<?> dataList) throws BaseCrawlException;
/** @return display name of the site this strategy handles */
String getSiteName();
}

28
project/src/main/java/com/example/crawler/strategy/CrawlStrategyFactory.java

@ -0,0 +1,28 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.ParamException;
/**
 * Static factory that maps a site code to its {@link CrawlStrategy}.
 */
public class CrawlStrategyFactory {
    private CrawlStrategyFactory() {
        // static factory; not instantiable
    }

    /**
     * Creates the strategy for the given site code. Matching ignores case and
     * surrounding whitespace. Accepted codes: {@code jintou}/{@code gold},
     * {@code eastmoney}/{@code east}, {@code tonghuashun}/{@code ths}.
     *
     * @param siteCode site identifier
     * @return a new strategy instance for that site
     * @throws ParamException if the code is null, blank or not supported
     */
    public static CrawlStrategy createStrategy(String siteCode) throws ParamException {
        if (siteCode == null || siteCode.trim().isEmpty()) {
            throw new ParamException("站点标识不能为空");
        }
        // Trim so the switch sees the same value the blank check accepted, and
        // lower-case with Locale.ROOT so the result does not depend on the JVM
        // default locale (e.g. Turkish maps 'I' to a dotless 'ı').
        String normalized = siteCode.trim().toLowerCase(java.util.Locale.ROOT);
        switch (normalized) {
            case "jintou":
            case "gold":
                return new JinTouCrawlStrategy();
            case "eastmoney":
            case "east":
                return new EastMoneyCrawlStrategy();
            case "tonghuashun":
            case "ths":
                return new TongHuaShunCrawlStrategy();
            default:
                throw new ParamException("不支持的站点标识: " + siteCode);
        }
    }
}

147
project/src/main/java/com/example/crawler/strategy/EastMoneyCrawlStrategy.java

@ -0,0 +1,147 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.BaseCrawlException;
import com.example.crawler.exception.DbException;
import com.example.crawler.model.IndexData;
import com.example.crawler.repository.IndexDataRepository;
import com.example.crawler.util.ConfigUtil;
import com.example.crawler.util.HttpUtil;
import com.example.crawler.util.MyBatisUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * {@link CrawlStrategy} for the "东方财富网" source. The records are generated
 * locally with a random number generator (see generateSimulatedData); no HTTP
 * request is made by the code in this class.
 */
public class EastMoneyCrawlStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(EastMoneyCrawlStrategy.class);
    private static final String SITE_NAME = "东方财富网";
    // The three arrays below are parallel: same index = same simulated index series.
    private static final String[] INDEX_NAMES = {"大宗商品指数", "黄金概念", "石油行业"};
    private static final double[] BASE_INDEX_VALUES = {3000.0, 5000.0, 3500.0};
    private static final double[] INDEX_VARIANCES = {50.0, 100.0, 80.0};
    // Stays null when the SqlSessionFactory could not be obtained in the constructor.
    private IndexDataRepository repository;
    private final Random random = new Random();

    /**
     * Creates the strategy. A failure to initialise the repository is logged,
     * not thrown; {@link #saveData(List)} reports it when it is actually needed.
     */
    public EastMoneyCrawlStrategy() {
        try {
            this.repository = new IndexDataRepository(MyBatisUtil.getSqlSessionFactory());
        } catch (DbException e) {
            logger.error("初始化IndexDataRepository失败", e);
        }
    }

    /**
     * Generates simulated records for every configured index, pausing between
     * indexes for the configured request interval plus up to 500 ms of jitter.
     * A failure for one index is logged and the remaining indexes continue.
     *
     * @param pageCount each page corresponds to 20 calendar days per index
     * @return list of {@link IndexData}
     */
    @Override
    public List<?> crawlData(int pageCount) throws BaseCrawlException {
        List<IndexData> allData = new ArrayList<>();
        for (int i = 0; i < INDEX_NAMES.length; i++) {
            String indexName = INDEX_NAMES[i];
            logger.info("开始爬取{} - {}数据", SITE_NAME, indexName);
            try {
                List<IndexData> indexDataList = generateSimulatedData(indexName, pageCount * 20, i);
                allData.addAll(indexDataList);
                logger.info("{}爬取完成,获取{}条数据", indexName, indexDataList.size());
                int interval = ConfigUtil.getInt("crawl.request.interval", 2000) + random.nextInt(500);
                HttpUtil.sleep(interval);
            } catch (Exception e) {
                logger.warn("爬取{}时发生异常: {}", indexName, e.getMessage());
            }
        }
        logger.info("{}数据爬取完成,共获取{}条数据", SITE_NAME, allData.size());
        return allData;
    }

    /**
     * Builds one record per weekday over the {@code count} calendar days ending
     * today. Weekend days are skipped, so fewer than {@code count} records are
     * returned.
     *
     * NOTE(review): each date keeps the time of day at which this method runs,
     * because it derives from Calendar.getInstance().
     */
    private List<IndexData> generateSimulatedData(String indexName, int count, int index) {
        List<IndexData> dataList = new ArrayList<>();
        double baseValue = BASE_INDEX_VALUES[index];
        double variance = INDEX_VARIANCES[index];
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_YEAR, -count);
        for (int i = 0; i < count; i++) {
            cal.add(Calendar.DAY_OF_YEAR, 1);
            if (isWeekend(cal)) {
                continue;
            }
            // Only allocate the record once we know the day is kept.
            IndexData data = new IndexData();
            data.setIndexName(indexName);
            data.setSource(SITE_NAME);
            data.setCreateTime(new Date());
            data.setDate(cal.getTime());
            double change = (random.nextDouble() - 0.5) * variance;
            // Base value plus random noise plus a small upward drift per day.
            double currentValue = baseValue + change + (i * 0.005);
            BigDecimal indexValue = BigDecimal.valueOf(currentValue).setScale(2, RoundingMode.HALF_UP);
            BigDecimal changeRate = BigDecimal.valueOf((random.nextDouble() - 0.5) * 4).setScale(2, RoundingMode.HALF_UP);
            data.setIndexValue(indexValue);
            data.setChangeRate(changeRate);
            data.setStockName(getRandomStockName(indexName));
            data.setStockPrice(BigDecimal.valueOf(random.nextDouble() * 100 + 10).setScale(2, RoundingMode.HALF_UP));
            data.setTurnoverRate(BigDecimal.valueOf(random.nextDouble() * 10).setScale(2, RoundingMode.HALF_UP));
            dataList.add(data);
        }
        return dataList;
    }

    /** Picks a random stock name from the pool that matches the index name. */
    private String getRandomStockName(String indexName) {
        String[] goldStocks = {"中金黄金", "山东黄金", "紫金矿业", "西部黄金"};
        String[] oilStocks = {"中国石油", "中国石化", "中海油服", "华锦股份"};
        String[] commodityStocks = {"大宗商品A", "大宗商品B", "商贸龙头", "供应链优选"};
        if (indexName.contains("黄金")) {
            return goldStocks[random.nextInt(goldStocks.length)];
        } else if (indexName.contains("石油")) {
            return oilStocks[random.nextInt(oilStocks.length)];
        } else {
            return commodityStocks[random.nextInt(commodityStocks.length)];
        }
    }

    private boolean isWeekend(Calendar cal) {
        int dayOfWeek = cal.get(Calendar.DAY_OF_WEEK);
        return dayOfWeek == Calendar.SATURDAY || dayOfWeek == Calendar.SUNDAY;
    }

    /**
     * Batch-saves every {@link IndexData} element of the list; elements of any
     * other type are ignored.
     *
     * @param dataList records to store; null or empty yields 0
     * @return number of records stored as reported by the repository
     * @throws BaseCrawlException if the repository is unavailable or the save fails
     */
    @Override
    public int saveData(List<?> dataList) throws BaseCrawlException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        if (repository == null) {
            // The constructor only logs an initialisation failure; surface it
            // here as a crawl error instead of a bare NullPointerException.
            throw new BaseCrawlException("保存数据失败: IndexDataRepository未初始化",
                    new IllegalStateException("IndexDataRepository is not initialised"));
        }
        try {
            List<IndexData> indexDataList = new ArrayList<>();
            for (Object obj : dataList) {
                if (obj instanceof IndexData) {
                    indexDataList.add((IndexData) obj);
                }
            }
            return repository.batchSave(indexDataList);
        } catch (DbException e) {
            throw new BaseCrawlException("保存数据失败: " + e.getMessage(), e);
        }
    }

    @Override
    public String getSiteName() {
        return SITE_NAME;
    }
}

138
project/src/main/java/com/example/crawler/strategy/JinTouCrawlStrategy.java

@ -0,0 +1,138 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.BaseCrawlException;
import com.example.crawler.exception.DbException;
import com.example.crawler.model.MarketData;
import com.example.crawler.repository.MarketDataRepository;
import com.example.crawler.util.ConfigUtil;
import com.example.crawler.util.HttpUtil;
import com.example.crawler.util.MyBatisUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * {@link CrawlStrategy} for the "金投网" source. The records are generated
 * locally with a random number generator (see generateSimulatedData); no HTTP
 * request is made by the code in this class.
 */
public class JinTouCrawlStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(JinTouCrawlStrategy.class);
    private static final String SITE_NAME = "金投网";
    // The three arrays below are parallel: same index = same variety.
    private static final String[] VARIETIES = {"黄金", "白银", "原油"};
    private static final double[] BASE_PRICES = {450.0, 5800.0, 75.0};
    private static final double[] PRICE_VARIANCES = {20.0, 300.0, 5.0};
    // Stays null when the SqlSessionFactory could not be obtained in the constructor.
    private MarketDataRepository repository;
    private final Random random = new Random();

    /**
     * Creates the strategy. A failure to initialise the repository is logged,
     * not thrown; {@link #saveData(List)} reports it when it is actually needed.
     */
    public JinTouCrawlStrategy() {
        try {
            this.repository = new MarketDataRepository(MyBatisUtil.getSqlSessionFactory());
        } catch (DbException e) {
            logger.error("初始化MarketDataRepository失败", e);
        }
    }

    /**
     * Generates simulated records for every variety, pausing between varieties
     * for the configured request interval plus up to 500 ms of jitter. A
     * failure for one variety is logged and the remaining varieties continue.
     *
     * @param pageCount each page corresponds to 30 calendar days per variety
     * @return list of {@link MarketData}
     */
    @Override
    public List<?> crawlData(int pageCount) throws BaseCrawlException {
        List<MarketData> allData = new ArrayList<>();
        for (int i = 0; i < VARIETIES.length; i++) {
            String variety = VARIETIES[i];
            logger.info("开始爬取{} - {}数据", SITE_NAME, variety);
            try {
                List<MarketData> varietyData = generateSimulatedData(variety, pageCount * 30, i);
                allData.addAll(varietyData);
                logger.info("{}爬取完成,获取{}条数据", variety, varietyData.size());
                int interval = ConfigUtil.getInt("crawl.request.interval", 2000) + random.nextInt(500);
                HttpUtil.sleep(interval);
            } catch (Exception e) {
                logger.warn("爬取{}时发生异常: {}", variety, e.getMessage());
            }
        }
        logger.info("{}数据爬取完成,共获取{}条数据", SITE_NAME, allData.size());
        return allData;
    }

    /**
     * Builds one record per weekday over the {@code count} calendar days ending
     * today. Weekend days are skipped, so fewer than {@code count} records are
     * returned.
     *
     * NOTE(review): each trade date keeps the time of day at which this method
     * runs, because it derives from Calendar.getInstance().
     */
    private List<MarketData> generateSimulatedData(String variety, int count, int varietyIndex) {
        List<MarketData> dataList = new ArrayList<>();
        double basePrice = BASE_PRICES[varietyIndex];
        double variance = PRICE_VARIANCES[varietyIndex];
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_YEAR, -count);
        for (int i = 0; i < count; i++) {
            cal.add(Calendar.DAY_OF_YEAR, 1);
            if (isWeekend(cal)) {
                continue;
            }
            // Only allocate the record once we know the day is kept.
            MarketData data = new MarketData();
            data.setVariety(variety);
            data.setSource(SITE_NAME);
            data.setCreateTime(new Date());
            data.setTradeDate(cal.getTime());
            double change = (random.nextDouble() - 0.5) * variance;
            // Base price plus random noise plus a small upward drift per day.
            double currentPrice = basePrice + change + (i * 0.01);
            BigDecimal openPrice = BigDecimal.valueOf(currentPrice + (random.nextDouble() - 0.5) * 2).setScale(2, RoundingMode.HALF_UP);
            BigDecimal closePrice = BigDecimal.valueOf(currentPrice).setScale(2, RoundingMode.HALF_UP);
            // High/low are pushed outside the open/close range by up to 2.
            BigDecimal highPrice = BigDecimal.valueOf(Math.max(openPrice.doubleValue(), closePrice.doubleValue()) + random.nextDouble() * 2).setScale(2, RoundingMode.HALF_UP);
            BigDecimal lowPrice = BigDecimal.valueOf(Math.min(openPrice.doubleValue(), closePrice.doubleValue()) - random.nextDouble() * 2).setScale(2, RoundingMode.HALF_UP);
            BigDecimal volume = BigDecimal.valueOf(random.nextDouble() * 100000 + 10000).setScale(2, RoundingMode.HALF_UP);
            BigDecimal changeRate = BigDecimal.valueOf((random.nextDouble() - 0.5) * 6).setScale(2, RoundingMode.HALF_UP);
            data.setOpenPrice(openPrice);
            data.setClosePrice(closePrice);
            data.setHighPrice(highPrice);
            data.setLowPrice(lowPrice);
            data.setVolume(volume);
            data.setChangeRate(changeRate);
            dataList.add(data);
        }
        return dataList;
    }

    private boolean isWeekend(Calendar cal) {
        int dayOfWeek = cal.get(Calendar.DAY_OF_WEEK);
        return dayOfWeek == Calendar.SATURDAY || dayOfWeek == Calendar.SUNDAY;
    }

    /**
     * Batch-saves every {@link MarketData} element of the list; elements of
     * any other type are ignored.
     *
     * @param dataList records to store; null or empty yields 0
     * @return number of records stored as reported by the repository
     * @throws BaseCrawlException if the repository is unavailable or the save fails
     */
    @Override
    public int saveData(List<?> dataList) throws BaseCrawlException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        if (repository == null) {
            // The constructor only logs an initialisation failure; surface it
            // here as a crawl error instead of a bare NullPointerException.
            throw new BaseCrawlException("保存数据失败: MarketDataRepository未初始化",
                    new IllegalStateException("MarketDataRepository is not initialised"));
        }
        try {
            List<MarketData> marketDataList = new ArrayList<>();
            for (Object obj : dataList) {
                if (obj instanceof MarketData) {
                    marketDataList.add((MarketData) obj);
                }
            }
            return repository.batchSave(marketDataList);
        } catch (DbException e) {
            throw new BaseCrawlException("保存数据失败: " + e.getMessage(), e);
        }
    }

    @Override
    public String getSiteName() {
        return SITE_NAME;
    }
}

162
project/src/main/java/com/example/crawler/strategy/TongHuaShunCrawlStrategy.java

@ -0,0 +1,162 @@
package com.example.crawler.strategy;
import com.example.crawler.exception.BaseCrawlException;
import com.example.crawler.exception.DbException;
import com.example.crawler.model.NewsData;
import com.example.crawler.repository.NewsDataRepository;
import com.example.crawler.util.ConfigUtil;
import com.example.crawler.util.HttpUtil;
import com.example.crawler.util.MyBatisUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Random;
/**
 * {@link CrawlStrategy} for the "同花顺财经" news source. The articles are
 * generated locally from fixed title templates (see generateSimulatedData);
 * no HTTP request is made by the code in this class.
 */
public class TongHuaShunCrawlStrategy implements CrawlStrategy {
    private static final Logger logger = LoggerFactory.getLogger(TongHuaShunCrawlStrategy.class);
    private static final String SITE_NAME = "同花顺财经";
    private static final String[] COMMODITIES = {"黄金", "白银", "原油", "大宗商品"};
    private static final String[] TITLES = {
        "美联储加息预期升温,金价承压下跌",
        "原油库存意外下降,油价强势反弹",
        "避险情绪升温,黄金突破关键阻力位",
        "白银工业需求强劲,价格有望继续走高",
        "全球供应链紧张,大宗商品价格普涨",
        "美元指数走弱,黄金白银获得支撑",
        "OPEC+维持减产协议,原油供应趋紧",
        "黄金ETF持仓量创历史新高",
        "白银突破30美元大关,创近年新高",
        "大宗商品超级周期来临,能源金属领涨"
    };
    // Stays null when the SqlSessionFactory could not be obtained in the constructor.
    private NewsDataRepository repository;
    private final Random random = new Random();

    /**
     * Creates the strategy. A failure to initialise the repository is logged,
     * not thrown; {@link #saveData(List)} reports it when it is actually needed.
     */
    public TongHuaShunCrawlStrategy() {
        try {
            this.repository = new NewsDataRepository(MyBatisUtil.getSqlSessionFactory());
        } catch (DbException e) {
            logger.error("初始化NewsDataRepository失败", e);
        }
    }

    /**
     * Generates simulated news records, then pauses for the configured request
     * interval plus up to 500 ms of jitter. Any failure is logged and whatever
     * was collected so far is returned.
     *
     * @param pageCount each page corresponds to 30 articles (one per day)
     * @return list of {@link NewsData}
     */
    @Override
    public List<?> crawlData(int pageCount) throws BaseCrawlException {
        List<NewsData> allData = new ArrayList<>();
        logger.info("开始爬取{}新闻数据", SITE_NAME);
        try {
            List<NewsData> newsDataList = generateSimulatedData(pageCount * 30);
            allData.addAll(newsDataList);
            logger.info("{}爬取完成,获取{}条数据", SITE_NAME, newsDataList.size());
            int interval = ConfigUtil.getInt("crawl.request.interval", 2000) + random.nextInt(500);
            HttpUtil.sleep(interval);
        } catch (Exception e) {
            logger.warn("爬取{}时发生异常: {}", SITE_NAME, e.getMessage());
        }
        logger.info("{}数据爬取完成,共获取{}条数据", SITE_NAME, allData.size());
        return allData;
    }

    /**
     * Builds one article per calendar day for the {@code count} days ending
     * today. The title is a random template suffixed with the ISO date, the
     * commodity is chosen at random and the sentiment is derived from the title.
     */
    private List<NewsData> generateSimulatedData(int count) {
        List<NewsData> dataList = new ArrayList<>();
        Calendar cal = Calendar.getInstance();
        cal.add(Calendar.DAY_OF_YEAR, -count);
        for (int i = 0; i < count; i++) {
            NewsData data = new NewsData();
            data.setSource(SITE_NAME);
            data.setCreateTime(new Date());
            cal.add(Calendar.DAY_OF_YEAR, 1);
            data.setPublishTime(cal.getTime());
            String title = TITLES[random.nextInt(TITLES.length)] + "(" +
                    String.format("%tF", cal.getTime()) + ")";
            data.setTitle(title);
            String commodity = COMMODITIES[random.nextInt(COMMODITIES.length)];
            data.setRelatedCommodity(commodity);
            data.setSentiment(analyzeSentiment(title));
            data.setContent(generateContent(title, commodity));
            dataList.add(data);
        }
        return dataList;
    }

    /**
     * Keyword-based sentiment: counts how many positive and negative keywords
     * occur in the text and returns "利好", "利空" or "中性" (on a tie).
     */
    private String analyzeSentiment(String text) {
        int positiveCount = 0;
        int negativeCount = 0;
        String[] positiveWords = {"利好", "上涨", "大涨", "上升", "突破", "走强", "创新高", "强劲", "支撑"};
        String[] negativeWords = {"利空", "下跌", "大跌", "下降", "跌破", "走弱", "创新低", "承压", "紧张"};
        for (String word : positiveWords) {
            if (text.contains(word)) {
                positiveCount++;
            }
        }
        for (String word : negativeWords) {
            if (text.contains(word)) {
                negativeCount++;
            }
        }
        if (positiveCount > negativeCount) {
            return "利好";
        } else if (negativeCount > positiveCount) {
            return "利空";
        } else {
            return "中性";
        }
    }

    /** Fills a fixed article template with the title and commodity name. */
    private String generateContent(String title, String commodity) {
        return "【" + title + "】\n\n" +
                "市场分析:近期" + commodity + "市场出现明显波动,分析师普遍认为" +
                "当前价格走势受多重因素影响。技术面上,价格已突破关键阻力位," +
                "若能有效站稳,后市有望继续走高。基本面上,供应端和需求端的双重作用," +
                "正在推动价格向新的均衡点移动。\n\n" +
                "操作建议:投资者应密切关注重要数据发布,合理控制仓位,做好风险管理。";
    }

    /**
     * Batch-saves every {@link NewsData} element of the list; elements of any
     * other type are ignored.
     *
     * @param dataList records to store; null or empty yields 0
     * @return number of records stored as reported by the repository
     * @throws BaseCrawlException if the repository is unavailable or the save fails
     */
    @Override
    public int saveData(List<?> dataList) throws BaseCrawlException {
        if (dataList == null || dataList.isEmpty()) {
            return 0;
        }
        if (repository == null) {
            // The constructor only logs an initialisation failure; surface it
            // here as a crawl error instead of a bare NullPointerException.
            throw new BaseCrawlException("保存数据失败: NewsDataRepository未初始化",
                    new IllegalStateException("NewsDataRepository is not initialised"));
        }
        try {
            List<NewsData> newsDataList = new ArrayList<>();
            for (Object obj : dataList) {
                if (obj instanceof NewsData) {
                    newsDataList.add((NewsData) obj);
                }
            }
            return repository.batchSave(newsDataList);
        } catch (DbException e) {
            throw new BaseCrawlException("保存数据失败: " + e.getMessage(), e);
        }
    }

    @Override
    public String getSiteName() {
        return SITE_NAME;
    }
}

52
project/src/main/java/com/example/crawler/util/ConfigUtil.java

@ -0,0 +1,52 @@
package com.example.crawler.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.InputStream;
import java.util.Properties;
/**
 * Read-only access to {@code application.properties} loaded once from the
 * classpath when the class is initialised. A missing file or a load failure
 * is logged and leaves the configuration empty.
 */
public class ConfigUtil {
    private static final Logger logger = LoggerFactory.getLogger(ConfigUtil.class);
    private static final Properties properties = new Properties();

    static {
        try (InputStream is = ConfigUtil.class.getClassLoader().getResourceAsStream("application.properties")) {
            if (is != null) {
                properties.load(is);
            } else {
                logger.warn("配置文件 application.properties 未找到");
            }
        } catch (Exception e) {
            logger.error("加载配置文件失败", e);
        }
    }

    /** @return the raw value for the key, or null when it is absent */
    public static String getString(String key) {
        return properties.getProperty(key);
    }

    /** @return the raw value for the key, or {@code defaultValue} when it is absent */
    public static String getString(String key, String defaultValue) {
        return properties.getProperty(key, defaultValue);
    }

    /** @return the value parsed as int, or 0 when it is absent or not a valid int */
    public static int getInt(String key) {
        return getInt(key, 0);
    }

    /**
     * @return the value parsed as int, or {@code defaultValue} when the key is
     *         absent or its value is not a valid int (a warning is logged)
     */
    public static int getInt(String key, int defaultValue) {
        String value = properties.getProperty(key);
        if (value == null) {
            return defaultValue;
        }
        try {
            // Properties.load keeps trailing whitespace in values; trim it so
            // "2000 " still parses.
            return Integer.parseInt(value.trim());
        } catch (NumberFormatException e) {
            logger.warn("配置项 {} 的值 '{}' 不是有效整数,使用默认值 {}", key, value, defaultValue);
            return defaultValue;
        }
    }

    /** @return the value parsed as long, or 0 when it is absent or not a valid long */
    public static long getLong(String key) {
        return getLong(key, 0L);
    }

    /**
     * @return the value parsed as long, or {@code defaultValue} when the key is
     *         absent or its value is not a valid long (a warning is logged)
     */
    public static long getLong(String key, long defaultValue) {
        String value = properties.getProperty(key);
        if (value == null) {
            return defaultValue;
        }
        try {
            return Long.parseLong(value.trim());
        } catch (NumberFormatException e) {
            logger.warn("配置项 {} 的值 '{}' 不是有效长整数,使用默认值 {}", key, value, defaultValue);
            return defaultValue;
        }
    }
}

165
project/src/main/java/com/example/crawler/util/DataValidator.java

@ -0,0 +1,165 @@
package com.example.crawler.util;
import com.example.crawler.model.IndexData;
import com.example.crawler.model.MarketData;
import com.example.crawler.model.NewsData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.math.BigDecimal;
import java.util.Date;
public class DataValidator {
private static final Logger logger = LoggerFactory.getLogger(DataValidator.class);
private DataValidator() {
}
/**
 * Checks a market record before it is stored. Variety, trade date and close
 * price are mandatory; open/high/low price and change rate are validated only
 * when present. The first failing rule is logged and ends the check.
 *
 * @param data record to check; null is rejected
 * @return true when every rule passes
 */
public static boolean validateMarketData(MarketData data) {
    if (data == null) {
        logger.warn("MarketData为空");
        return false;
    }

    String variety = data.getVariety();
    if (variety == null || variety.trim().isEmpty()) {
        logger.warn("MarketData品种为空");
        return false;
    }

    Date tradeDate = data.getTradeDate();
    if (tradeDate == null) {
        logger.warn("MarketData交易日期为空");
        return false;
    }
    if (!validateDate(tradeDate)) {
        logger.warn("MarketData交易日期格式不正确: {}", tradeDate);
        return false;
    }

    BigDecimal close = data.getClosePrice();
    if (close == null) {
        logger.warn("MarketData收盘价为空: {}", variety);
        return false;
    }
    if (!validatePrice(close)) {
        logger.warn("MarketData收盘价无效: {}", close);
        return false;
    }

    // Optional fields: absent is fine, present must be valid.
    BigDecimal open = data.getOpenPrice();
    if (open != null && !validatePrice(open)) {
        logger.warn("MarketData开盘价无效: {}", open);
        return false;
    }
    BigDecimal high = data.getHighPrice();
    if (high != null && !validatePrice(high)) {
        logger.warn("MarketData最高价无效: {}", high);
        return false;
    }
    BigDecimal low = data.getLowPrice();
    if (low != null && !validatePrice(low)) {
        logger.warn("MarketData最低价无效: {}", low);
        return false;
    }
    BigDecimal rate = data.getChangeRate();
    if (rate != null && !validateChangeRate(rate)) {
        logger.warn("MarketData涨跌幅无效: {}", rate);
        return false;
    }
    return true;
}
public static boolean validateIndexData(IndexData data) {
if (data == null) {
logger.warn("IndexData为空");
return false;
}
if (data.getIndexName() == null || data.getIndexName().trim().isEmpty()) {
logger.warn("IndexData指数名称为空");
return false;
}
if (data.getDate() == null) {
logger.warn("IndexData日期为空");
return false;
}
if (!validateDate(data.getDate())) {
logger.warn("IndexData日期格式不正确: {}", data.getDate());
return false;
}
if (data.getIndexValue() == null) {
logger.warn("IndexData指数值为空: {}", data.getIndexName());
return false;
}
if (!validatePrice(data.getIndexValue())) {
logger.warn("IndexData指数值无效: {}", data.getIndexValue());
return false;
}
return true;
}
public static boolean validateNewsData(NewsData data) {
if (data == null) {
logger.warn("NewsData为空");
return false;
}
if (data.getTitle() == null || data.getTitle().trim().isEmpty()) {
logger.warn("NewsData标题为空");
return false;
}
if (data.getPublishTime() == null) {
logger.warn("NewsData发布时间为空: {}", data.getTitle());
return false;
}
if (!validateDate(data.getPublishTime())) {
logger.warn("NewsData发布时间格式不正确: {}", data.getPublishTime());
return false;
}
if (data.getSentiment() == null || data.getSentiment().trim().isEmpty()) {
logger.warn("NewsData舆情倾向为空: {}", data.getTitle());
return false;
}
if (!isValidSentiment(data.getSentiment())) {
logger.warn("NewsData舆情倾向无效: {}", data.getSentiment());
return false;
}
return true;
}
private static boolean validateDate(Date date) {
if (date == null) {
return false;
}
Date now = new Date();
return !date.after(now);
}
private static boolean validatePrice(BigDecimal price) {
if (price == null) {
return false;
}
return price.compareTo(BigDecimal.ZERO) >= 0 && price.doubleValue() < 1000000;
}
private static boolean validateChangeRate(BigDecimal changeRate) {
if (changeRate == null) {
return false;
}
return changeRate.doubleValue() >= -100 && changeRate.doubleValue() <= 100;
}
private static boolean isValidSentiment(String sentiment) {
return "利好".equals(sentiment) || "利空".equals(sentiment) || "中性".equals(sentiment);
}
}

85
project/src/main/java/com/example/crawler/util/DateTypeHandler.java

@ -0,0 +1,85 @@
package com.example.crawler.util;
import org.apache.ibatis.type.BaseTypeHandler;
import org.apache.ibatis.type.JdbcType;
import java.sql.*;
import java.util.Date;
/**
 * MyBatis type handler that stores {@link Date} values as epoch milliseconds and reads them
 * back from either a numeric epoch value or whatever the JDBC driver can return as a timestamp.
 */
public class DateTypeHandler extends BaseTypeHandler<Date> {

    /**
     * Numeric values above this are taken as epoch milliseconds, everything else as epoch seconds.
     * NOTE(review): millisecond values at or below this threshold (instants before 2001-09-09 UTC)
     * are therefore read as seconds — confirm no such rows are expected.
     */
    private static final long MILLIS_THRESHOLD = 1000000000000L;

    /** Deferred {@code getTimestamp} call on a ResultSet or CallableStatement. */
    @FunctionalInterface
    private interface TimestampReader {
        Timestamp read() throws SQLException;
    }

    /** Writes the date as epoch milliseconds. */
    @Override
    public void setNonNullParameter(PreparedStatement ps, int i, Date parameter, JdbcType jdbcType) throws SQLException {
        ps.setLong(i, parameter.getTime());
    }

    @Override
    public Date getNullableResult(ResultSet rs, String columnName) throws SQLException {
        return toDate(rs.getString(columnName), () -> rs.getTimestamp(columnName));
    }

    @Override
    public Date getNullableResult(ResultSet rs, int columnIndex) throws SQLException {
        return toDate(rs.getString(columnIndex), () -> rs.getTimestamp(columnIndex));
    }

    @Override
    public Date getNullableResult(CallableStatement cs, int columnIndex) throws SQLException {
        return toDate(cs.getString(columnIndex), () -> cs.getTimestamp(columnIndex));
    }

    /**
     * Converts a raw column value to a {@link Date}.
     *
     * @param raw            column value read as a string, may be {@code null}
     * @param driverFallback used when {@code raw} is not a plain integer: lets the driver parse
     *                       the column as a timestamp
     * @return the converted date, or {@code null} when the column is null or cannot be interpreted
     */
    private static Date toDate(String raw, TimestampReader driverFallback) {
        if (raw == null) {
            return null;
        }
        try {
            long epoch = Long.parseLong(raw);
            // 13-digit values are milliseconds, shorter ones are seconds
            return new Date(epoch > MILLIS_THRESHOLD ? epoch : epoch * 1000);
        } catch (NumberFormatException notNumeric) {
            try {
                Timestamp ts = driverFallback.read();
                return ts != null ? new Date(ts.getTime()) : null;
            } catch (Exception unreadable) {
                // best effort: a value the driver cannot parse is reported as null
                return null;
            }
        }
    }
}

57
project/src/main/java/com/example/crawler/util/DateUtil.java

@ -0,0 +1,57 @@
package com.example.crawler.util;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * String/{@link Date} conversion helpers built on {@link SimpleDateFormat}.
 * All conversions use the JVM default time zone and locale.
 */
public class DateUtil {
    private static final String DEFAULT_FORMAT = "yyyy-MM-dd";

    /** Patterns tried, in order, when the requested pattern does not match. */
    private static final String[] FORMATS = {
        "yyyy-MM-dd",
        "yyyy/MM/dd",
        "yyyy年MM月dd日",
        "yyyy-MM-dd HH:mm:ss",
        "yyyy/MM/dd HH:mm:ss",
        "yyyy年MM月dd日 HH时mm分ss秒"
    };

    /**
     * Parses {@code dateStr} with the default pattern {@code yyyy-MM-dd}, then the fallback patterns.
     *
     * @return the parsed date, or {@code null} when the text is null, blank or matches no pattern
     */
    public static Date parse(String dateStr) {
        return parse(dateStr, DEFAULT_FORMAT);
    }

    /**
     * Parses {@code dateStr} with {@code format} first and, if that fails, with each fallback pattern.
     * <p>
     * Parsing is strict about field ranges, so an impossible date such as {@code 2026-02-31} is
     * rejected rather than rolled over into the next month. Text after the matched pattern is
     * ignored, so a date-time string matched by a date-only pattern yields midnight of that day.
     *
     * @param dateStr text to parse, may be {@code null}
     * @param format  preferred {@link SimpleDateFormat} pattern
     * @return the parsed date, or {@code null} when the text is null, blank or matches no pattern
     */
    public static Date parse(String dateStr, String format) {
        if (dateStr == null) {
            return null;
        }
        String text = dateStr.trim();
        if (text.isEmpty()) {
            return null;
        }
        Date parsed = tryParse(text, format);
        if (parsed != null) {
            return parsed;
        }
        for (String fallback : FORMATS) {
            parsed = tryParse(text, fallback);
            if (parsed != null) {
                return parsed;
            }
        }
        return null;
    }

    /** Strictly parses {@code text} with one pattern; returns {@code null} when it does not match. */
    private static Date tryParse(String text, String pattern) {
        SimpleDateFormat sdf = new SimpleDateFormat(pattern);
        // lenient mode would silently turn month 13 or day 31 of February into a different date
        sdf.setLenient(false);
        try {
            return sdf.parse(text);
        } catch (ParseException e) {
            return null;
        }
    }

    /** Formats {@code date} as {@code yyyy-MM-dd}; returns {@code null} for a null date. */
    public static String format(Date date) {
        return format(date, DEFAULT_FORMAT);
    }

    /**
     * Formats {@code date} with the given {@link SimpleDateFormat} pattern.
     *
     * @return the formatted text, or {@code null} when {@code date} is null
     */
    public static String format(Date date, String format) {
        if (date == null) {
            return null;
        }
        return new SimpleDateFormat(format).format(date);
    }

    /** Returns {@code true} when {@link #parse(String)} can turn {@code dateStr} into a date. */
    public static boolean isValidDate(String dateStr) {
        return parse(dateStr) != null;
    }
}

97
project/src/main/java/com/example/crawler/util/ExcelExporter.java

@ -0,0 +1,97 @@
package com.example.crawler.util;
import com.example.crawler.model.MarketData;
import com.example.crawler.util.exporter.DataExporter;
import org.apache.poi.ss.usermodel.*;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileOutputStream;
import java.util.List;
/**
 * {@link DataExporter} that writes market data to a single-sheet {@code .xlsx} workbook
 * with a styled header row followed by one row per record.
 */
public class ExcelExporter implements DataExporter {
    private static final Logger logger = LoggerFactory.getLogger(ExcelExporter.class);

    private static final String[] HEADERS = {
        "ID", "数据来源", "商品品种", "开盘价", "收盘价", "最高价", "最低价", "成交量", "涨跌幅(%)", "交易日期"
    };

    /**
     * Writes {@code dataList} to {@code filePath}.
     *
     * @param dataList records to export, one row each
     * @param filePath destination file
     * @throws RuntimeException wrapping any failure while building or writing the workbook
     */
    @Override
    public void export(List<MarketData> dataList, String filePath) {
        try (Workbook workbook = new XSSFWorkbook()) {
            Sheet sheet = workbook.createSheet("大宗商品数据");
            CellStyle headerStyle = createHeaderStyle(workbook);
            CellStyle dataStyle = createBorderedStyle(workbook);

            Row headerRow = sheet.createRow(0);
            for (int col = 0; col < HEADERS.length; col++) {
                Cell cell = headerRow.createCell(col);
                cell.setCellValue(HEADERS[col]);
                cell.setCellStyle(headerStyle);
            }

            int rowNum = 1;
            for (MarketData data : dataList) {
                Row row = sheet.createRow(rowNum++);
                createCell(row, 0, data.getId(), dataStyle);
                createCell(row, 1, data.getSource(), dataStyle);
                createCell(row, 2, data.getVariety(), dataStyle);
                createCell(row, 3, data.getOpenPrice(), dataStyle);
                createCell(row, 4, data.getClosePrice(), dataStyle);
                createCell(row, 5, data.getHighPrice(), dataStyle);
                createCell(row, 6, data.getLowPrice(), dataStyle);
                createCell(row, 7, data.getVolume(), dataStyle);
                createCell(row, 8, data.getChangeRate(), dataStyle);
                // yyyy-MM-dd, matching the CSV export, instead of Date.toString()
                createCell(row, 9, DateUtil.format(data.getTradeDate()), dataStyle);
            }

            for (int col = 0; col < HEADERS.length; col++) {
                sheet.autoSizeColumn(col);
            }
            try (FileOutputStream fos = new FileOutputStream(filePath)) {
                workbook.write(fos);
            }
            logger.info("Excel导出成功: {}", filePath);
        } catch (Exception e) {
            logger.error("Excel导出失败", e);
            throw new RuntimeException("Excel导出失败: " + e.getMessage(), e);
        }
    }

    /** Bold text on a grey fill with thin borders. */
    private CellStyle createHeaderStyle(Workbook workbook) {
        CellStyle style = createBorderedStyle(workbook);
        Font bold = workbook.createFont();
        bold.setBold(true);
        style.setFont(bold);
        style.setFillForegroundColor(IndexedColors.GREY_25_PERCENT.getIndex());
        style.setFillPattern(FillPatternType.SOLID_FOREGROUND);
        return style;
    }

    /** Plain style with a thin border on all four sides. */
    private CellStyle createBorderedStyle(Workbook workbook) {
        CellStyle style = workbook.createCellStyle();
        style.setBorderBottom(BorderStyle.THIN);
        style.setBorderTop(BorderStyle.THIN);
        style.setBorderLeft(BorderStyle.THIN);
        style.setBorderRight(BorderStyle.THIN);
        return style;
    }

    /** Creates a styled cell: null becomes an empty string, numbers a numeric cell, anything else text. */
    private void createCell(Row row, int column, Object value, CellStyle style) {
        Cell cell = row.createCell(column);
        if (value == null) {
            cell.setCellValue("");
        } else if (value instanceof Number) {
            cell.setCellValue(((Number) value).doubleValue());
        } else {
            cell.setCellValue(value.toString());
        }
        cell.setCellStyle(style);
    }

    @Override
    public String getFormat() {
        return "excel";
    }

    @Override
    public String getFileExtension() {
        return ".xlsx";
    }
}

69
project/src/main/java/com/example/crawler/util/HttpUtil.java

@ -0,0 +1,69 @@
package com.example.crawler.util;
import com.example.crawler.exception.NetworkException;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
/**
 * Thin wrapper around a shared {@link OkHttpClient} for browser-like GET requests.
 */
public class HttpUtil {
    private static final Logger logger = LoggerFactory.getLogger(HttpUtil.class);

    /** Single client instance with 30-second connect, read and write timeouts. */
    private static final OkHttpClient client = new OkHttpClient.Builder()
            .connectTimeout(30, TimeUnit.SECONDS)
            .readTimeout(30, TimeUnit.SECONDS)
            .writeTimeout(30, TimeUnit.SECONDS)
            .build();

    /**
     * Fetches {@code url} without a Referer header.
     *
     * @see #get(String, String)
     */
    public static String get(String url) throws NetworkException {
        return get(url, null);
    }

    /**
     * Fetches {@code url} with a random User-Agent and browser-style Accept headers.
     *
     * @param url     address to request
     * @param referer value for the Referer header, or {@code null} to omit it
     * @return the response body as text
     * @throws NetworkException on a non-2xx status, a missing body, or an I/O failure
     */
    public static String get(String url, String referer) throws NetworkException {
        String userAgent = UserAgentUtil.getRandomUserAgent();
        // Accept-Encoding is deliberately not set here: OkHttp only requests and transparently
        // decompresses gzip when the caller leaves that header alone. Setting it by hand makes
        // body().string() return the raw compressed bytes.
        Request.Builder builder = new Request.Builder()
                .url(url)
                .header("User-Agent", userAgent)
                .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8")
                .header("Accept-Language", "zh-CN,zh;q=0.9,en;q=0.8")
                .header("Connection", "keep-alive");
        if (referer != null) {
            builder.header("Referer", referer);
        }
        Request request = builder.build();
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new NetworkException("HTTP请求失败, 状态码: " + response.code());
            }
            if (response.body() == null) {
                throw new NetworkException("HTTP响应体为空");
            }
            return response.body().string();
        } catch (IOException e) {
            throw new NetworkException("网络请求异常: " + e.getMessage(), e);
        }
    }

    /**
     * Pauses the current thread for {@code millis} milliseconds.
     * An interrupt ends the pause early and is re-flagged on the thread.
     */
    public static void sleep(long millis) {
        try {
            Thread.sleep(millis);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            logger.warn("线程休眠被中断");
        }
    }
}

159
project/src/main/java/com/example/crawler/util/MyBatisUtil.java

@ -0,0 +1,159 @@
package com.example.crawler.util;
import com.example.crawler.exception.DbException;
import org.apache.ibatis.io.Resources;
import org.apache.ibatis.session.SqlSessionFactory;
import org.apache.ibatis.session.SqlSessionFactoryBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.InputStream;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.Statement;
import java.util.Properties;
/**
 * Builds the application-wide {@link SqlSessionFactory} once, at class-load time.
 * <p>
 * Connection settings come from {@code ConfigUtil} ({@code db.driver}, {@code db.url},
 * {@code db.username}, {@code db.password}). For SQLite and H2 drivers the three tables
 * are created first if they do not exist yet.
 */
public class MyBatisUtil {
    private static final Logger logger = LoggerFactory.getLogger(MyBatisUtil.class);
    private static SqlSessionFactory sqlSessionFactory;

    static {
        try {
            String dbDriver = ConfigUtil.getString("db.driver");
            String dbUrl = ConfigUtil.getString("db.url");
            String dbUsername = ConfigUtil.getString("db.username");
            String dbPassword = ConfigUtil.getString("db.password");
            if (dbDriver == null || dbUrl == null) {
                // fail with a clear message instead of a NullPointerException further down
                throw new IllegalStateException("db.driver 或 db.url 未配置");
            }
            if (dbDriver.contains("sqlite")) {
                initializeSQLiteDatabase(dbDriver, dbUrl);
            } else if (dbDriver.contains("h2")) {
                initializeH2Database(dbDriver, dbUrl, dbUsername, dbPassword);
            }

            Properties props = new Properties();
            props.setProperty("db.driver", dbDriver);
            props.setProperty("db.url", dbUrl);
            // Properties rejects null values; credentials may be absent (e.g. SQLite)
            putIfPresent(props, "db.username", dbUsername);
            putIfPresent(props, "db.password", dbPassword);

            String resource = "mybatis-config.xml";
            // close the configuration stream once the factory has been built
            try (InputStream inputStream = Resources.getResourceAsStream(resource)) {
                sqlSessionFactory = new SqlSessionFactoryBuilder().build(inputStream, props);
            }
            logger.info("MyBatis SqlSessionFactory初始化成功");
        } catch (Exception e) {
            // Initialization failure is logged, not rethrown; getSqlSessionFactory() reports it later.
            logger.error("MyBatis SqlSessionFactory初始化失败", e);
        }
    }

    /** Copies {@code value} into {@code props} unless it is null. */
    private static void putIfPresent(Properties props, String key, String value) {
        if (value != null) {
            props.setProperty(key, value);
        }
    }

    /**
     * Creates the directory holding the SQLite file if needed, then creates the
     * {@code market_data}, {@code index_data} and {@code news_data} tables if missing.
     */
    private static void initializeSQLiteDatabase(String driver, String url) throws Exception {
        String dbPath = url.replace("jdbc:sqlite:", "");
        File dbDir = new File(dbPath).getParentFile();
        if (dbDir != null && !dbDir.exists()) {
            if (dbDir.mkdirs()) {
                logger.info("创建数据库目录: {}", dbDir.getAbsolutePath());
            } else if (!dbDir.exists()) {
                logger.warn("无法创建数据库目录: {}", dbDir.getAbsolutePath());
            }
        }
        Class.forName(driver);
        try (Connection conn = DriverManager.getConnection(url);
             Statement stmt = conn.createStatement()) {
            stmt.execute("CREATE TABLE IF NOT EXISTS market_data (" +
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, " +
                    "variety VARCHAR(50) NOT NULL, " +
                    "trade_date TEXT NOT NULL, " +
                    "open_price DECIMAL(18,4), " +
                    "close_price DECIMAL(18,4) NOT NULL, " +
                    "high_price DECIMAL(18,4), " +
                    "low_price DECIMAL(18,4), " +
                    "volume DECIMAL(20,4), " +
                    "change_rate DECIMAL(10,4), " +
                    "create_time TEXT DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (trade_date, variety))");
            stmt.execute("CREATE TABLE IF NOT EXISTS index_data (" +
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, " +
                    "index_name VARCHAR(100) NOT NULL, " +
                    "date TEXT NOT NULL, " +
                    "index_value DECIMAL(18,4) NOT NULL, " +
                    "change_rate DECIMAL(10,4), " +
                    "stock_name VARCHAR(100), " +
                    "stock_price DECIMAL(18,4), " +
                    "turnover_rate DECIMAL(10,4), " +
                    "create_time TEXT DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (date, index_name))");
            stmt.execute("CREATE TABLE IF NOT EXISTS news_data (" +
                    "id INTEGER PRIMARY KEY AUTOINCREMENT, " +
                    "title VARCHAR(500) NOT NULL, " +
                    "content TEXT, " +
                    "publish_time TEXT NOT NULL, " +
                    "related_commodity VARCHAR(50), " +
                    "sentiment VARCHAR(10) NOT NULL, " +
                    "create_time TEXT DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (title, publish_time))");
            logger.info("SQLite数据库表初始化成功");
        }
    }

    /** Creates the same three tables on H2, using native DATE/TIMESTAMP column types. */
    private static void initializeH2Database(String driver, String url, String username, String password) throws Exception {
        Class.forName(driver);
        try (Connection conn = DriverManager.getConnection(url, username, password);
             Statement stmt = conn.createStatement()) {
            stmt.execute("CREATE TABLE IF NOT EXISTS market_data (" +
                    "id BIGINT AUTO_INCREMENT PRIMARY KEY, " +
                    "variety VARCHAR(50) NOT NULL, " +
                    "trade_date DATE NOT NULL, " +
                    "open_price DECIMAL(18,4), " +
                    "close_price DECIMAL(18,4) NOT NULL, " +
                    "high_price DECIMAL(18,4), " +
                    "low_price DECIMAL(18,4), " +
                    "volume DECIMAL(20,4), " +
                    "change_rate DECIMAL(10,4), " +
                    "create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (trade_date, variety))");
            stmt.execute("CREATE TABLE IF NOT EXISTS index_data (" +
                    "id BIGINT AUTO_INCREMENT PRIMARY KEY, " +
                    "index_name VARCHAR(100) NOT NULL, " +
                    "date DATE NOT NULL, " +
                    "index_value DECIMAL(18,4) NOT NULL, " +
                    "change_rate DECIMAL(10,4), " +
                    "stock_name VARCHAR(100), " +
                    "stock_price DECIMAL(18,4), " +
                    "turnover_rate DECIMAL(10,4), " +
                    "create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (date, index_name))");
            stmt.execute("CREATE TABLE IF NOT EXISTS news_data (" +
                    "id BIGINT AUTO_INCREMENT PRIMARY KEY, " +
                    "title VARCHAR(500) NOT NULL, " +
                    "content TEXT, " +
                    "publish_time TIMESTAMP NOT NULL, " +
                    "related_commodity VARCHAR(50), " +
                    "sentiment VARCHAR(10) NOT NULL, " +
                    "create_time TIMESTAMP DEFAULT CURRENT_TIMESTAMP, " +
                    "source VARCHAR(50), " +
                    "UNIQUE (title, publish_time))");
            logger.info("H2数据库表初始化成功");
        }
    }

    /**
     * Returns the shared factory.
     *
     * @throws DbException if static initialization failed and no factory is available
     */
    public static SqlSessionFactory getSqlSessionFactory() throws DbException {
        if (sqlSessionFactory == null) {
            throw new DbException("SqlSessionFactory未初始化");
        }
        return sqlSessionFactory;
    }
}

380
project/src/main/java/com/example/crawler/util/PdfReportGenerator.java

@ -0,0 +1,380 @@
package com.example.crawler.util;
import com.example.crawler.model.IndexData;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.PDPageContentStream;
import org.apache.pdfbox.pdmodel.common.PDRectangle;
import org.apache.pdfbox.pdmodel.font.PDType0Font;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
/**
 * Generates a fixed-layout, nine-page A4 PDF report: cover, table of contents, market overview,
 * four analysis pages that point at chart image files, a data table and a disclaimer.
 */
public class PdfReportGenerator {
    private static final Logger logger = LoggerFactory.getLogger(PdfReportGenerator.class);
    private static final float MARGIN = 50;
    private static final float LINE_HEIGHT = 20;
    private static final float TITLE_SIZE = 24;
    private static final float HEADING_SIZE = 16;
    private static final float TEXT_SIZE = 12;
    /** Maximum number of records printed on the data table page. */
    private static final int MAX_TABLE_ROWS = 30;
    /** Font used when no override is given. */
    private static final String DEFAULT_FONT_PATH = "C:/Windows/Fonts/simhei.ttf";
    /** System property that overrides the font file, for hosts without the default path. */
    private static final String FONT_PATH_PROPERTY = "report.font.path";

    private PDType0Font chineseFont;
    private PDType0Font chineseFontBold;

    /**
     * Builds the report and saves it to {@code outputPath}.
     *
     * @param dataList    index records for the overview and data table; {@code null} is treated as empty
     * @param chartImages currently not used: the analysis pages only print the chart file paths
     * @param outputPath  destination file of the PDF
     * @return {@code outputPath}
     * @throws RuntimeException wrapping any {@link IOException} raised while building or saving
     */
    public String generateReport(List<IndexData> dataList,
                                 Map<String, java.awt.image.BufferedImage> chartImages,
                                 String outputPath) {
        List<IndexData> rows = dataList != null ? dataList : List.of();
        try (PDDocument document = new PDDocument()) {
            loadChineseFonts(document);
            addCoverPage(document);
            addTableOfContentsPage(document);
            addMarketOverviewPage(document, rows);
            addPriceTrendPage(document);
            addVolatilityPage(document);
            addCorrelationPage(document);
            addSentimentPage(document);
            addDataTablePage(document, rows);
            addFooterPage(document);
            document.save(outputPath);
            logger.info("PDF Report generated successfully: {}", outputPath);
            return outputPath;
        } catch (IOException e) {
            logger.error("PDF Report generation failed", e);
            throw new RuntimeException("PDF Report generation failed: " + e.getMessage(), e);
        }
    }

    /**
     * Loads the CJK font once and uses it for both regular and bold text
     * (there is no separate bold font file).
     *
     * @throws IOException if the font file is missing or cannot be read
     */
    private void loadChineseFonts(PDDocument document) throws IOException {
        String fontPath = System.getProperty(FONT_PATH_PROPERTY, DEFAULT_FONT_PATH);
        try {
            File fontFile = new File(fontPath);
            if (!fontFile.exists()) {
                throw new IOException("Chinese font not found: " + fontPath);
            }
            try (java.io.FileInputStream fis = new java.io.FileInputStream(fontFile)) {
                chineseFont = PDType0Font.load(document, fis);
                logger.info("Loaded Chinese font: {}", fontPath);
            }
            // embedding the same file a second time would only duplicate the font in the PDF
            chineseFontBold = chineseFont;
        } catch (IOException e) {
            logger.error("Failed to load Chinese fonts", e);
            throw e;
        }
    }

    /** Cover: centered title, subtitle and report date, followed by a three-line source banner. */
    private void addCoverPage(PDDocument document) throws IOException {
        PDPage page = new PDPage(PDRectangle.A4);
        document.addPage(page);
        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
            float pageWidth = page.getMediaBox().getWidth();
            float pageHeight = page.getMediaBox().getHeight();
            showCentered(contentStream, chineseFontBold, TITLE_SIZE, pageWidth, pageHeight - 200,
                    "大宗商品市场分析报告");
            showCentered(contentStream, chineseFont, HEADING_SIZE, pageWidth, pageHeight - 250,
                    " 专业数据分析");
            String dateStr = "报告日期:" + new SimpleDateFormat("yyyy-MM-dd").format(new Date());
            showCentered(contentStream, chineseFont, TEXT_SIZE, pageWidth, pageHeight - 350, dateStr);

            String[] decorLines = {
                "========================================",
                " 金投网 | 东方财富 | 同花顺 ",
                "========================================"
            };
            float yPos = pageHeight - 420;
            for (String line : decorLines) {
                // fixed 300pt block centered on the page, not per-line centering
                showTextAt(contentStream, chineseFont, 10, (pageWidth - 300) / 2, yPos, line);
                yPos -= LINE_HEIGHT;
            }
        }
    }

    private void addTableOfContentsPage(PDDocument document) throws IOException {
        addTextPage(document, "目 录", LINE_HEIGHT * 2,
                "1. 市场概览 .................................... 3",
                "2. 价格趋势分析 ................................ 4",
                "3. 波动率分析 .................................. 5",
                "4. 相关性分析 .................................. 6",
                "5. 情绪分析 .................................... 7",
                "6. 数据统计表 .................................. 8",
                "7. 免责声明 .................................... 9");
    }

    /** Overview: record count plus, when data exists, a per-commodity breakdown by name keyword. */
    private void addMarketOverviewPage(PDDocument document, List<IndexData> dataList) throws IOException {
        List<String> lines = new java.util.ArrayList<>();
        lines.add("数据来源:金投网、东方财富、同花顺");
        lines.add("总记录数:" + dataList.size() + " 条");
        if (!dataList.isEmpty()) {
            long goldCount = countByName(dataList, "黄金", "Gold");
            long silverCount = countByName(dataList, "白银", "Silver");
            long oilCount = countByName(dataList, "原油", "Oil");
            long otherCount = dataList.size() - goldCount - silverCount - oilCount;
            lines.add("商品种类:黄金(" + goldCount + ")、白银(" + silverCount + ")、原油(" + oilCount + ")、其他(" + otherCount + ")");
        }
        lines.add("报告生成时间:" + new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()));
        addTextPage(document, "1. 市场概览", LINE_HEIGHT, lines.toArray(new String[0]));
    }

    /** Counts records whose index name contains either keyword. */
    private static long countByName(List<IndexData> dataList, String keyword, String altKeyword) {
        return dataList.stream()
                .filter(d -> d.getIndexName() != null
                        && (d.getIndexName().contains(keyword) || d.getIndexName().contains(altKeyword)))
                .count();
    }

    private void addPriceTrendPage(PDDocument document) throws IOException {
        addTextPage(document, "2. 价格趋势分析", LINE_HEIGHT,
                "价格趋势图表已生成:",
                " - 图表文件:output/charts/price_trend.png");
    }

    private void addVolatilityPage(PDDocument document) throws IOException {
        addTextPage(document, "3. 波动率分析", LINE_HEIGHT,
                "波动率图表已生成:",
                " - 图表文件:output/charts/volatility.png");
    }

    private void addCorrelationPage(PDDocument document) throws IOException {
        addTextPage(document, "4. 相关性分析", LINE_HEIGHT,
                "分析不同商品之间的价格相关性有助于发现套利机会。",
                "相关性图表已生成:",
                " - 图表文件:output/charts/correlation.png");
    }

    private void addSentimentPage(PDDocument document) throws IOException {
        addTextPage(document, "5. 情绪分析", LINE_HEIGHT,
                "分析新闻情绪与价格趋势之间的关系。",
                "情绪图表已生成:",
                " - 图表文件:output/charts/sentiment.png");
    }

    /** Data table: header row, then at most {@link #MAX_TABLE_ROWS} records and a summary line. */
    private void addDataTablePage(PDDocument document, List<IndexData> dataList) throws IOException {
        PDPage page = new PDPage(PDRectangle.A4);
        document.addPage(page);
        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
            float yPos = page.getMediaBox().getHeight() - MARGIN;
            yPos = addHeading(contentStream, yPos, "6. 数据统计表");
            yPos -= LINE_HEIGHT * 2;
            String[] headers = {"指数名称", "数值", "涨跌幅", "来源"};
            float[] colWidths = {120, 100, 80, 130};
            yPos = drawTableHeader(contentStream, yPos, headers, colWidths);
            yPos -= 5;
            int count = 0;
            for (IndexData data : dataList) {
                if (count >= MAX_TABLE_ROWS) {
                    break;
                }
                String[] row = {
                    safeString(data.getIndexName()),
                    safeString(data.getIndexValue()),
                    safeString(data.getChangeRate()),
                    safeString(data.getSource())
                };
                yPos = drawTableRow(contentStream, yPos, row, colWidths);
                count++;
            }
            yPos -= LINE_HEIGHT;
            // report the number of rows actually printed, which may be fewer than the cap
            showTextAt(contentStream, chineseFont, 10, MARGIN, yPos,
                    "... 共 " + dataList.size() + " 条记录,以上显示前 " + count + " 条 ...");
        }
    }

    private void addFooterPage(PDDocument document) throws IOException {
        // null entries insert one blank line
        addTextPage(document, "7. 免责声明", LINE_HEIGHT * 2,
                "本报告仅供参考,不构成投资建议。",
                null,
                "投资者应根据自身风险承受能力做出投资决策。",
                "市场有风险,投资需谨慎。",
                null,
                null,
                "版权所有:大宗商品爬虫系统",
                "技术栈:Java + MyBatis + JFreeChart + PDFBox");
    }

    /**
     * Adds an A4 page consisting of a heading and left-aligned text lines.
     *
     * @param heading         page heading
     * @param gapAfterHeading extra vertical space between heading and first line
     * @param lines           text lines in order; a {@code null} entry leaves one blank line
     */
    private void addTextPage(PDDocument document, String heading, float gapAfterHeading,
                             String... lines) throws IOException {
        PDPage page = new PDPage(PDRectangle.A4);
        document.addPage(page);
        try (PDPageContentStream contentStream = new PDPageContentStream(document, page)) {
            float yPos = page.getMediaBox().getHeight() - MARGIN;
            yPos = addHeading(contentStream, yPos, heading);
            yPos -= gapAfterHeading;
            for (String line : lines) {
                if (line == null) {
                    yPos -= LINE_HEIGHT;
                } else {
                    yPos = addText(contentStream, yPos, line);
                }
            }
        }
    }

    /** Renders a value for a table cell: "无" for null, otherwise text capped at 20 characters. */
    private String safeString(Object obj) {
        if (obj == null) {
            return "无";
        }
        String str = obj.toString();
        return str.length() > 20 ? str.substring(0, 17) + "..." : str;
    }

    /** Draws {@code text} with its left edge at {@code (x, y)}. */
    private void showTextAt(PDPageContentStream contentStream, PDType0Font font, float size,
                            float x, float y, String text) throws IOException {
        contentStream.beginText();
        contentStream.setFont(font, size);
        contentStream.newLineAtOffset(x, y);
        contentStream.showText(text);
        contentStream.endText();
    }

    /** Draws {@code text} horizontally centered on a page of the given width. */
    private void showCentered(PDPageContentStream contentStream, PDType0Font font, float size,
                              float pageWidth, float y, String text) throws IOException {
        // getStringWidth is in 1/1000 text-space units
        float textWidth = font.getStringWidth(text) / 1000 * size;
        showTextAt(contentStream, font, size, (pageWidth - textWidth) / 2, y, text);
    }

    /** Draws a heading at the left margin and returns the y position for what follows. */
    private float addHeading(PDPageContentStream contentStream, float yPos, String text) throws IOException {
        showTextAt(contentStream, chineseFontBold, HEADING_SIZE, MARGIN, yPos, text);
        return yPos - LINE_HEIGHT * 1.5f;
    }

    /** Draws one body line at the left margin and returns the y position of the next line. */
    private float addText(PDPageContentStream contentStream, float yPos, String text) throws IOException {
        showTextAt(contentStream, chineseFont, TEXT_SIZE, MARGIN, yPos, text);
        return yPos - LINE_HEIGHT;
    }

    /** Sum of all column widths, i.e. the table width. */
    private static float totalWidth(float[] colWidths) {
        float total = 0;
        for (float width : colWidths) {
            total += width;
        }
        return total;
    }

    /** Draws a rule across the table at {@code yPos}, then the header labels below it. */
    private float drawTableHeader(PDPageContentStream contentStream, float yPos,
                                  String[] headers, float[] colWidths) throws IOException {
        contentStream.setLineWidth(0.5f);
        contentStream.moveTo(MARGIN, yPos);
        contentStream.lineTo(MARGIN + totalWidth(colWidths), yPos);
        contentStream.stroke();
        float xPos = MARGIN;
        for (int i = 0; i < headers.length; i++) {
            showTextAt(contentStream, chineseFontBold, TEXT_SIZE, xPos, yPos - 3, headers[i]);
            xPos += colWidths[i];
        }
        return yPos - LINE_HEIGHT;
    }

    /** Draws one table row (cells capped at 15 characters) and a rule underneath it. */
    private float drawTableRow(PDPageContentStream contentStream, float yPos,
                               String[] row, float[] colWidths) throws IOException {
        float xPos = MARGIN;
        for (int i = 0; i < row.length; i++) {
            String text = row[i];
            if (text.length() > 15) {
                text = text.substring(0, 12) + "...";
            }
            showTextAt(contentStream, chineseFont, TEXT_SIZE, xPos, yPos, text);
            xPos += colWidths[i];
        }
        contentStream.setLineWidth(0.5f);
        contentStream.moveTo(MARGIN, yPos - 3);
        contentStream.lineTo(MARGIN + totalWidth(colWidths), yPos - 3);
        contentStream.stroke();
        return yPos - LINE_HEIGHT;
    }
}

54
project/src/main/java/com/example/crawler/util/ThreadPoolUtil.java

@ -0,0 +1,54 @@
package com.example.crawler.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
/**
 * Holds the shared crawler thread pool, sized from {@code ConfigUtil}.
 */
public class ThreadPoolUtil {
    private static final Logger logger = LoggerFactory.getLogger(ThreadPoolUtil.class);
    private static final ExecutorService executorService;

    static {
        int corePoolSize = ConfigUtil.getInt("thread.pool.core.size", 5);
        // ThreadPoolExecutor rejects max < core, so a misconfigured maximum is raised to the core size
        int maxPoolSize = Math.max(corePoolSize, ConfigUtil.getInt("thread.pool.max.size", 10));
        // The queue must be bounded: with an unbounded queue the pool never grows past the core
        // size and the rejection policy below is never consulted.
        int queueCapacity = Math.max(1, ConfigUtil.getInt("thread.pool.queue.capacity", 100));
        executorService = new ThreadPoolExecutor(
                corePoolSize,
                maxPoolSize,
                60L,
                TimeUnit.SECONDS,
                new LinkedBlockingQueue<>(queueCapacity),
                r -> {
                    Thread thread = new Thread(r);
                    thread.setName("crawler-" + thread.threadId());
                    return thread;
                },
                // when both queue and pool are full, the submitting thread runs the task itself
                new ThreadPoolExecutor.CallerRunsPolicy()
        );
        logger.info("线程池初始化完成,核心线程数: {}, 最大线程数: {}", corePoolSize, maxPoolSize);
    }

    /** Returns the shared executor. */
    public static ExecutorService getExecutorService() {
        return executorService;
    }

    /**
     * Stops accepting tasks and waits up to 60 seconds for running ones to finish;
     * after that, or if the wait is interrupted, remaining tasks are cancelled.
     */
    public static void shutdown() {
        logger.info("关闭线程池...");
        executorService.shutdown();
        try {
            if (!executorService.awaitTermination(60, TimeUnit.SECONDS)) {
                logger.warn("线程池强制关闭");
                executorService.shutdownNow();
            }
        } catch (InterruptedException e) {
            executorService.shutdownNow();
            Thread.currentThread().interrupt();
        }
        logger.info("线程池已关闭");
    }
}

31
project/src/main/java/com/example/crawler/util/UserAgentUtil.java

@ -0,0 +1,31 @@
package com.example.crawler.util;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
/**
 * Supplies browser User-Agent strings picked at random from a fixed list.
 */
public class UserAgentUtil {
    private static final Logger logger = LoggerFactory.getLogger(UserAgentUtil.class);

    /** Candidate User-Agent header values. */
    private static final List<String> USER_AGENTS = Arrays.asList(
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/17.2 Safari/605.1.15",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:121.0) Gecko/20100101 Firefox/121.0",
        "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Edge/120.0.0.0",
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Edg/120.0.0.0"
    );

    private static final Random random = new Random();

    /**
     * Picks one entry of the list at random and logs it at debug level.
     *
     * @return the selected User-Agent string
     */
    public static String getRandomUserAgent() {
        String picked = USER_AGENTS.get(random.nextInt(USER_AGENTS.size()));
        logger.debug("使用UserAgent: {}", picked);
        return picked;
    }
}

73
project/src/main/java/com/example/crawler/util/exporter/CsvExporter.java

@ -0,0 +1,73 @@
package com.example.crawler.util.exporter;
import com.example.crawler.model.MarketData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.List;
/**
 * {@link DataExporter} that writes market data as UTF-8 CSV with a leading byte-order mark.
 */
public class CsvExporter implements DataExporter {
    private static final Logger logger = LoggerFactory.getLogger(CsvExporter.class);
    private static final String DATE_PATTERN = "yyyy-MM-dd";

    /**
     * Writes a header line followed by one line per record. Missing values become empty fields.
     *
     * @param data       records to export
     * @param outputPath destination file
     * @throws RuntimeException wrapping any failure while writing
     */
    @Override
    public void export(List<MarketData> data, String outputPath) {
        // SimpleDateFormat is not thread-safe, so every export call uses its own instance
        SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_PATTERN);
        try (BufferedWriter writer = new BufferedWriter(
                new OutputStreamWriter(new FileOutputStream(outputPath), StandardCharsets.UTF_8))) {
            // byte-order mark first, so spreadsheet software can detect UTF-8
            writer.write("\uFEFF");
            writer.write("品种,交易日期,开盘价,收盘价,最高价,最低价,成交量,涨跌幅,来源,创建时间");
            writer.newLine();
            for (MarketData item : data) {
                String tradeDate = item.getTradeDate() != null ? dateFormat.format(item.getTradeDate()) : "";
                String line = String.join(",",
                        escapeCsv(item.getVariety()),
                        escapeCsv(tradeDate),
                        field(item.getOpenPrice()),
                        field(item.getClosePrice()),
                        field(item.getHighPrice()),
                        field(item.getLowPrice()),
                        field(item.getVolume()),
                        field(item.getChangeRate()),
                        escapeCsv(item.getSource()),
                        field(item.getCreateTime()));
                writer.write(line);
                writer.newLine();
            }
            logger.info("CSV导出成功: {}", outputPath);
        } catch (Exception e) {
            logger.error("CSV导出失败", e);
            throw new RuntimeException("CSV导出失败: " + e.getMessage(), e);
        }
    }

    /** Renders any value as a CSV field; null becomes an empty field rather than the text "null". */
    private String field(Object value) {
        return value == null ? "" : escapeCsv(value.toString());
    }

    /**
     * Quotes a value when it contains a comma, a double quote or a line break,
     * doubling embedded quotes. A null value becomes an empty field.
     */
    private String escapeCsv(String value) {
        if (value == null) {
            return "";
        }
        if (value.contains(",") || value.contains("\"") || value.contains("\n") || value.contains("\r")) {
            return "\"" + value.replace("\"", "\"\"") + "\"";
        }
        return value;
    }

    @Override
    public String getFormat() {
        return "csv";
    }

    @Override
    public String getFileExtension() {
        return ".csv";
    }
}

10
project/src/main/java/com/example/crawler/util/exporter/DataExporter.java

@ -0,0 +1,10 @@
package com.example.crawler.util.exporter;
import com.example.crawler.model.MarketData;
import java.util.List;
/**
 * Contract for writing a list of {@link MarketData} records to a file in one output format.
 */
public interface DataExporter {

    /**
     * @return the identifier of the format this exporter produces (for example {@code "csv"})
     */
    String getFormat();

    /**
     * @return the file-name suffix for the produced files, including the leading dot
     *         (for example {@code ".csv"})
     */
    String getFileExtension();

    /**
     * Writes the given records to the given location.
     *
     * @param data       records to export
     * @param outputPath path of the file to write
     */
    void export(List<MarketData> data, String outputPath);
}

30
project/src/main/java/com/example/crawler/util/exporter/DataExporterFactory.java

@ -0,0 +1,30 @@
package com.example.crawler.util.exporter;
import com.example.crawler.util.ExcelExporter;
import java.util.HashMap;
import java.util.Map;
/**
 * Static lookup of {@link DataExporter} instances by format name.
 *
 * <p>Registered names are {@code excel}, {@code xlsx}, {@code csv} and {@code json}. The same
 * exporter instance is returned on every lookup of a given name.
 */
public class DataExporterFactory {
    /** Lower-case format name to exporter; filled once in the static initializer. */
    private static final Map<String, DataExporter> exporters = new HashMap<>();

    static {
        exporters.put("excel", new ExcelExporter());
        exporters.put("xlsx", new ExcelExporter());
        exporters.put("csv", new CsvExporter());
        exporters.put("json", new JsonExporter());
    }

    /**
     * Returns the exporter registered for {@code format}.
     *
     * <p>Matching ignores case and surrounding whitespace.
     *
     * @param format requested format name
     * @return the registered exporter
     * @throws IllegalArgumentException if {@code format} is null or not a registered name
     */
    public static DataExporter getExporter(String format) {
        // A null format used to surface as a bare NullPointerException; report it the same way
        // as any other unsupported value. Locale.ROOT keeps the lookup key independent of the
        // JVM's default locale.
        DataExporter exporter = format == null
                ? null
                : exporters.get(format.trim().toLowerCase(java.util.Locale.ROOT));
        if (exporter == null) {
            throw new IllegalArgumentException("不支持的导出格式: " + format +
                    ",支持的格式: " + getSupportedFormats());
        }
        return exporter;
    }

    /**
     * @return the human-readable list of format names shown in error messages
     */
    public static String getSupportedFormats() {
        return "excel, csv, json";
    }
}

41
project/src/main/java/com/example/crawler/util/exporter/JsonExporter.java

@ -0,0 +1,41 @@
package com.example.crawler.util.exporter;
import com.example.crawler.model.MarketData;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.FileWriter;
import java.io.IOException;
import java.util.List;
/**
 * {@link DataExporter} that serializes market data to a pretty-printed, UTF-8 encoded JSON file.
 */
public class JsonExporter implements DataExporter {
    private static final Logger logger = LoggerFactory.getLogger(JsonExporter.class);

    /** Shared serializer: dates as {@code yyyy-MM-dd HH:mm:ss}, pretty printing enabled. */
    private static final Gson gson = new GsonBuilder()
            .setDateFormat("yyyy-MM-dd HH:mm:ss")
            .setPrettyPrinting()
            .create();

    /**
     * Writes {@code data} as a JSON document to {@code outputPath}, replacing any existing file.
     *
     * @param data       records to serialize
     * @param outputPath path of the JSON file to create
     * @throws RuntimeException if the file cannot be written; the {@link IOException} is
     *                          attached as the cause
     */
    @Override
    public void export(List<MarketData> data, String outputPath) {
        // Encode explicitly as UTF-8 (same as CsvExporter). FileWriter(String) would use the
        // platform default charset, which corrupts non-ASCII text on non-UTF-8 systems.
        try (java.io.Writer writer = new java.io.BufferedWriter(new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(outputPath), java.nio.charset.StandardCharsets.UTF_8))) {
            String json = gson.toJson(data);
            writer.write(json);
            logger.info("JSON导出成功: {}", outputPath);
        } catch (IOException e) {
            logger.error("JSON导出失败", e);
            throw new RuntimeException("JSON导出失败: " + e.getMessage(), e);
        }
    }

    @Override
    public String getFormat() {
        return "json";
    }

    @Override
    public String getFileExtension() {
        return ".json";
    }
}

361
project/src/main/java/com/example/crawler/visualization/ChartGenerator.java

@ -0,0 +1,361 @@
package com.example.crawler.visualization;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.plot.XYPlot;
import org.jfree.chart.renderer.category.BarRenderer;
import org.jfree.chart.renderer.xy.XYLineAndShapeRenderer;
import org.jfree.chart.title.TextTitle;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.chart.axis.DateAxis;
import org.jfree.data.time.Day;
import org.jfree.data.time.TimeSeries;
import org.jfree.data.time.TimeSeriesCollection;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.awt.*;
import java.io.File;
import java.io.IOException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Random;
public class ChartGenerator {
private static final Logger logger = LoggerFactory.getLogger(ChartGenerator.class);
private static final String OUTPUT_DIR = "./output/charts/";
private Random random = new Random(42);
private Font getChineseFont() {
return new Font("SimSun", Font.PLAIN, 14);
}
private void configureChartFont(JFreeChart chart) {
Font chineseFont = getChineseFont();
chart.setTitle(new TextTitle(chart.getTitle().getText(), chineseFont));
if (chart.getLegend() != null) {
chart.getLegend().setItemFont(chineseFont);
}
if (chart.getPlot() instanceof XYPlot) {
XYPlot plot = (XYPlot) chart.getPlot();
if (plot.getDomainAxis() != null) {
plot.getDomainAxis().setLabelFont(chineseFont);
plot.getDomainAxis().setTickLabelFont(chineseFont);
}
if (plot.getRangeAxis() != null) {
plot.getRangeAxis().setLabelFont(chineseFont);
plot.getRangeAxis().setTickLabelFont(chineseFont);
}
}
if (chart.getPlot() instanceof CategoryPlot) {
CategoryPlot plot = (CategoryPlot) chart.getPlot();
if (plot.getDomainAxis() != null) {
plot.getDomainAxis().setLabelFont(chineseFont);
plot.getDomainAxis().setTickLabelFont(chineseFont);
}
if (plot.getRangeAxis() != null) {
plot.getRangeAxis().setLabelFont(chineseFont);
plot.getRangeAxis().setTickLabelFont(chineseFont);
}
}
}
public void generatePriceTrendChart() {
try {
XYSeries goldSeries = createSimulatedSeries("黄金", 450, 20, 30);
XYSeries silverSeries = createSimulatedSeries("白银", 5800, 300, 30);
XYSeries oilSeries = createSimulatedSeries("原油", 75, 5, 30);
XYSeriesCollection dataset = new XYSeriesCollection();
dataset.addSeries(goldSeries);
dataset.addSeries(silverSeries);
dataset.addSeries(oilSeries);
JFreeChart chart = ChartFactory.createXYLineChart(
"大宗商品价格趋势对比",
"日期",
"价格",
dataset,
PlotOrientation.VERTICAL,
true,
true,
false
);
configureChartFont(chart);
XYPlot plot = chart.getXYPlot();
XYLineAndShapeRenderer renderer = new XYLineAndShapeRenderer();
renderer.setSeriesPaint(0, new Color(255, 140, 0));
renderer.setSeriesStroke(0, new BasicStroke(2.0f));
renderer.setSeriesLinesVisible(0, true);
renderer.setSeriesShapesVisible(0, false);
renderer.setSeriesPaint(1, new Color(128, 128, 128));
renderer.setSeriesStroke(1, new BasicStroke(2.0f));
renderer.setSeriesLinesVisible(1, true);
renderer.setSeriesShapesVisible(1, false);
renderer.setSeriesPaint(2, new Color(34, 139, 34));
renderer.setSeriesStroke(2, new BasicStroke(2.0f));
renderer.setSeriesLinesVisible(2, true);
renderer.setSeriesShapesVisible(2, false);
plot.setRenderer(renderer);
plot.setBackgroundPaint(Color.WHITE);
plot.setDomainGridlinePaint(new Color(200, 200, 200));
plot.setRangeGridlinePaint(new Color(200, 200, 200));
chart.setBackgroundPaint(Color.WHITE);
saveChart(chart, "price_trend.png");
logger.info("价格趋势对比图生成完成");
} catch (Exception e) {
logger.error("生成价格趋势图失败", e);
}
}
public void generateVolatilityChart() {
try {
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
String[] periods = {"常规时段", "节假日", "重大事件"};
dataset.addValue(2.5, "黄金", periods[0]);
dataset.addValue(3.0, "黄金", periods[1]);
dataset.addValue(3.8, "黄金", periods[2]);
dataset.addValue(3.2, "白银", periods[0]);
dataset.addValue(3.8, "白银", periods[1]);
dataset.addValue(4.8, "白银", periods[2]);
dataset.addValue(4.5, "原油", periods[0]);
dataset.addValue(5.4, "原油", periods[1]);
dataset.addValue(6.8, "原油", periods[2]);
JFreeChart chart = ChartFactory.createBarChart(
"大宗商品波动特征分析",
"时段类型",
"波动率(%)",
dataset,
PlotOrientation.VERTICAL,
true,
true,
false
);
configureChartFont(chart);
CategoryPlot plot = chart.getCategoryPlot();
BarRenderer renderer = (BarRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(255, 140, 0));
renderer.setSeriesPaint(1, new Color(128, 128, 128));
renderer.setSeriesPaint(2, new Color(34, 139, 34));
plot.setBackgroundPaint(Color.WHITE);
plot.setDomainGridlinePaint(new Color(200, 200, 200));
plot.setRangeGridlinePaint(new Color(200, 200, 200));
chart.setBackgroundPaint(Color.WHITE);
saveChart(chart, "volatility.png");
logger.info("波动特征分析图生成完成");
} catch (Exception e) {
logger.error("生成波动特征图失败", e);
}
}
public void generateCorrelationChart() {
try {
XYSeries goldSeries = createSimulatedSeries("黄金", 450, 10, 50);
XYSeries oilSeries = createSimulatedSeries("原油", 75, 3, 50);
XYSeriesCollection dataset = new XYSeriesCollection();
dataset.addSeries(goldSeries);
dataset.addSeries(oilSeries);
JFreeChart chart = ChartFactory.createScatterPlot(
"黄金与原油价格相关性分析",
"日期索引",
"价格",
dataset,
PlotOrientation.VERTICAL,
true,
true,
false
);
configureChartFont(chart);
XYPlot plot = chart.getXYPlot();
XYLineAndShapeRenderer renderer = new XYLineAndShapeRenderer();
renderer.setSeriesPaint(0, new Color(255, 140, 0));
renderer.setSeriesPaint(1, new Color(34, 139, 34));
renderer.setSeriesShapesVisible(0, true);
renderer.setSeriesShapesVisible(1, true);
renderer.setSeriesLinesVisible(0, false);
renderer.setSeriesLinesVisible(1, false);
plot.setRenderer(renderer);
plot.setBackgroundPaint(Color.WHITE);
chart.setBackgroundPaint(Color.WHITE);
saveChart(chart, "correlation.png");
logger.info("相关性分析图生成完成");
} catch (Exception e) {
logger.error("生成相关性分析图失败", e);
}
}
public void generateCycleChart() {
try {
DefaultCategoryDataset dataset = new DefaultCategoryDataset();
String[] months = {"1月", "2月", "3月", "4月", "5月", "6月", "7月", "8月", "9月", "10月", "11月", "12月"};
double[] goldPrices = {445, 448, 452, 455, 450, 448, 452, 458, 462, 460, 455, 458};
double[] oilPrices = {72, 74, 76, 78, 80, 82, 85, 88, 85, 80, 76, 74};
for (int i = 0; i < 12; i++) {
dataset.addValue(goldPrices[i], "黄金", months[i]);
dataset.addValue(oilPrices[i], "原油", months[i]);
}
JFreeChart chart = ChartFactory.createBarChart(
"大宗商品季节性周期分析",
"月份",
"平均价格",
dataset,
PlotOrientation.VERTICAL,
true,
true,
false
);
configureChartFont(chart);
CategoryPlot plot = chart.getCategoryPlot();
BarRenderer renderer = (BarRenderer) plot.getRenderer();
renderer.setSeriesPaint(0, new Color(255, 140, 0));
renderer.setSeriesPaint(1, new Color(34, 139, 34));
plot.setBackgroundPaint(Color.WHITE);
plot.setDomainGridlinePaint(new Color(200, 200, 200));
plot.setRangeGridlinePaint(new Color(200, 200, 200));
chart.setBackgroundPaint(Color.WHITE);
saveChart(chart, "cycle.png");
logger.info("周期规律分析图生成完成");
} catch (Exception e) {
logger.error("生成周期规律图失败", e);
}
}
public void generateSentimentChart() {
try {
TimeSeries priceSeries = new TimeSeries("涨跌幅");
TimeSeries positiveSeries = new TimeSeries("利好新闻数");
TimeSeries negativeSeries = new TimeSeries("利空新闻数");
for (int i = 0; i < 30; i++) {
Day day = new Day(new Date(System.currentTimeMillis() - (30 - i) * 24 * 60 * 60 * 1000));
priceSeries.add(day, (random.nextDouble() - 0.5) * 10);
positiveSeries.add(day, random.nextInt(10));
negativeSeries.add(day, random.nextInt(5));
}
TimeSeriesCollection dataset = new TimeSeriesCollection();
dataset.addSeries(priceSeries);
dataset.addSeries(positiveSeries);
dataset.addSeries(negativeSeries);
JFreeChart chart = ChartFactory.createTimeSeriesChart(
"舆情与价格联动分析",
"日期",
"数值",
dataset,
true,
true,
false
);
configureChartFont(chart);
XYPlot plot = chart.getXYPlot();
DateAxis domainAxis = (DateAxis) plot.getDomainAxis();
domainAxis.setDateFormatOverride(new SimpleDateFormat("yyyy-MM-dd", Locale.CHINA));
XYLineAndShapeRenderer renderer = new XYLineAndShapeRenderer();
renderer.setSeriesPaint(0, new Color(34, 139, 34));
renderer.setSeriesStroke(0, new BasicStroke(2.0f));
renderer.setSeriesPaint(1, new Color(255, 140, 0));
renderer.setSeriesStroke(1, new BasicStroke(2.0f));
renderer.setSeriesPaint(2, new Color(220, 20, 60));
renderer.setSeriesStroke(2, new BasicStroke(2.0f));
plot.setRenderer(renderer);
plot.setBackgroundPaint(Color.WHITE);
chart.setBackgroundPaint(Color.WHITE);
saveChart(chart, "sentiment.png");
logger.info("舆情联动分析图生成完成");
} catch (Exception e) {
logger.error("生成舆情联动图失败", e);
}
}
private XYSeries createSimulatedSeries(String name, double basePrice, double variance, int count) {
XYSeries series = new XYSeries(name);
double currentPrice = basePrice;
double trend = 0;
for (int i = 0; i < count; i++) {
double randomChange = (random.nextDouble() - 0.5) * variance * 2.0;
trend += (random.nextDouble() - 0.5) * variance * 0.3;
trend = Math.max(-variance, Math.min(variance, trend));
currentPrice += randomChange + trend;
if (currentPrice < basePrice * 0.6) currentPrice = basePrice * 0.6;
if (currentPrice > basePrice * 1.4) currentPrice = basePrice * 1.4;
series.add(i, currentPrice);
}
logger.info("创建模拟数据系列: {}, 数据点数量: {}", name, series.getItemCount());
return series;
}
private void saveChart(JFreeChart chart, String filename) throws IOException {
File outputDir = new File(OUTPUT_DIR);
if (!outputDir.exists()) {
outputDir.mkdirs();
}
File outputFile = new File(outputDir, filename);
ChartUtils.saveChartAsPNG(outputFile, chart, 1200, 600);
logger.info("图表已保存: {}", outputFile.getAbsolutePath());
}
public void generateAllCharts() {
logger.info("开始生成所有可视化图表...");
generatePriceTrendChart();
generateVolatilityChart();
generateCorrelationChart();
generateCycleChart();
generateSentimentChart();
logger.info("所有可视化图表生成完成");
}
}

198
project/src/main/java/com/example/crawler/visualization/HtmlReportGenerator.java

@ -0,0 +1,198 @@
package com.example.crawler.visualization;
import com.example.crawler.util.ConfigUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
/**
 * Writes a static HTML page ({@code report.html}) that embeds the chart images by relative path.
 */
public class HtmlReportGenerator {
    private static final Logger logger = LoggerFactory.getLogger(HtmlReportGenerator.class);

    /** Target directory, read from config key {@code output.chart.dir} with a built-in default. */
    private static final String OUTPUT_DIR = ConfigUtil.getString("output.chart.dir", "./output/charts/");

    /**
     * Builds the report markup and writes it to {@code report.html} in the output directory,
     * creating the directory if needed. Failures are logged, not thrown.
     */
    public void generateHtmlReport() {
        String htmlContent = generateHtmlContent();
        File outputDir = new File(OUTPUT_DIR);
        if (!outputDir.exists()) {
            outputDir.mkdirs();
        }
        File htmlFile = new File(outputDir, "report.html");
        // The page declares charset UTF-8, so it must be encoded as UTF-8 regardless of the
        // platform default. A plain Writer is used instead of PrintWriter because PrintWriter
        // hides write errors, which would let the success message be logged after a failure.
        try (java.io.Writer writer = new java.io.OutputStreamWriter(
                new java.io.FileOutputStream(htmlFile), java.nio.charset.StandardCharsets.UTF_8)) {
            writer.write(htmlContent);
            writer.flush();
            logger.info("HTML报告生成完成: {}", htmlFile.getAbsolutePath());
        } catch (IOException e) {
            logger.error("生成HTML报告失败", e);
        }
    }

    /**
     * Assembles the complete HTML document: inline CSS, one card per chart image, and a footer
     * carrying the generation timestamp.
     *
     * @return the page markup
     */
    private String generateHtmlContent() {
        StringBuilder sb = new StringBuilder();
        sb.append("<!DOCTYPE html>\n");
        sb.append("<html lang=\"zh-CN\">\n");
        sb.append("<head>\n");
        sb.append(" <meta charset=\"UTF-8\">\n");
        sb.append(" <meta name=\"viewport\" content=\"width=device-width, initial-scale=1.0\">\n");
        sb.append(" <title>大宗商品分析报告</title>\n");
        sb.append(" <style>\n");
        sb.append(" * { margin: 0; padding: 0; box-sizing: border-box; }\n");
        sb.append(" body {\n");
        sb.append(" font-family: 'Microsoft YaHei', 'SimHei', Arial, sans-serif;\n");
        sb.append(" background: linear-gradient(135deg, #1a1a2e 0%, #16213e 100%);\n");
        sb.append(" min-height: 100vh;\n");
        sb.append(" padding: 20px;\n");
        sb.append(" color: #fff;\n");
        sb.append(" }\n");
        sb.append(" .container { max-width: 1400px; margin: 0 auto; }\n");
        sb.append(" h1 {\n");
        sb.append(" text-align: center;\n");
        sb.append(" font-size: 2.5em;\n");
        sb.append(" margin-bottom: 10px;\n");
        sb.append(" background: linear-gradient(90deg, #f39c12, #e74c3c, #9b59b6);\n");
        sb.append(" -webkit-background-clip: text;\n");
        sb.append(" -webkit-text-fill-color: transparent;\n");
        sb.append(" text-shadow: 0 0 30px rgba(243, 156, 18, 0.3);\n");
        sb.append(" }\n");
        sb.append(" .subtitle {\n");
        sb.append(" text-align: center;\n");
        sb.append(" color: #888;\n");
        sb.append(" margin-bottom: 40px;\n");
        sb.append(" }\n");
        sb.append(" .charts-grid {\n");
        sb.append(" display: grid;\n");
        sb.append(" grid-template-columns: repeat(auto-fit, minmax(600px, 1fr));\n");
        sb.append(" gap: 30px;\n");
        sb.append(" }\n");
        sb.append(" .chart-card {\n");
        sb.append(" background: rgba(255, 255, 255, 0.95);\n");
        sb.append(" border-radius: 20px;\n");
        sb.append(" padding: 25px;\n");
        sb.append(" box-shadow: 0 20px 60px rgba(0, 0, 0, 0.3);\n");
        sb.append(" transition: transform 0.3s ease, box-shadow 0.3s ease;\n");
        sb.append(" }\n");
        sb.append(" .chart-card:hover {\n");
        sb.append(" transform: translateY(-10px);\n");
        sb.append(" box-shadow: 0 30px 80px rgba(0, 0, 0, 0.4);\n");
        sb.append(" }\n");
        sb.append(" .chart-card h2 {\n");
        sb.append(" color: #333;\n");
        sb.append(" font-size: 1.4em;\n");
        sb.append(" margin-bottom: 20px;\n");
        sb.append(" padding-bottom: 10px;\n");
        sb.append(" border-bottom: 3px solid;\n");
        sb.append(" border-image: linear-gradient(90deg, #f39c12, #e74c3c) 1;\n");
        sb.append(" }\n");
        sb.append(" .chart-card img {\n");
        sb.append(" width: 100%;\n");
        sb.append(" height: auto;\n");
        sb.append(" border-radius: 10px;\n");
        sb.append(" }\n");
        sb.append(" .chart-card.full-width {\n");
        sb.append(" grid-column: 1 / -1;\n");
        sb.append(" }\n");
        sb.append(" .legend {\n");
        sb.append(" display: flex;\n");
        sb.append(" justify-content: center;\n");
        sb.append(" gap: 30px;\n");
        sb.append(" margin-top: 15px;\n");
        sb.append(" flex-wrap: wrap;\n");
        sb.append(" }\n");
        sb.append(" .legend-item {\n");
        sb.append(" display: flex;\n");
        sb.append(" align-items: center;\n");
        sb.append(" gap: 8px;\n");
        sb.append(" font-size: 0.9em;\n");
        sb.append(" color: #555;\n");
        sb.append(" }\n");
        sb.append(" .legend-color {\n");
        sb.append(" width: 20px;\n");
        sb.append(" height: 4px;\n");
        sb.append(" border-radius: 2px;\n");
        sb.append(" }\n");
        sb.append(" .gold { background: #ff8c00; }\n");
        sb.append(" .silver { background: #c0c0c0; }\n");
        sb.append(" .oil { background: #006400; }\n");
        sb.append(" .up { background: #006400; }\n");
        sb.append(" .down { background: #ff0000; }\n");
        sb.append(" footer {\n");
        sb.append(" text-align: center;\n");
        sb.append(" margin-top: 50px;\n");
        sb.append(" padding: 20px;\n");
        sb.append(" color: #666;\n");
        sb.append(" }\n");
        sb.append(" @media (max-width: 768px) {\n");
        sb.append(" .charts-grid { grid-template-columns: 1fr; }\n");
        sb.append(" h1 { font-size: 1.8em; }\n");
        sb.append(" }\n");
        sb.append(" </style>\n");
        sb.append("</head>\n");
        sb.append("<body>\n");
        sb.append(" <div class=\"container\">\n");
        sb.append(" <h1>📊 大宗商品分析报告</h1>\n");
        sb.append(" <p class=\"subtitle\"> Commodity Market Analysis Report</p>\n");
        sb.append(" <div class=\"charts-grid\">\n");
        sb.append(" <div class=\"chart-card\">\n");
        sb.append(" <h2>📈 价格趋势对比</h2>\n");
        sb.append(" <img src=\"price_trend.png\" alt=\"价格趋势对比\">\n");
        sb.append(" <div class=\"legend\">\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color gold\"></span>黄金</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color silver\"></span>白银</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color oil\"></span>原油</div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" <div class=\"chart-card\">\n");
        sb.append(" <h2>📊 波动特征分析</h2>\n");
        sb.append(" <img src=\"volatility.png\" alt=\"波动特征分析\">\n");
        sb.append(" <div class=\"legend\">\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color gold\"></span>黄金</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color silver\"></span>白银</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color oil\"></span>原油</div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" <div class=\"chart-card\">\n");
        sb.append(" <h2>🔗 相关性分析</h2>\n");
        sb.append(" <img src=\"correlation.png\" alt=\"相关性分析\">\n");
        sb.append(" <div class=\"legend\">\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color gold\"></span>黄金</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color oil\"></span>原油</div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" <div class=\"chart-card\">\n");
        sb.append(" <h2>🗓️ 季节性周期分析</h2>\n");
        sb.append(" <img src=\"cycle.png\" alt=\"季节性周期分析\">\n");
        sb.append(" <div class=\"legend\">\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color gold\"></span>黄金</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color oil\"></span>原油</div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" <div class=\"chart-card full-width\">\n");
        sb.append(" <h2>💬 舆情联动分析</h2>\n");
        sb.append(" <img src=\"sentiment.png\" alt=\"舆情联动分析\">\n");
        sb.append(" <div class=\"legend\">\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color oil\"></span>涨跌幅</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color gold\"></span>利好新闻数</div>\n");
        sb.append(" <div class=\"legend-item\"><span class=\"legend-color down\"></span>利空新闻数</div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" </div>\n");
        sb.append(" <footer>\n");
        // Timestamp is the default LocalDateTime text form at generation time.
        sb.append(" <p>报告生成时间: ").append(java.time.LocalDateTime.now()).append("</p>\n");
        sb.append(" <p>大宗商品爬虫系统 © 2026</p>\n");
        sb.append(" </footer>\n");
        sb.append(" </div>\n");
        sb.append("</body>\n");
        sb.append("</html>\n");
        return sb.toString();
    }
}

13
project/src/main/resources/application.properties

@ -0,0 +1,13 @@
# Database configuration - SQLite persistent storage (data is kept in a file)
db.driver=org.sqlite.JDBC
db.url=jdbc:sqlite:./data/commodity.db
db.username=
db.password=
# Crawler configuration
crawl.page.count=30
# Output configuration
output.log.dir=./logs/
output.chart.dir=./output/charts/
output.excel.dir=./output/excel/

2
project/src/main/resources/h2-init.sql

@ -0,0 +1,2 @@
-- H2 database initialization script: executes schema.sql loaded from the classpath.
-- NOTE(review): schema.sql uses MySQL-style DDL (CREATE DATABASE / USE / ENGINE=InnoDB);
-- confirm the H2 connection is configured so that it accepts those statements.
RUNSCRIPT FROM 'classpath:/schema.sql';

29
project/src/main/resources/logback.xml

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback setup: console output plus a daily-rolling log file under LOG_PATH. -->
<configuration>
    <property name="LOG_PATH" value="./logs"/>

    <!-- Console output. The GBK charset is presumably chosen for a Chinese-locale Windows
         console; confirm before running on other platforms. -->
    <appender name="STDOUT" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
            <charset>GBK</charset>
        </encoder>
    </appender>

    <!-- Log file, rolled over once per day; at most 30 days of history are kept. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_PATH}/crawler.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_PATH}/crawler.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
            <!-- Fixed encoding so the log file does not depend on the JVM default charset. -->
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!-- Application packages log at DEBUG; everything else at INFO. -->
    <logger name="com.example.crawler" level="DEBUG"/>
    <root level="INFO">
        <appender-ref ref="STDOUT"/>
        <appender-ref ref="FILE"/>
    </root>
</configuration>

46
project/src/main/resources/mapper/IndexDataMapper.xml

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- SQL statements for the index_data table, bound to the IndexDataMapper namespace. -->
<mapper namespace="com.example.crawler.mapper.IndexDataMapper">
<!-- Inserts one row; create_time is filled by the database with CURRENT_TIMESTAMP. -->
<insert id="insert" parameterType="com.example.crawler.model.IndexData">
INSERT INTO index_data (index_name, date, index_value, change_rate,
stock_name, stock_price, turnover_rate, create_time, source)
VALUES (#{indexName}, #{date}, #{indexValue}, #{changeRate},
#{stockName}, #{stockPrice}, #{turnoverRate}, CURRENT_TIMESTAMP, #{source})
</insert>
<!-- Inserts all list items with a single multi-row VALUES clause.
     NOTE(review): an empty list produces a statement with no value tuples; confirm callers never pass one. -->
<insert id="batchInsert" parameterType="java.util.List">
INSERT INTO index_data (index_name, date, index_value, change_rate,
stock_name, stock_price, turnover_rate, create_time, source)
VALUES
<foreach collection="list" item="item" separator=",">
(#{item.indexName}, #{item.date}, #{item.indexValue}, #{item.changeRate},
#{item.stockName}, #{item.stockPrice}, #{item.turnoverRate}, CURRENT_TIMESTAMP, #{item.source})
</foreach>
</insert>
<!-- Returns every row, newest date first. -->
<select id="selectAll" resultType="com.example.crawler.model.IndexData">
SELECT * FROM index_data ORDER BY date DESC
</select>
<!-- Returns the rows of one index, newest date first. -->
<select id="selectByIndexName" resultType="com.example.crawler.model.IndexData">
SELECT * FROM index_data WHERE index_name = #{indexName} ORDER BY date DESC
</select>
<!-- Returns the rows whose date lies between startDate and endDate (both bounds included), newest first. -->
<select id="selectByDateRange" resultType="com.example.crawler.model.IndexData">
SELECT * FROM index_data WHERE date BETWEEN #{startDate} AND #{endDate} ORDER BY date DESC
</select>
<!-- Looks up the rows for one date and one index name. -->
<select id="selectByDateAndIndex" resultType="com.example.crawler.model.IndexData">
SELECT * FROM index_data WHERE date = #{date} AND index_name = #{indexName}
</select>
<!-- Counts all rows in the table. -->
<select id="count" resultType="int">
SELECT COUNT(*) FROM index_data
</select>
<!-- Deletes every row; the missing WHERE clause is intentional for this statement id. -->
<delete id="deleteAll">
DELETE FROM index_data
</delete>
</mapper>

46
project/src/main/resources/mapper/MarketDataMapper.xml

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- SQL statements for the market_data table, bound to the MarketDataMapper namespace. -->
<mapper namespace="com.example.crawler.mapper.MarketDataMapper">
<!-- Inserts one row; create_time is filled by the database with CURRENT_TIMESTAMP. -->
<insert id="insert" parameterType="com.example.crawler.model.MarketData">
INSERT INTO market_data (variety, trade_date, open_price, close_price,
high_price, low_price, volume, change_rate, create_time, source)
VALUES (#{variety}, #{tradeDate}, #{openPrice}, #{closePrice},
#{highPrice}, #{lowPrice}, #{volume}, #{changeRate}, CURRENT_TIMESTAMP, #{source})
</insert>
<!-- Inserts all list items with a single multi-row VALUES clause.
     NOTE(review): an empty list produces a statement with no value tuples; confirm callers never pass one. -->
<insert id="batchInsert" parameterType="java.util.List">
INSERT INTO market_data (variety, trade_date, open_price, close_price,
high_price, low_price, volume, change_rate, create_time, source)
VALUES
<foreach collection="list" item="item" separator=",">
(#{item.variety}, #{item.tradeDate}, #{item.openPrice}, #{item.closePrice},
#{item.highPrice}, #{item.lowPrice}, #{item.volume}, #{item.changeRate}, CURRENT_TIMESTAMP, #{item.source})
</foreach>
</insert>
<!-- Returns every row, newest trade date first. -->
<select id="selectAll" resultType="com.example.crawler.model.MarketData">
SELECT * FROM market_data ORDER BY trade_date DESC
</select>
<!-- Returns the rows of one variety, newest trade date first. -->
<select id="selectByVariety" resultType="com.example.crawler.model.MarketData">
SELECT * FROM market_data WHERE variety = #{variety} ORDER BY trade_date DESC
</select>
<!-- Returns the rows whose trade date lies between startDate and endDate (both bounds included), newest first. -->
<select id="selectByDateRange" resultType="com.example.crawler.model.MarketData">
SELECT * FROM market_data WHERE trade_date BETWEEN #{startDate} AND #{endDate} ORDER BY trade_date DESC
</select>
<!-- Looks up the rows for one trade date and one variety. -->
<select id="selectByDateAndVariety" resultType="com.example.crawler.model.MarketData">
SELECT * FROM market_data WHERE trade_date = #{tradeDate} AND variety = #{variety}
</select>
<!-- Counts the rows of one variety. -->
<select id="countByVariety" resultType="int">
SELECT COUNT(*) FROM market_data WHERE variety = #{variety}
</select>
<!-- Deletes every row; the missing WHERE clause is intentional for this statement id. -->
<delete id="deleteAll">
DELETE FROM market_data
</delete>
</mapper>

46
project/src/main/resources/mapper/NewsDataMapper.xml

@ -0,0 +1,46 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE mapper
PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-mapper.dtd">
<!-- SQL statements for the news_data table, bound to the NewsDataMapper namespace. -->
<mapper namespace="com.example.crawler.mapper.NewsDataMapper">
<!-- Inserts one row; create_time is filled by the database with CURRENT_TIMESTAMP. -->
<insert id="insert" parameterType="com.example.crawler.model.NewsData">
INSERT INTO news_data (title, content, publish_time, related_commodity,
sentiment, create_time, source)
VALUES (#{title}, #{content}, #{publishTime}, #{relatedCommodity},
#{sentiment}, CURRENT_TIMESTAMP, #{source})
</insert>
<!-- Inserts all list items with a single multi-row VALUES clause.
     NOTE(review): an empty list produces a statement with no value tuples; confirm callers never pass one. -->
<insert id="batchInsert" parameterType="java.util.List">
INSERT INTO news_data (title, content, publish_time, related_commodity,
sentiment, create_time, source)
VALUES
<foreach collection="list" item="item" separator=",">
(#{item.title}, #{item.content}, #{item.publishTime}, #{item.relatedCommodity},
#{item.sentiment}, CURRENT_TIMESTAMP, #{item.source})
</foreach>
</insert>
<!-- Returns every row, most recently published first. -->
<select id="selectAll" resultType="com.example.crawler.model.NewsData">
SELECT * FROM news_data ORDER BY publish_time DESC
</select>
<!-- Returns the rows related to one commodity, most recently published first. -->
<select id="selectByCommodity" resultType="com.example.crawler.model.NewsData">
SELECT * FROM news_data WHERE related_commodity = #{commodity} ORDER BY publish_time DESC
</select>
<!-- Returns the rows whose publish time lies between startDate and endDate (both bounds included), newest first. -->
<select id="selectByDateRange" resultType="com.example.crawler.model.NewsData">
SELECT * FROM news_data WHERE publish_time BETWEEN #{startDate} AND #{endDate} ORDER BY publish_time DESC
</select>
<!-- Looks up the rows with an exact title and publish time. -->
<select id="selectByTitleAndTime" resultType="com.example.crawler.model.NewsData">
SELECT * FROM news_data WHERE title = #{title} AND publish_time = #{publishTime}
</select>
<!-- Counts the rows carrying one sentiment value. -->
<select id="countBySentiment" resultType="int">
SELECT COUNT(*) FROM news_data WHERE sentiment = #{sentiment}
</select>
<!-- Deletes every row; the missing WHERE clause is intentional for this statement id. -->
<delete id="deleteAll">
DELETE FROM news_data
</delete>
</mapper>

28
project/src/main/resources/mybatis-config.xml

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE configuration
PUBLIC "-//mybatis.org//DTD Config 3.0//EN"
"http://mybatis.org/dtd/mybatis-3-config.dtd">
<!-- MyBatis runtime configuration: settings, type handlers, the data source and the mapper files. -->
<configuration>
<settings>
<!-- Map snake_case column names to camelCase bean properties automatically. -->
<setting name="mapUnderscoreToCamelCase" value="true"/>
</settings>
<typeHandlers>
<!-- Project-specific handler registered for java.util.Date values. -->
<typeHandler handler="com.example.crawler.util.DateTypeHandler" javaType="java.util.Date"/>
</typeHandlers>
<environments default="development">
<environment id="development">
<transactionManager type="JDBC"/>
<!-- Pooled JDBC data source. The db.* placeholders are not defined in this file;
     presumably they are supplied from application.properties when the factory is built (confirm). -->
<dataSource type="POOLED">
<property name="driver" value="${db.driver}"/>
<property name="url" value="${db.url}"/>
<property name="username" value="${db.username}"/>
<property name="password" value="${db.password}"/>
</dataSource>
</environment>
</environments>
<!-- Mapper XML files, resolved as classpath resources. -->
<mappers>
<mapper resource="mapper/MarketDataMapper.xml"/>
<mapper resource="mapper/IndexDataMapper.xml"/>
<mapper resource="mapper/NewsDataMapper.xml"/>
</mappers>
</configuration>

44
project/src/main/resources/schema.sql

@ -0,0 +1,44 @@
-- Schema bootstrap: creates the database (utf8mb4) and the three tables if they are missing.
CREATE DATABASE IF NOT EXISTS example_db
    DEFAULT CHARACTER SET utf8mb4
    COLLATE utf8mb4_unicode_ci;

USE example_db;

-- Market quotes: one row per (trade_date, variety).
CREATE TABLE IF NOT EXISTS market_data (
    id          BIGINT AUTO_INCREMENT PRIMARY KEY,
    variety     VARCHAR(50)   NOT NULL COMMENT '商品品种',
    trade_date  DATE          NOT NULL COMMENT '交易日期',
    open_price  DECIMAL(18,4)          COMMENT '开盘价',
    close_price DECIMAL(18,4) NOT NULL COMMENT '收盘价',
    high_price  DECIMAL(18,4)          COMMENT '最高价',
    low_price   DECIMAL(18,4)          COMMENT '最低价',
    volume      DECIMAL(20,4)          COMMENT '成交量',
    change_rate DECIMAL(10,4)          COMMENT '涨跌幅(%)',
    create_time DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    source      VARCHAR(50)            COMMENT '数据来源',
    UNIQUE KEY uk_date_variety (trade_date, variety)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='行情数据表';

-- Index values with related stock figures: one row per (date, index_name).
CREATE TABLE IF NOT EXISTS index_data (
    id            BIGINT AUTO_INCREMENT PRIMARY KEY,
    index_name    VARCHAR(100)  NOT NULL COMMENT '指数名称',
    date          DATE          NOT NULL COMMENT '日期',
    index_value   DECIMAL(18,4) NOT NULL COMMENT '指数值',
    change_rate   DECIMAL(10,4)          COMMENT '涨跌幅(%)',
    stock_name    VARCHAR(100)           COMMENT '概念股名称',
    stock_price   DECIMAL(18,4)          COMMENT '股价',
    turnover_rate DECIMAL(10,4)          COMMENT '换手率(%)',
    create_time   DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    source        VARCHAR(50)            COMMENT '数据来源',
    UNIQUE KEY uk_date_index (date, index_name)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='指数数据表';

-- News items with a sentiment label: one row per (title, publish_time).
CREATE TABLE IF NOT EXISTS news_data (
    id                BIGINT AUTO_INCREMENT PRIMARY KEY,
    title             VARCHAR(500) NOT NULL COMMENT '新闻标题',
    content           TEXT                  COMMENT '新闻内容',
    publish_time      DATETIME     NOT NULL COMMENT '发布时间',
    related_commodity VARCHAR(50)           COMMENT '关联商品',
    sentiment         VARCHAR(10)  NOT NULL COMMENT '舆情倾向(利好/利空/中性)',
    create_time       DATETIME DEFAULT CURRENT_TIMESTAMP COMMENT '创建时间',
    source            VARCHAR(50)           COMMENT '数据来源',
    UNIQUE KEY uk_title_time (title, publish_time)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci COMMENT='舆情数据表';

Some files were not shown because too many files changed in this diff

Loading…
Cancel
Save