Browse Source

提交期末实验报告

main
ZhengShiyi 3 weeks ago
parent
commit
b2a9745709
  1. BIN
      project(期末项目报告)/202401070210-郑诗艺-期末实验报告.pdf
  2. 29
      project(期末项目报告)/CrawlerMain2/.gitignore
  3. 10
      project(期末项目报告)/CrawlerMain2/.idea/.gitignore
  4. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/fastjson2_2_0_32.xml
  5. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/jcommon_1_0_24.xml
  6. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/jfreechart_1_5_3.xml
  7. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/jsoup_1_17_2.xml
  8. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/kumo_core_1_12.xml
  9. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/logback_classic_1_4_11.xml
  10. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/logback_core_1_4_11.xml
  11. 9
      project(期末项目报告)/CrawlerMain2/.idea/libraries/slf4j_api_2_0_9.xml
  12. 6
      project(期末项目报告)/CrawlerMain2/.idea/misc.xml
  13. 8
      project(期末项目报告)/CrawlerMain2/.idea/modules.xml
  14. 19
      project(期末项目报告)/CrawlerMain2/CrawlerMain2.iml
  15. 150
      project(期末项目报告)/CrawlerMain2/README.md
  16. 100
      project(期末项目报告)/CrawlerMain2/src/ClassDiagram.puml
  17. 101
      project(期末项目报告)/CrawlerMain2/src/CrawlerMain.java
  18. 22
      project(期末项目报告)/CrawlerMain2/src/command/AnalyzeCommand.java
  19. 13
      project(期末项目报告)/CrawlerMain2/src/command/CommandInvoker.java
  20. 12
      project(期末项目报告)/CrawlerMain2/src/command/CrawlCommand.java
  21. 54
      project(期末项目报告)/CrawlerMain2/src/command/HeroCrawlCommand.java
  22. 57
      project(期末项目报告)/CrawlerMain2/src/command/MovieCrawlCommand.java
  23. 20
      project(期末项目报告)/CrawlerMain2/src/command/WeatherAnalyzeCommand.java
  24. 54
      project(期末项目报告)/CrawlerMain2/src/command/WeatherCrawlCommand.java
  25. 28
      project(期末项目报告)/CrawlerMain2/src/controller/CrawlerContext.java
  26. 64
      project(期末项目报告)/CrawlerMain2/src/crawler/BaseCrawler.java
  27. 12
      project(期末项目报告)/CrawlerMain2/src/crawler/Crawler.java
  28. 43
      project(期末项目报告)/CrawlerMain2/src/crawler/HeroCrawler.java
  29. 56
      project(期末项目报告)/CrawlerMain2/src/crawler/MovieCrawler.java
  30. 74
      project(期末项目报告)/CrawlerMain2/src/crawler/WeatherCrawler.java
  31. 10
      project(期末项目报告)/CrawlerMain2/src/exception/CrawlerException.java
  32. 10
      project(期末项目报告)/CrawlerMain2/src/exception/NetworkException.java
  33. 10
      project(期末项目报告)/CrawlerMain2/src/exception/ParseException.java
  34. 13
      project(期末项目报告)/CrawlerMain2/src/logback.xml
  35. 18
      project(期末项目报告)/CrawlerMain2/src/model/Hero.java
  36. 33
      project(期末项目报告)/CrawlerMain2/src/model/Movie.java
  37. 34
      project(期末项目报告)/CrawlerMain2/src/model/Weather.java
  38. 195
      project(期末项目报告)/CrawlerMain2/src/util/DataUtil.java
  39. 33
      project(期末项目报告)/CrawlerMain2/src/view/CrawlerView.java

BIN
project(期末项目报告)/202401070210-郑诗艺-期末实验报告.pdf

Binary file not shown.

29
project(期末项目报告)/CrawlerMain2/.gitignore

@ -0,0 +1,29 @@
### IntelliJ IDEA ###
out/
!**/src/main/**/out/
!**/src/test/**/out/
### Eclipse ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
### VS Code ###
.vscode/
### Mac OS ###
.DS_Store

10
project(期末项目报告)/CrawlerMain2/.idea/.gitignore

@ -0,0 +1,10 @@
# 默认忽略的文件
/shelf/
/workspace.xml
# 基于编辑器的 HTTP 客户端请求
/httpRequests/
# 依赖于环境的 Maven 主目录路径
/mavenHomeManager.xml
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/fastjson2_2_0_32.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="fastjson2-2.0.32">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/fastjson2-2.0.32.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/jcommon_1_0_24.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="jcommon-1.0.24">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/jcommon-1.0.24.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/jfreechart_1_5_3.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="jfreechart-1.5.3">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/jfreechart-1.5.3.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/jsoup_1_17_2.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="jsoup-1.17.2">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/jsoup-1.17.2.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/kumo_core_1_12.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="kumo-core-1.12">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/kumo-core-1.12.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/logback_classic_1_4_11.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="logback-classic-1.4.11">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/logback-classic-1.4.11.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/logback_core_1_4_11.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="logback-core-1.4.11">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/logback-core-1.4.11.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

9
project(期末项目报告)/CrawlerMain2/.idea/libraries/slf4j_api_2_0_9.xml

@ -0,0 +1,9 @@
<component name="libraryTable">
<library name="slf4j-api-2.0.9">
<CLASSES>
<root url="jar://$USER_HOME$/Downloads/slf4j-api-2.0.9.jar!/" />
</CLASSES>
<JAVADOC />
<SOURCES />
</library>
</component>

6
project(期末项目报告)/CrawlerMain2/.idea/misc.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="openjdk-26" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
project(期末项目报告)/CrawlerMain2/.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/CrawlerMain2.iml" filepath="$PROJECT_DIR$/CrawlerMain2.iml" />
</modules>
</component>
</project>

19
project(期末项目报告)/CrawlerMain2/CrawlerMain2.iml

@ -0,0 +1,19 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="jsoup-1.17.2" level="project" />
<orderEntry type="library" name="jfreechart-1.5.3" level="project" />
<orderEntry type="library" name="jcommon-1.0.24" level="project" />
<orderEntry type="library" name="kumo-core-1.12" level="project" />
<orderEntry type="library" name="logback-classic-1.4.11" level="project" />
<orderEntry type="library" name="logback-core-1.4.11" level="project" />
<orderEntry type="library" name="slf4j-api-2.0.9" level="project" />
<orderEntry type="library" name="fastjson2-2.0.32" level="project" />
</component>
</module>

150
project(期末项目报告)/CrawlerMain2/README.md

@ -0,0 +1,150 @@
# Java爬虫综合项目(CLI + MVC + Command + 策略模式)
## 一、项目简介
本项目实现了一个命令行菜单式爬虫,支持爬取豆瓣电影、王者荣耀英雄、中国天气网天气等数据。采用MVC、命令模式、策略模式、多层结构,集成日志体系与异常分包,代码结构规范,易于扩展和维护。
## 二、项目结构
````
CrawlerMain2
├── .idea                          # IDEA 配置文件
├── out                            # 编译输出目录
├── src
│   ├── command                    # 命令模式模块
│   │   ├── AnalyzeCommand
│   │   ├── CommandInvoker
│   │   ├── CrawlCommand
│   │   ├── HeroCrawlCommand
│   │   ├── MovieCrawlCommand
│   │   ├── WeatherAnalyzeCommand
│   │   └── WeatherCrawlCommand
│   ├── controller                 # 控制器与上下文
│   │   └── CrawlerContext
│   ├── crawler                    # 策略模式爬虫实现
│   │   ├── BaseCrawler
│   │   ├── Crawler
│   │   ├── HeroCrawler
│   │   ├── MovieCrawler
│   │   └── WeatherCrawler
│   ├── exception                  # 自定义异常体系
│   ├── model                      # 数据实体类
│   │   ├── Hero
│   │   ├── Movie
│   │   └── Weather
│   ├── util                       # 工具类
│   │   └── DataUtil               # JSON 导入导出、增量去重、文件 IO
│   ├── view                       # 视图层(CLI 菜单交互)
│   │   └── CrawlerView
│   ├── CrawlerMain                # 程序入口
│   └── logback.xml                # 日志配置
├── .gitignore                     # Git忽略文件配置
└── CrawlerMain2.iml               # IDEA模块配置
````
---
## 三、功能介绍
- 命令行菜单,操作简单,支持多种数据源抓取
- 豆瓣电影、王者荣耀英雄、中国天气网天气数据采集
- 本地数据存储和分析统计
- MVC分层、命令模式、策略模式设计,扩展方便
- try-with-resources安全IO,保证资源释放与数据安全
- 支持JSON序列化导出(movie.json等)与数据备份
- 历史数据导入恢复功能,支持断点/回溯操作
- 增量抓取机制,自动去重,避免重复采集
- 日志体系与异常处理,项目健壮可追踪
---
## 四、依赖说明
- **JDK 11 及以上版本**(项目使用的 logback 1.4.x 要求 Java 11+)
- **jsoup**(网页解析)
- **slf4j**(日志接口)
- **logback**(日志实现)
- **fastjson2**(JSON处理框架,完成数据序列化/反序列化)
- IO相关类(java.io.File、FileReader、IOException等)实现文件操作
- 集合类(Set、HashSet、Stream),用于增量抓取与数据去重
> 推荐使用 Maven(pom.xml)管理依赖,添加如下:
```xml
<dependency>
<groupId>org.jsoup</groupId>
<artifactId>jsoup</artifactId>
<version>1.17.2</version>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
<version>2.0.9</version>
</dependency>
<dependency>
<groupId>ch.qos.logback</groupId>
<artifactId>logback-classic</artifactId>
<version>1.4.11</version>
</dependency>
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>2.0.32</version>
</dependency>
```
如果不用 Maven,可手动下载相关 jar 包,并在 IDEA 的 Libraries 中添加。
---
## 五、运行说明(IntelliJ IDEA)
1. 用 IDEA 打开项目根目录(包含 src/,logback.xml 等)。
2. 配置 jsoup、slf4j、logback、fastjson2 依赖(建议用 Maven),或手动添加。
3. 确认DataUtil.java里的数据存储路径有权限。如必要,修改为本地可用的目录。
4. 右键CrawlerMain.java,选择“Run”,即可启动项目。
5. 启动后按照命令行菜单提示输入数字操作,体验各类爬取、分析、导出、恢复功能:
````
1 爬取豆瓣电影
2 爬取王者荣耀英雄
3 爬取全国天气
4 电影、英雄数据分析(仅统计,不存储)
5 天气数据分析
6 导入历史数据
0 退出程序
````
程序在抓取时自动生成 JSON 文件(movie.json、hero.json、weather.json),支持导入历史数据并增量去重。
---
## 六、主要设计与包说明
| 包/类 | 功能描述 |
| ---------------- | ------------------------------------------ |
| exception | 自定义异常体系 |
| command | 命令模式相关类(命令封装、调度) |
| controller | 控制器上下文(业务流转管理) |
| crawler | 策略接口与具体爬虫实现 |
| model | 数据实体类(电影、英雄、天气) |
| util/DataUtil.java | 数据存储、历史恢复、JSON导入导出与增量去重 |
| view | CLI菜单视图,交互展示 |
---
## 七、核心工具/新功能说明
- DataUtil 工具类
- 新增方法封装 JSON 序列化与反序列化(依赖 fastjson2 的 JSON 工具类)
- 支持文件安全读写(采用 try‑with‑resources 结构,自动释放 IO 资源)
- 增量抓取与去重,采用 Set/HashSet 结构自动过滤已采集的对象
- 历史数据校验与导入,支持断点恢复(即重新导入 movie.json,恢复进度)
---
## 八、常见问题
1. 存储权限报错:请修改 DataUtil 的数据路径为本机有权限的盘符或目录
2. 依赖缺失或报错:确保 jsoup、slf4j、logback、fastjson2 已配置到项目
3. JSON导入导出异常:检查 movie.json 是否存在且格式正确,或依赖版本是否兼容
4. 重复抓取问题:增量去重会自动过滤相同对象,手动修改数据时注意唯一性
5. 出现 IOException:请检查文件路径、权限是否正常
---
## 九、扩展与二次开发说明
- 新增网站爬虫或数据实体:
- 在 crawler 包中新建策略类(如 NewSiteCrawler.java,继承 BaseCrawler)
- 新建 command 类,添加进菜单与调度
- 扩展 model 数据实体
- 导出和导入功能可支持更多格式,只需扩展 DataUtil 工具类即可
---
## 十、作者信息
- 姓名:郑诗艺
- 学号:202401070210
- 班级:大数据管理与应用2402班
- 日期:2026.5.24

100
project(期末项目报告)/CrawlerMain2/src/ClassDiagram.puml

@ -0,0 +1,100 @@
@startuml 爬虫项目类图_完整竖版一张图
' Global settings: vertical layout, compact spacing, one large picture, no paging
skinparam layout topdown
skinparam ranksep 55
skinparam nodesep 18
skinparam dpi 300
skinparam page {
  width 100%
  height 100%
}
skinparam class {
  BackgroundColor #f0f5ff
  BorderColor #222222
  ArrowColor #222222
}
skinparam package {
  BackgroundColor #eef4ff
  BorderColor #222222
}
' Packages are listed top to bottom and are not split up.
' Members below mirror the Java sources (names and types as declared there).
package model {
  class Hero {
    -name: String
    +getName(): String
  }
  class Movie {
    -title: String
    -rating: String
    +getTitle(): String
    +getRating(): String
    +getRatingDouble(): double
  }
  class Weather {
    -province: String
    -city: String
    -condition: String
    -temperature: String
  }
}
package crawler {
  interface Crawler {
    +startCrawl(): List<?>
  }
  abstract class BaseCrawler<T> implements Crawler {
    #baseUrl: String
    +{abstract} startCrawl(): List<T>
    +getPage(url: String): Document
  }
  class MovieCrawler extends BaseCrawler
  class HeroCrawler extends BaseCrawler
  class WeatherCrawler extends BaseCrawler
}
package exception {
  class CrawlerException {
    +CrawlerException(msg: String)
    +CrawlerException(msg: String, cause: Throwable)
  }
  class NetworkException extends CrawlerException
  class ParseException extends CrawlerException
}
package command {
  interface CrawlCommand {
    +execute(): void
  }
  class CommandInvoker {
    +runCommand(cmd: CrawlCommand): void
  }
  class MovieCrawlCommand implements CrawlCommand
  class HeroCrawlCommand implements CrawlCommand
  class WeatherCrawlCommand implements CrawlCommand
  class AnalyzeCommand implements CrawlCommand
  class WeatherAnalyzeCommand implements CrawlCommand
  CommandInvoker -[dashed]-> CrawlCommand : 调度
}
package controller {
  class CrawlerContext {
    -crawlerStrategy: Crawler
    +setCrawlerStrategy(crawlerStrategy: Crawler): void
    +executeCrawl(): List<?>
  }
}
package view {
  class CrawlerView {
    +showMenu(): void
  }
}
package util {
  class DataUtil {
    +exportJson(): void
    +importJson(): List<?>
    +removeDuplicate(): List<?>
    +analyzeOnly(): void
    +analyzeWeatherOnly(): void
  }
}
class CrawlerMain
' Dependencies
CrawlerMain --> CrawlerContext
CrawlerMain --> CrawlerView
CrawlerMain --> DataUtil
CrawlerContext --> Crawler
DataUtil -[dashed]-> ParseException : throws
@enduml

101
project(期末项目报告)/CrawlerMain2/src/CrawlerMain.java

@ -0,0 +1,101 @@
import command.AnalyzeCommand;
import command.HeroCrawlCommand;
import command.MovieCrawlCommand;
import command.WeatherAnalyzeCommand;
import command.WeatherCrawlCommand;
import command.CommandInvoker;
import controller.CrawlerContext;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Hero;
import model.Movie;
import model.Weather;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
import view.CrawlerView;
import java.io.IOException;
import java.util.List;
// ===================== 10. Program entry point =====================
/**
 * Command-line entry point of the crawler.
 *
 * <p>Builds the view, the strategy context, the invoker and one command object
 * per menu item, then loops forever: show the menu, read a number, run the
 * matching command. Crawler and I/O failures are reported to the user and the
 * loop continues; only menu item 0 terminates the JVM.
 */
public class CrawlerMain {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerMain.class);

    /**
     * Runs the interactive menu loop.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        logger.info("===== 爬虫程序启动(CLI+MVC+Command+策略模式) =====");
        CrawlerView view = new CrawlerView();
        CrawlerContext context = new CrawlerContext();
        CommandInvoker invoker = new CommandInvoker();
        MovieCrawlCommand movieCmd = new MovieCrawlCommand(context);
        HeroCrawlCommand heroCmd = new HeroCrawlCommand(context);
        WeatherCrawlCommand weatherCmd = new WeatherCrawlCommand(context);
        // The analysis commands read the results cached by the crawl commands.
        AnalyzeCommand analyzeCmd = new AnalyzeCommand(movieCmd, heroCmd);
        WeatherAnalyzeCommand weatherAnalyzeCmd = new WeatherAnalyzeCommand(weatherCmd);
        DataUtil.initFolder();
        while (true) {
            try {
                view.showMenu();
                int op = view.getInput();
                switch (op) {
                    case 1:
                        invoker.runCommand(movieCmd);
                        view.showMsg("电影爬取完成");
                        break;
                    case 2:
                        invoker.runCommand(heroCmd);
                        view.showMsg("英雄爬取完成");
                        break;
                    case 3:
                        invoker.runCommand(weatherCmd);
                        view.showMsg("天气爬取完成");
                        break;
                    case 4:
                        // Dispatched through the invoker like every other command.
                        invoker.runCommand(analyzeCmd);
                        break;
                    case 5:
                        invoker.runCommand(weatherAnalyzeCmd);
                        break;
                    case 6:
                        importHistory(view);
                        break;
                    case 0:
                        view.showMsg("程序退出");
                        System.exit(0);
                        break;
                    default:
                        view.showMsg("指令错误,请重新输入");
                }
            } catch (NumberFormatException e) {
                view.showMsg("请输入数字!");
            } catch (NetworkException e) {
                // Subclasses of CrawlerException must be caught before it.
                logger.error("网络异常:", e);
                view.showMsg("网络异常:" + e.getMessage());
            } catch (ParseException e) {
                logger.error("解析异常:", e);
                view.showMsg("解析异常:" + e.getMessage());
            } catch (CrawlerException e) {
                logger.error("爬虫异常:", e);
                view.showMsg("爬虫异常:" + e.getMessage());
            } catch (IOException e) {
                logger.error("IO异常:", e);
                view.showMsg("IO异常:" + e.getMessage());
            }
        }
    }

    /**
     * Menu item 6: loads the three history files and reports how many records
     * each one contained. An {@link IOException} is reported to the user here
     * and not propagated.
     *
     * @param view the view used to print the outcome
     */
    private static void importHistory(CrawlerView view) {
        try {
            List<Movie> movieList = DataUtil.importJson("movie.json", Movie.class);
            List<Hero> heroList = DataUtil.importJson("hero.json", Hero.class);
            List<Weather> weatherList = DataUtil.importJson("weather.json", Weather.class);
            view.showMsg("✅ 历史数据导入成功!");
            view.showMsg("电影:" + movieList.size() + " 条");
            view.showMsg("英雄:" + heroList.size() + " 条");
            view.showMsg("天气:" + weatherList.size() + " 条");
        } catch (IOException e) {
            view.showMsg("导入失败:" + e.getMessage());
        }
    }
}

22
project(期末项目报告)/CrawlerMain2/src/command/AnalyzeCommand.java

@ -0,0 +1,22 @@
package command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
/**
 * Command that runs the statistics-only analysis over the movie and hero
 * results currently held by the two crawl commands.
 */
public class AnalyzeCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);

    private final MovieCrawlCommand movieCmd;
    private final HeroCrawlCommand heroCmd;

    /**
     * @param movieCmd crawl command whose result supplies the movies
     * @param heroCmd  crawl command whose result supplies the heroes
     */
    public AnalyzeCommand(MovieCrawlCommand movieCmd, HeroCrawlCommand heroCmd) {
        this.heroCmd = heroCmd;
        this.movieCmd = movieCmd;
    }

    /**
     * Passes both cached result lists to {@code DataUtil.analyzeOnly} and logs
     * completion. The lists are whatever {@code getResult()} returns at call
     * time, which is {@code null} if the corresponding crawl never ran.
     */
    @Override
    public void execute() {
        DataUtil.analyzeOnly(
                movieCmd.getResult(),
                heroCmd.getResult());
        logger.info("电影&英雄数据分析命令执行完成(仅统计)");
    }
}

13
project(期末项目报告)/CrawlerMain2/src/command/CommandInvoker.java

@ -0,0 +1,13 @@
package command;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import java.io.IOException;
/**
 * Invoker of the Command pattern: runs whichever {@link CrawlCommand} it is given.
 */
public class CommandInvoker {

    /**
     * Executes the given command and lets its exceptions propagate unchanged.
     *
     * @param cmd the command to run
     * @throws ParseException   if the command fails while parsing
     * @throws NetworkException if the command fails while fetching
     * @throws CrawlerException for any other crawler failure
     * @throws IOException      if the command fails while reading or writing files
     */
    public void runCommand(CrawlCommand cmd)
            throws ParseException, NetworkException, CrawlerException, IOException {
        cmd.execute();
    }
}

12
project(期末项目报告)/CrawlerMain2/src/command/CrawlCommand.java

@ -0,0 +1,12 @@
package command;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import java.io.IOException;
// ===================== 8. Command pattern =====================
/**
 * A unit of work that can be triggered from the menu, either a crawl or an analysis.
 */
public interface CrawlCommand {
/**
 * Runs the command.
 *
 * @throws ParseException   if fetched content cannot be parsed
 * @throws NetworkException if a page cannot be fetched
 * @throws CrawlerException for any other crawler failure
 * @throws IOException      if reading or writing local files fails
 */
void execute() throws ParseException, NetworkException, CrawlerException, IOException;
}

54
project(期末项目报告)/CrawlerMain2/src/command/HeroCrawlCommand.java

@ -0,0 +1,54 @@
package command;
import controller.CrawlerContext;
import crawler.HeroCrawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Hero;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Command that crawls the hero list incrementally: heroes whose name is already
 * stored in {@code hero.json} are dropped, the remaining new ones are merged
 * with the stored history, and the merged list is written back.
 */
public class HeroCrawlCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(HeroCrawlCommand.class);

    private final CrawlerContext context;
    private List<Hero> heroList;
    private List<Hero> oldHeroList;

    /**
     * @param context strategy context used to run the hero crawler
     */
    public HeroCrawlCommand(CrawlerContext context) {
        this.context = context;
    }

    /**
     * Loads the history, crawls, filters out known names, merges and persists.
     *
     * @throws ParseException   if the hero page cannot be parsed
     * @throws NetworkException if the hero page cannot be fetched
     * @throws CrawlerException for any other crawler failure
     * @throws IOException      if the history or output files cannot be read or written
     */
    @Override
    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
        oldHeroList = DataUtil.importJson("hero.json", Hero.class);
        logger.info("导入历史英雄数据:{}条", oldHeroList.size());

        // Incremental crawl: names already on disk are filtered out below.
        Set<String> existNames = DataUtil.getExistHeroNames("hero.json");
        context.setCrawlerStrategy(new HeroCrawler());

        // Safe: the strategy installed on the line above is a HeroCrawler,
        // whose startCrawl() is declared to return List<Hero>.
        @SuppressWarnings("unchecked")
        List<Hero> crawled = (List<Hero>) context.executeCrawl();

        // Collectors.toList() makes no mutability guarantee, so collect into an
        // explicit ArrayList before appending the history to it.
        List<Hero> merged = crawled.stream()
                .filter(hero -> !existNames.contains(hero.getName()))
                .collect(Collectors.toCollection(java.util.ArrayList::new));
        merged.addAll(oldHeroList);
        heroList = merged;

        DataUtil.addAll("英雄数据.txt", heroList);
        DataUtil.exportJson("hero.json", heroList);
        logger.info("英雄爬取完成,本次新增:{}条", heroList.size() - oldHeroList.size());
    }

    /**
     * @return the merged list produced by the last {@link #execute()}, or
     *         {@code null} if no crawl has assigned a result yet
     */
    public List<Hero> getResult() {
        return heroList;
    }
}

57
project(期末项目报告)/CrawlerMain2/src/command/MovieCrawlCommand.java

@ -0,0 +1,57 @@
package command;
import controller.CrawlerContext;
import crawler.MovieCrawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Movie;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Command that crawls the movie list incrementally: movies whose title is
 * already stored in {@code movie.json} are dropped, the remaining new ones are
 * merged with the stored history, and the merged list is written back.
 */
public class MovieCrawlCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(MovieCrawlCommand.class);

    private final CrawlerContext context;
    private List<Movie> movieList;
    private List<Movie> oldMovieList;

    /**
     * @param context strategy context used to run the movie crawler
     */
    public MovieCrawlCommand(CrawlerContext context) {
        this.context = context;
    }

    /**
     * Loads the history, crawls, filters out known titles, merges and persists.
     *
     * @throws ParseException   if a movie page cannot be parsed
     * @throws NetworkException if a movie page cannot be fetched
     * @throws CrawlerException for any other crawler failure
     * @throws IOException      if the history or output files cannot be read or written
     */
    @Override
    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
        // Step 3: import the stored history.
        oldMovieList = DataUtil.importJson("movie.json", Movie.class);
        logger.info("导入历史电影数据:{}条", oldMovieList.size());

        // Step 4: titles already on disk, used to skip duplicates.
        Set<String> existTitles = DataUtil.getExistMovieTitles("movie.json");
        context.setCrawlerStrategy(new MovieCrawler());

        // Safe: the strategy installed on the line above is a MovieCrawler,
        // whose startCrawl() is declared to return List<Movie>.
        @SuppressWarnings("unchecked")
        List<Movie> crawled = (List<Movie>) context.executeCrawl();

        // Keep only new movies. Collectors.toList() makes no mutability
        // guarantee, so collect into an explicit ArrayList before appending.
        List<Movie> merged = crawled.stream()
                .filter(movie -> !existTitles.contains(movie.getTitle()))
                .collect(Collectors.toCollection(java.util.ArrayList::new));
        // Merge: new data followed by the history.
        merged.addAll(oldMovieList);
        movieList = merged;

        DataUtil.addAll("电影数据.txt", movieList);
        DataUtil.exportJson("movie.json", movieList);
        logger.info("电影爬取完成,本次新增:{}条", movieList.size() - oldMovieList.size());
    }

    /**
     * @return the merged list produced by the last {@link #execute()}, or
     *         {@code null} if no crawl has assigned a result yet
     */
    public List<Movie> getResult() {
        return movieList;
    }
}

20
project(期末项目报告)/CrawlerMain2/src/command/WeatherAnalyzeCommand.java

@ -0,0 +1,20 @@
package command;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
/**
 * Command that runs the statistics-only analysis over the weather result
 * currently held by the weather crawl command.
 */
public class WeatherAnalyzeCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(WeatherAnalyzeCommand.class);

    private final WeatherCrawlCommand weatherCmd;

    /**
     * @param weatherCmd crawl command whose result supplies the weather records
     */
    public WeatherAnalyzeCommand(WeatherCrawlCommand weatherCmd) {
        this.weatherCmd = weatherCmd;
    }

    /**
     * Passes the cached weather list to {@code DataUtil.analyzeWeatherOnly} and
     * logs completion. The list is whatever {@code getResult()} returns at call
     * time, which is {@code null} if the weather crawl never ran.
     */
    @Override
    public void execute() {
        DataUtil.analyzeWeatherOnly(
                weatherCmd.getResult());
        logger.info("天气数据分析命令执行完成(仅统计)");
    }
}

54
project(期末项目报告)/CrawlerMain2/src/command/WeatherCrawlCommand.java

@ -0,0 +1,54 @@
package command;
import controller.CrawlerContext;
import crawler.WeatherCrawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Weather;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import util.DataUtil;
import java.io.IOException;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Command that crawls city weather incrementally: records whose city is already
 * stored in {@code weather.json} are dropped, the remaining new ones are merged
 * with the stored history, and the merged list is written back.
 */
public class WeatherCrawlCommand implements CrawlCommand {
    private static final Logger logger = LoggerFactory.getLogger(WeatherCrawlCommand.class);

    private final CrawlerContext context;
    private List<Weather> weatherList;
    private List<Weather> oldWeatherList;

    /**
     * @param context strategy context used to run the weather crawler
     */
    public WeatherCrawlCommand(CrawlerContext context) {
        this.context = context;
    }

    /**
     * Loads the history, crawls, filters out known cities, merges and persists.
     *
     * @throws ParseException   if a weather page cannot be parsed
     * @throws NetworkException if a weather page cannot be fetched
     * @throws CrawlerException for any other crawler failure
     * @throws IOException      if the history or output files cannot be read or written
     */
    @Override
    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
        oldWeatherList = DataUtil.importJson("weather.json", Weather.class);
        logger.info("导入历史天气数据:{}条", oldWeatherList.size());

        // Incremental crawl: cities already on disk are filtered out below.
        Set<String> existCities = DataUtil.getExistWeatherCities("weather.json");
        context.setCrawlerStrategy(new WeatherCrawler());

        // Safe: the strategy installed on the line above is a WeatherCrawler,
        // whose startCrawl() is declared to return List<Weather>.
        @SuppressWarnings("unchecked")
        List<Weather> crawled = (List<Weather>) context.executeCrawl();

        // Collectors.toList() makes no mutability guarantee, so collect into an
        // explicit ArrayList before appending the history to it.
        List<Weather> merged = crawled.stream()
                .filter(weather -> !existCities.contains(weather.getCity()))
                .collect(Collectors.toCollection(java.util.ArrayList::new));
        merged.addAll(oldWeatherList);
        weatherList = merged;

        DataUtil.addAll("天气数据.txt", weatherList);
        DataUtil.exportJson("weather.json", weatherList);
        logger.info("天气爬取完成,本次新增:{}条", weatherList.size() - oldWeatherList.size());
    }

    /**
     * @return the merged list produced by the last {@link #execute()}, or
     *         {@code null} if no crawl has assigned a result yet
     */
    public List<Weather> getResult() {
        return weatherList;
    }
}

28
project(期末项目报告)/CrawlerMain2/src/controller/CrawlerContext.java

@ -0,0 +1,28 @@
package controller;
import crawler.Crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
// ===================== 6. Strategy context =====================
/**
 * Holds the currently selected {@link Crawler} strategy and runs it on request.
 */
public class CrawlerContext {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerContext.class);

    private Crawler crawlerStrategy;

    /**
     * Selects the strategy used by the next {@link #executeCrawl()} call.
     *
     * @param crawlerStrategy the crawler to run
     */
    public void setCrawlerStrategy(Crawler crawlerStrategy) {
        this.crawlerStrategy = crawlerStrategy;
    }

    /**
     * Runs the selected strategy and returns what it crawled.
     *
     * @return the list produced by the strategy's {@code startCrawl()}
     * @throws ParseException   if the strategy fails while parsing
     * @throws NetworkException if the strategy fails while fetching
     * @throws CrawlerException if no strategy has been set, or for any other crawler failure
     */
    public List<?> executeCrawl() throws ParseException, NetworkException, CrawlerException {
        if (crawlerStrategy != null) {
            return crawlerStrategy.startCrawl();
        }
        logger.error("未设置爬取策略");
        throw new CrawlerException("爬取策略未配置");
    }
}

64
project(期末项目报告)/CrawlerMain2/src/crawler/BaseCrawler.java

@ -0,0 +1,64 @@
package crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import org.jsoup.Connection;
import org.jsoup.HttpStatusException;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * Base class for the concrete crawlers: stores the start URL and provides a
 * shared page-fetching helper with browser-like headers and a bounded retry.
 *
 * @param <T> element type of the list a concrete crawler produces
 */
public abstract class BaseCrawler<T> implements Crawler {
    private static final Logger logger = LoggerFactory.getLogger(BaseCrawler.class);

    private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36";
    /** Complete value of the Cookie request header (several name=value pairs). */
    private static final String COOKIE_HEADER = "ptui_loginuin=; pgv_pvid=123456; RK=randomtest; _qpsvr_localtest=; uin=;";
    /** Total number of requests made for one URL before giving up. */
    private static final int MAX_ATTEMPTS = 3;
    private static final int TIMEOUT_MILLIS = 15000;
    private static final long RETRY_DELAY_MILLIS = 2000L;

    protected final String baseUrl;

    /**
     * @param baseUrl the URL (or URL prefix) the concrete crawler starts from
     */
    public BaseCrawler(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Crawls the target site. The signature matches what the subclasses override.
     *
     * @return the crawled records
     * @throws ParseException   if fetched content cannot be parsed
     * @throws NetworkException if a page cannot be fetched
     * @throws CrawlerException for any other crawler failure
     */
    public abstract List<T> startCrawl() throws ParseException, NetworkException, CrawlerException;

    /**
     * Fetches and parses one page, sending browser-like headers.
     *
     * <p>An HTTP error status is retried, up to {@value #MAX_ATTEMPTS} requests
     * in total with a pause between them. Any other failure aborts immediately.
     *
     * @param url the page to fetch
     * @return the parsed document
     * @throws NetworkException if every attempt returned an HTTP error status,
     *                          if the request failed for any other reason, or if
     *                          the thread was interrupted while waiting to retry
     */
    public Document getPage(String url) throws NetworkException {
        String referer = url.contains("douban") ? "https://movie.douban.com/" : "https://pvp.qq.com/";
        for (int attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
            try {
                Connection conn = Jsoup.connect(url)
                        .userAgent(USER_AGENT)
                        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
                        .header("Accept-Language", "zh-CN,zh;q=0.9")
                        .header("Referer", referer)
                        .header("Connection", "keep-alive")
                        // COOKIE_HEADER is already a full header value; cookie(name, value)
                        // would send it as one cookie literally named "Cookie".
                        .header("Cookie", COOKIE_HEADER)
                        .timeout(TIMEOUT_MILLIS)
                        .followRedirects(true);
                Document doc = conn.get();
                logger.info("第{}次请求页面:{}", attempt, url);
                return doc;
            } catch (HttpStatusException e) {
                int remaining = MAX_ATTEMPTS - attempt;
                logger.error("请求页面失败,剩余重试次数:{}", remaining, e);
                if (remaining <= 0) {
                    throw new NetworkException("页面请求彻底失败:" + url, e);
                }
                pauseBeforeRetry(url);
            } catch (Exception e) {
                throw new NetworkException("页面请求异常:" + url, e);
            }
        }
        // Not reached in practice (the last failed attempt throws above); needed for the compiler.
        throw new NetworkException("请求超时:" + url);
    }

    /**
     * Sleeps between two attempts. If the thread is interrupted meanwhile, the
     * interrupt flag is restored and the fetch is aborted rather than retried.
     */
    private static void pauseBeforeRetry(String url) throws NetworkException {
        try {
            Thread.sleep(RETRY_DELAY_MILLIS);
        } catch (InterruptedException ie) {
            Thread.currentThread().interrupt();
            throw new NetworkException("页面请求被中断:" + url, ie);
        }
    }
}

12
project(期末项目报告)/CrawlerMain2/src/crawler/Crawler.java

@ -0,0 +1,12 @@
package crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import java.util.List;
// ===================== 2. Strategy pattern: abstract strategy interface =====================
/**
 * Strategy interface: one implementation per site that can be crawled.
 */
public interface Crawler {
/**
 * Crawls the target site and returns the collected records.
 *
 * @return the crawled records; the element type depends on the implementation
 * @throws ParseException   if fetched content cannot be parsed
 * @throws NetworkException if a page cannot be fetched
 * @throws CrawlerException for any other crawler failure
 */
List<?> startCrawl() throws ParseException, NetworkException, CrawlerException;
}

43
project(期末项目报告)/CrawlerMain2/src/crawler/HeroCrawler.java

@ -0,0 +1,43 @@
package crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Hero;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy that reads hero names from the Honor of Kings hero-list page.
 */
public class HeroCrawler extends BaseCrawler<Hero> {
    private static final Logger logger = LoggerFactory.getLogger(HeroCrawler.class);

    /** Creates a crawler pointed at the hero-list page. */
    public HeroCrawler() {
        super("https://pvp.qq.com/web201605/herolist.shtml");
    }

    /**
     * Fetches the hero-list page and builds one {@link Hero} per non-empty link text.
     *
     * @return the heroes found on the page
     * @throws ParseException   if the page has no hero entries, or if reading it
     *                          fails with an unexpected runtime error
     * @throws NetworkException if the page cannot be fetched
     * @throws CrawlerException declared for interface compatibility
     */
    @Override
    public List<Hero> startCrawl() throws ParseException, NetworkException, CrawlerException {
        List<Hero> list = new ArrayList<>();
        logger.info("开始爬取王者荣耀英雄数据");
        try {
            Document doc = getPage(baseUrl);
            Elements heros = doc.select("ul.herolist li a");
            if (heros.isEmpty()) {
                throw new ParseException("页面解析失败:未找到英雄列表项");
            }
            for (Element h : heros) {
                String name = h.text().trim();
                if (!name.isEmpty()) {
                    list.add(new Hero(name));
                }
            }
            logger.info("英雄爬取完成,共{}条数据", list.size());
        } catch (NetworkException | ParseException e) {
            // Already specific: rethrow as-is so the original message is not
            // hidden behind the generic wrapper below.
            throw e;
        } catch (RuntimeException e) {
            throw new ParseException("英雄数据解析异常", e);
        }
        return list;
    }
}

56
project(期末项目报告)/CrawlerMain2/src/crawler/MovieCrawler.java

@ -0,0 +1,56 @@
package crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Movie;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
// ===================== 5. Concrete strategy crawlers =====================
/**
 * Crawler strategy that reads title and rating from the Douban Top 250 pages.
 */
public class MovieCrawler extends BaseCrawler<Movie> {
    private static final Logger logger = LoggerFactory.getLogger(MovieCrawler.class);

    /** Number of entries requested in total. */
    private static final int TOTAL_MOVIES = 250;
    /** Step of the {@code start} query parameter between two pages. */
    private static final int PAGE_SIZE = 25;
    /** Pause after each page, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 1000L;

    /** Creates a crawler pointed at the Top 250 list. */
    public MovieCrawler() {
        super("https://movie.douban.com/top250");
    }

    /**
     * Walks the list page by page and builds one {@link Movie} per entry that
     * has both a title and a rating element. Entries missing either are skipped.
     *
     * @return the movies found
     * @throws ParseException   if a page has no entries, or if reading it fails
     *                          with an unexpected runtime error
     * @throws NetworkException if a page cannot be fetched
     * @throws CrawlerException if the thread is interrupted between pages
     */
    @Override
    public List<Movie> startCrawl() throws ParseException, NetworkException, CrawlerException {
        List<Movie> list = new ArrayList<>();
        logger.info("开始爬取豆瓣电影Top250");
        try {
            for (int start = 0; start < TOTAL_MOVIES; start += PAGE_SIZE) {
                Document doc = getPage(baseUrl + "?start=" + start);
                Elements items = doc.select(".item");
                if (items.isEmpty()) {
                    throw new ParseException("页面解析失败:未找到电影列表项");
                }
                for (Element item : items) {
                    Element titleEle = item.select(".title").first();
                    Element ratingEle = item.select(".rating_num").first();
                    if (titleEle == null || ratingEle == null) {
                        logger.warn("单条电影数据解析失败,跳过");
                        continue;
                    }
                    // Keep only the part of the title text before the first '/'.
                    String title = titleEle.text().split("/")[0].trim();
                    String rating = ratingEle.text();
                    list.add(new Movie(title, rating));
                }
                Thread.sleep(PAGE_DELAY_MILLIS);
            }
            logger.info("豆瓣电影爬取完成,共{}条数据", list.size());
        } catch (NetworkException | ParseException e) {
            // Already specific: rethrow as-is so the original message is not
            // hidden behind the generic wrapper below.
            throw e;
        } catch (InterruptedException e) {
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new CrawlerException("爬取被中断", e);
        } catch (RuntimeException e) {
            throw new ParseException("电影数据解析异常", e);
        }
        return list;
    }
}

74
project(期末项目报告)/CrawlerMain2/src/crawler/WeatherCrawler.java

@ -0,0 +1,74 @@
package crawler;
import exception.CrawlerException;
import exception.NetworkException;
import exception.ParseException;
import model.Weather;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawler strategy that reads today's weather for a fixed list of cities from
 * weather.com.cn.
 */
public class WeatherCrawler extends BaseCrawler<Weather> {
    private static final Logger logger = LoggerFactory.getLogger(WeatherCrawler.class);

    /** Pause after each city, in milliseconds. */
    private static final long CITY_DELAY_MILLIS = 500L;

    /** One row per city: {province, city name, city code used in the page URL}. */
    private static final String[][] cities = {
            {"北京", "北京", "101010100"}, {"上海", "上海", "101020100"}, {"天津", "天津", "101030100"}, {"重庆", "重庆", "101040100"},
            {"河北", "石家庄", "101090101"}, {"山西", "太原", "101100101"}, {"辽宁", "沈阳", "101070101"}, {"吉林", "长春", "101060101"},
            {"黑龙江", "哈尔滨", "101050101"}, {"江苏", "南京", "101190101"}, {"浙江", "杭州", "101210101"}, {"安徽", "合肥", "101220101"},
            {"福建", "福州", "101230101"}, {"江西", "南昌", "101240101"}, {"山东", "济南", "101120101"}, {"河南", "郑州", "101180101"},
            {"湖北", "武汉", "101200101"}, {"湖南", "长沙", "101250101"}, {"广东", "广州", "101280101"}, {"海南", "海口", "101310101"},
            {"四川", "成都", "101270101"}, {"贵州", "贵阳", "101260101"}, {"云南", "昆明", "101290101"}, {"陕西", "西安", "101110101"},
            {"甘肃", "兰州", "101160101"}, {"青海", "西宁", "101150101"}, {"内蒙古", "呼和浩特", "101080101"}, {"广西", "南宁", "101300101"},
            {"西藏", "拉萨", "101140101"}, {"宁夏", "银川", "101170101"}, {"新疆", "乌鲁木齐", "101130101"},
            {"香港", "香港", "101320101"}, {"澳门", "澳门", "101330101"}, {"台湾", "台北", "101340101"}
    };

    /** Creates a crawler pointed at the per-city forecast pages. */
    public WeatherCrawler() {
        super("https://www.weather.com.cn/weather/");
    }

    /**
     * Fetches each city's forecast page and builds one {@link Weather} from the
     * first forecast entry on it.
     *
     * @return one record per configured city
     * @throws ParseException   if a page has no forecast entry, or if reading it
     *                          fails with an unexpected runtime error
     * @throws NetworkException if a page cannot be fetched
     * @throws CrawlerException if the thread is interrupted between cities
     */
    @Override
    public List<Weather> startCrawl() throws ParseException, NetworkException, CrawlerException {
        List<Weather> list = new ArrayList<>();
        logger.info("开始爬取全国城市实时温度数据");
        try {
            for (String[] city : cities) {
                String province = city[0];
                String cityName = city[1];
                String code = city[2];
                Document doc = getPage(baseUrl + code + ".shtml");
                // The first <li> of the forecast list is used as today's entry.
                Element today = doc.select("ul.t li").first();
                if (today == null) {
                    throw new ParseException("实时天气解析失败:" + cityName);
                }
                String tempStr = today.select(".tem").text();
                String weaStr = today.select(".wea").text();
                list.add(new Weather(province, cityName, weaStr, firstTemperature(tempStr)));
                Thread.sleep(CITY_DELAY_MILLIS);
            }
            logger.info("实时天气爬取完成,共{}条数据", list.size());
        } catch (NetworkException | ParseException e) {
            // Already specific: rethrow as-is so the city-specific message is
            // not hidden behind the generic wrapper below.
            throw e;
        } catch (InterruptedException e) {
            // Restore the flag so callers can still observe the interruption.
            Thread.currentThread().interrupt();
            throw new CrawlerException("爬取线程被中断", e);
        } catch (RuntimeException e) {
            throw new ParseException("天气数据解析异常", e);
        }
        return list;
    }

    /**
     * Returns the part of a temperature text before the first "/" or "~"
     * separator (the first value of a range, taken as the high), or the whole
     * text if neither separator is present.
     */
    private static String firstTemperature(String tempStr) {
        if (tempStr.contains("/")) {
            return tempStr.split("/")[0];
        }
        if (tempStr.contains("~")) {
            return tempStr.split("~")[0];
        }
        return tempStr;
    }
}

10
project(期末项目报告)/CrawlerMain2/src/exception/CrawlerException.java

@ -0,0 +1,10 @@
package exception;
/**
 * Root of the crawler's checked exception hierarchy.
 */
public class CrawlerException extends Exception {
    // Exception is Serializable; pin the version so the serialized form does
    // not depend on a compiler-generated id.
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception, kept for diagnostics
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }
}

10
project(期末项目报告)/CrawlerMain2/src/exception/NetworkException.java

@ -0,0 +1,10 @@
package exception;
/**
 * Signals that a page could not be fetched.
 */
public class NetworkException extends CrawlerException {
    // Exception is Serializable; pin the version so the serialized form does
    // not depend on a compiler-generated id.
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public NetworkException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception, kept for diagnostics
     */
    public NetworkException(String message, Throwable cause) {
        super(message, cause);
    }
}

10
project(期末项目报告)/CrawlerMain2/src/exception/ParseException.java

@ -0,0 +1,10 @@
package exception;
/**
 * Signals that fetched content could not be parsed into the expected data.
 */
public class ParseException extends CrawlerException {
    // Exception is Serializable; pin the version so the serialized form does
    // not depend on a compiler-generated id.
    private static final long serialVersionUID = 1L;

    /**
     * @param message description of the failure
     */
    public ParseException(String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception, kept for diagnostics
     */
    public ParseException(String message, Throwable cause) {
        super(message, cause);
    }
}

13
project(期末项目报告)/CrawlerMain2/src/logback.xml

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<!-- Log line layout: time of day (millisecond precision), level in brackets, logger name, then the message -->
<pattern>%d{HH:mm:ss.SSS} [%level] %logger - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<root level="INFO">
<appender-ref ref="CONSOLE"/>
</root>
</configuration>

18
project(期末项目报告)/CrawlerMain2/src/model/Hero.java

@ -0,0 +1,18 @@
package model;
/**
 * Immutable model object holding the name of a single hero.
 */
public class Hero {

    private final String name;

    /**
     * @param name the hero's name; stored as given, without validation
     */
    public Hero(String name) {
        this.name = name;
    }

    /**
     * @return the name supplied at construction time
     */
    public String getName() {
        return this.name;
    }

    /**
     * @return the fixed label followed by the hero's name
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("英雄:");
        text.append(name);
        return text.toString();
    }
}

33
project(期末项目报告)/CrawlerMain2/src/model/Movie.java

@ -0,0 +1,33 @@
package model;
// ===================== 4. MVC‑Model:实体类 =====================
/**
 * Immutable model object for one movie: its title and its rating as raw text.
 */
public class Movie {

    private final String title;
    private final String rating;

    /**
     * @param title  movie title, stored as given
     * @param rating rating text, stored as given; it is only parsed on demand by
     *               {@link #getRatingDouble()}
     */
    public Movie(String title, String rating) {
        this.title = title;
        this.rating = rating;
    }

    /**
     * @return the title supplied at construction time
     */
    public String getTitle() {
        return title;
    }

    /**
     * Parses the stored rating text as a {@code double}.
     *
     * @return the numeric rating
     * @throws IllegalArgumentException if the rating is {@code null} or is not a valid
     *                                  floating-point literal
     */
    public double getRatingDouble() {
        // Double.parseDouble(null) throws NullPointerException, not NumberFormatException,
        // so a missing rating must be rejected explicitly to honour the documented contract.
        if (rating == null) {
            throw new IllegalArgumentException("评分格式错误:" + rating);
        }
        try {
            return Double.parseDouble(rating);
        } catch (NumberFormatException e) {
            throw new IllegalArgumentException("评分格式错误:" + rating, e);
        }
    }

    /**
     * @return the raw rating text supplied at construction time
     */
    public String getRating() {
        return rating;
    }

    @Override
    public String toString() {
        return "电影:《" + title + "》 | 评分:" + rating;
    }
}

34
project(期末项目报告)/CrawlerMain2/src/model/Weather.java

@ -0,0 +1,34 @@
package model;
/**
 * Immutable model object for one city's weather: province, city, condition text
 * and temperature text.
 */
public class Weather {

    /** First optionally-negative run of ASCII digits in a temperature string. */
    private static final java.util.regex.Pattern SIGNED_INT =
            java.util.regex.Pattern.compile("-?\\d+");

    private final String province;
    private final String city;
    private final String condition;
    private final String temperature; // temperature as raw text, e.g. with a unit suffix

    /**
     * @param province    province name
     * @param city        city name
     * @param condition   weather condition text
     * @param temperature temperature text; parsed on demand by {@link #getTempNum()}
     */
    public Weather(String province, String city, String condition, String temperature) {
        this.province = province;
        this.city = city;
        this.condition = condition;
        this.temperature = temperature;
    }

    public String getProvince() {
        return province;
    }

    public String getCity() {
        return city;
    }

    public String getCondition() {
        return condition;
    }

    public String getTemperature() {
        return temperature;
    }

    /**
     * Extracts the numeric temperature for sorting and statistics.
     *
     * <p>The first signed integer found in the text is used, so a leading minus sign is
     * kept ("-5℃" yields -5) and only the first number of a composite value is taken.
     *
     * @return the parsed temperature, or {@link Integer#MIN_VALUE} when the text is
     *         {@code null}, contains no digits, or does not fit in an {@code int}
     */
    public int getTempNum() {
        if (temperature == null) {
            return Integer.MIN_VALUE;
        }
        java.util.regex.Matcher matcher = SIGNED_INT.matcher(temperature);
        if (!matcher.find()) {
            return Integer.MIN_VALUE;
        }
        try {
            return Integer.parseInt(matcher.group());
        } catch (NumberFormatException e) {
            // Digit run too long for an int; treat as "no usable temperature".
            return Integer.MIN_VALUE;
        }
    }

    @Override
    public String toString() {
        return "省份:" + province + " | 城市:" + city + " | 天气:" + condition + " | 实时温度:" + temperature;
    }
}

195
project(期末项目报告)/CrawlerMain2/src/util/DataUtil.java

@ -0,0 +1,195 @@
package util;
import com.alibaba.fastjson2.JSON;
import com.alibaba.fastjson2.JSONWriter;
import exception.ParseException;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import model.Hero;
import model.Movie;
import model.Weather;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
// ===================== 7. MVC‑Repository:数据仓库 =====================
/**
 * Static helpers for saving and loading crawled data under a fixed output directory,
 * and for printing simple statistics about it. Not instantiable.
 *
 * <p>All files are read and written as UTF-8.
 * NOTE(review): files written earlier under a non-UTF-8 platform default charset may
 * fail to import (reported as {@link IOException}) - confirm whether any exist.
 */
public final class DataUtil {

    // Output directory; every file name is appended to it verbatim.
    private static final String PATH = "D:\\Java爬虫\\";
    private static final Logger logger = LoggerFactory.getLogger(DataUtil.class);
    // Ratings at or above this value are counted as "high score" in analyzeOnly.
    private static final double HIGH_SCORE_THRESHOLD = 8.5;

    private DataUtil() {
    }

    /**
     * Creates the output directory (including parents) if it does not exist yet,
     * logging whether creation succeeded.
     */
    public static void initFolder() {
        File dir = new File(PATH);
        if (dir.exists()) {
            return;
        }
        if (dir.mkdirs()) {
            logger.info("创建目录:{}", PATH);
        } else {
            logger.error("目录创建失败:{}", PATH);
        }
    }

    /**
     * Writes {@code content} to the named file in the output directory, replacing any
     * existing file. Blank or {@code null} content is skipped with a warning.
     *
     * @param fileName file name relative to the output directory
     * @param content  text to write
     * @throws IllegalArgumentException if {@code fileName} is {@code null} or blank
     * @throws IOException              if the file cannot be written
     */
    public static void saveText(String fileName, String content) throws IOException {
        if (fileName == null || fileName.isBlank()) {
            throw new IllegalArgumentException("文件名不能为空");
        }
        if (content == null || content.isBlank()) {
            logger.warn("保存文件内容为空,跳过:{}", fileName);
            return;
        }
        try (FileWriter fw = new FileWriter(PATH + fileName, StandardCharsets.UTF_8)) {
            fw.write(content);
        }
        logger.info("文件保存成功:{}", fileName);
    }

    /**
     * Saves every non-null element of {@code dataList} as one CRLF-terminated line
     * (using {@code toString()}) via {@link #saveText(String, String)}.
     *
     * @param fileName file name relative to the output directory
     * @param dataList items to save; an empty list is skipped with a warning
     * @throws NullPointerException if {@code dataList} is {@code null}
     * @throws IOException          if the file cannot be written
     */
    public static <T> void addAll(String fileName, List<T> dataList) throws IOException {
        Objects.requireNonNull(dataList, "待保存数据列表不能为null");
        if (dataList.isEmpty()) {
            logger.warn("批量数据为空,跳过保存:{}", fileName);
            return;
        }
        StringBuilder sb = new StringBuilder();
        for (T item : dataList) {
            if (item != null) {
                sb.append(item).append("\r\n");
            }
        }
        saveText(fileName, sb.toString());
    }

    /**
     * Serializes {@code dataList} as pretty-printed JSON into the named file.
     * A {@code null} or empty list is skipped with a warning.
     *
     * @param fileName file name relative to the output directory
     * @param dataList items to export
     * @throws IllegalArgumentException if there is data to write but {@code fileName}
     *                                  is {@code null} or blank
     * @throws IOException              if the file cannot be written
     */
    public static <T> void exportJson(String fileName, List<T> dataList) throws IOException {
        if (dataList == null || dataList.isEmpty()) {
            logger.warn("JSON导出:数据为空,跳过");
            return;
        }
        if (fileName == null || fileName.isBlank()) {
            throw new IllegalArgumentException("文件名不能为空");
        }
        // Serialize before opening the file so a serialization failure cannot leave
        // an existing export truncated.
        String jsonStr = JSON.toJSONString(dataList, JSONWriter.Feature.PrettyFormat);
        try (FileWriter fw = new FileWriter(PATH + fileName, StandardCharsets.UTF_8)) {
            fw.write(jsonStr);
        }
        logger.info("✅ JSON文件导出成功:{}", fileName);
    }

    /**
     * Reads the named JSON file and parses it as a list of {@code clazz}.
     *
     * @param fileName file name relative to the output directory
     * @param clazz    element type to deserialize into
     * @return the parsed list; an empty list when the file does not exist or the parser
     *         yields no list
     * @throws IOException    if the file cannot be read
     * @throws ParseException if the content is not valid JSON for the requested type
     */
    public static <T> List<T> importJson(String fileName, Class<T> clazz) throws IOException, ParseException {
        File file = new File(PATH + fileName);
        if (!file.exists()) {
            logger.warn("导入文件不存在:{}", fileName);
            return Collections.emptyList();
        }
        // Read the whole file as text. Sizing a char buffer from the byte length pads
        // multi-byte content with NUL characters, and a single read() may be partial.
        String jsonStr = Files.readString(file.toPath(), StandardCharsets.UTF_8);
        try {
            List<T> parsed = JSON.parseArray(jsonStr, clazz);
            // Guard against a null result (e.g. for an empty file) so callers can stream safely.
            return parsed != null ? parsed : Collections.emptyList();
        } catch (com.alibaba.fastjson2.JSONException e) {
            logger.error("JSON格式解析错误:{}", e.getMessage());
            throw new ParseException("数据格式错误,解析失败:" + e.getMessage(), e);
        }
    }

    /**
     * Returns the titles of movies already stored in the named JSON file, for
     * de-duplication during incremental crawling.
     *
     * @return the stored titles, or an empty mutable set if the file cannot be read or parsed
     */
    public static Set<String> getExistMovieTitles(String fileName) {
        return existingKeys(fileName, Movie.class, Movie::getTitle, "读取历史电影数据失败,将全量抓取");
    }

    /**
     * Returns the names of heroes already stored in the named JSON file, for
     * de-duplication during incremental crawling.
     *
     * @return the stored names, or an empty mutable set if the file cannot be read or parsed
     */
    public static Set<String> getExistHeroNames(String fileName) {
        return existingKeys(fileName, Hero.class, Hero::getName, "读取历史英雄数据失败,将全量抓取");
    }

    /**
     * Returns the city names already stored in the named weather JSON file, for
     * de-duplication during incremental crawling.
     *
     * @return the stored city names, or an empty mutable set if the file cannot be read or parsed
     */
    public static Set<String> getExistWeatherCities(String fileName) {
        return existingKeys(fileName, Weather.class, Weather::getCity, "读取历史天气数据失败,将全量抓取");
    }

    // Shared implementation of the getExist* methods: import, skip null entries, collect keys.
    private static <T> Set<String> existingKeys(String fileName, Class<T> clazz,
                                                Function<? super T, String> keyOf, String failureMessage) {
        try {
            return importJson(fileName, clazz).stream()
                    .filter(Objects::nonNull)
                    .map(keyOf)
                    .collect(Collectors.toSet());
        } catch (IOException | ParseException e) {
            logger.warn(failureMessage, e);
            return new HashSet<>();
        }
    }

    /**
     * Prints the average movie rating, the number of movies rated 8.5 or higher and the
     * number of heroes. Nothing is stored. Does nothing if either list is {@code null}.
     * Movies whose rating cannot be parsed are skipped with a warning; if none can be
     * parsed an error is logged and nothing is printed.
     *
     * @param movieList movies to analyze
     * @param heroList  heroes to count
     */
    public static void analyzeOnly(List<Movie> movieList, List<Hero> heroList) {
        if (movieList == null || heroList == null) {
            return;
        }
        logger.info("===== 电影&英雄数据分析(仅统计,不存储) =====");
        double sum = 0;
        int validCount = 0;
        long highScoreCount = 0;
        for (Movie movie : movieList) {
            if (movie == null) {
                continue;
            }
            try {
                // Parse once and reuse the value for both the average and the high-score count.
                double rating = movie.getRatingDouble();
                sum += rating;
                validCount++;
                if (rating >= HIGH_SCORE_THRESHOLD) {
                    highScoreCount++;
                }
            } catch (IllegalArgumentException e) {
                logger.warn("电影评分解析失败,跳过:{}", movie.getTitle(), e);
            }
        }
        if (validCount == 0) {
            logger.error("无有效电影评分数据");
            return;
        }
        double avg = sum / validCount;
        System.out.println("电影平均评分:" + String.format("%.2f", avg));
        System.out.println("8.5分以上电影数量:" + highScoreCount);
        System.out.println("英雄总数量:" + heroList.size());
        logger.info("电影&英雄数据分析结束");
    }

    /**
     * Prints how many entries there are per weather condition, plus the maximum, minimum
     * and average of the parsed temperatures. Nothing is stored. Entries that are
     * {@code null} are ignored, as are entries with a {@code null} condition (for the
     * type count) or an unparsable temperature (for the temperature statistics).
     *
     * @param weatherList weather entries to analyze; {@code null} or empty is skipped with a warning
     */
    public static void analyzeWeatherOnly(List<Weather> weatherList) {
        if (weatherList == null || weatherList.isEmpty()) {
            logger.warn("天气数据为空,无法统计");
            return;
        }
        logger.info("===== 全国天气数据分析(仅统计,不存储) =====");
        List<Weather> present = weatherList.stream()
                .filter(Objects::nonNull)
                .collect(Collectors.toList());
        // groupingBy rejects null keys, so entries without a condition are left out of the count.
        Map<String, Long> weatherTypeCount = present.stream()
                .filter(w -> w.getCondition() != null)
                .collect(Collectors.groupingBy(Weather::getCondition, Collectors.counting()));
        System.out.println("\n各天气类型数量:");
        weatherTypeCount.forEach((type, count) -> System.out.println(" " + type + ":" + count + "个"));
        // Integer.MIN_VALUE is the marker getTempNum() returns for an unparsable temperature.
        List<Integer> temps = present.stream()
                .map(Weather::getTempNum)
                .filter(t -> t != Integer.MIN_VALUE)
                .collect(Collectors.toList());
        if (!temps.isEmpty()) {
            int maxTemp = Collections.max(temps);
            int minTemp = Collections.min(temps);
            double avgTemp = temps.stream().mapToInt(Integer::intValue).average().orElse(0);
            System.out.println("\n温度统计(最高温):");
            System.out.println(" 最高温度:" + maxTemp + "℃");
            System.out.println(" 最低温度:" + minTemp + "℃");
            System.out.println(" 平均温度:" + String.format("%.1f", avgTemp) + "℃");
        } else {
            System.out.println("无有效温度数据");
        }
        logger.info("天气数据分析结束");
    }
}

33
project(期末项目报告)/CrawlerMain2/src/view/CrawlerView.java

@ -0,0 +1,33 @@
package view;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.Scanner;
// ===================== 9. MVC‑View:视图层 =====================
/**
 * Console view: prints the menu, reads the user's numeric choice from standard input
 * and displays messages.
 */
public class CrawlerView {

    private static final Logger logger = LoggerFactory.getLogger(CrawlerView.class);
    private final Scanner scanner = new Scanner(System.in);

    /**
     * Prints the interactive menu and the input prompt to standard output.
     */
    public void showMenu() {
        System.out.println("\n===== 爬虫CLI交互菜单 =====");
        System.out.println("1. 爬取豆瓣电影");
        System.out.println("2. 爬取王者荣耀英雄");
        System.out.println("3. 爬取全国天气");
        System.out.println("4. 电影&英雄数据分析(仅统计,不存储)");
        System.out.println("5. 天气数据分析(天气类型、最高/最低/平均温)");
        System.out.println("6. 导入历史数据");
        System.out.println("0. 退出程序");
        System.out.print("请输入操作指令:");
    }

    /**
     * Reads one line from standard input and parses it as an integer menu choice.
     * Leading and trailing whitespace is ignored, so input such as {@code " 3 "} is accepted.
     *
     * @return the number the user entered
     * @throws NumberFormatException            if the line is not an integer
     * @throws java.util.NoSuchElementException if standard input has no more lines
     */
    public int getInput() {
        String line = scanner.nextLine();
        return Integer.parseInt(line.trim());
    }

    /**
     * Prints the message to standard output and also records it at INFO level.
     *
     * @param msg text to display
     */
    public void showMsg(String msg) {
        System.out.println(msg);
        logger.info(msg);
    }
}
Loading…
Cancel
Save