diff --git a/project（期末项目报告）/202401070210-郑诗艺-期末实验报告.pdf b/project（期末项目报告）/202401070210-郑诗艺-期末实验报告.pdf
new file mode 100644
index 0000000..eef9320
Binary files /dev/null and b/project（期末项目报告）/202401070210-郑诗艺-期末实验报告.pdf differ
diff --git a/project（期末项目报告）/CrawlerMain2/.gitignore b/project（期末项目报告）/CrawlerMain2/.gitignore
new file mode 100644
index 0000000..f68d109
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.gitignore
@@ -0,0 +1,29 @@
+### IntelliJ IDEA ###
+out/
+!**/src/main/**/out/
+!**/src/test/**/out/
+
+### Eclipse ###
+.apt_generated
+.classpath
+.factorypath
+.project
+.settings
+.springBeans
+.sts4-cache
+bin/
+!**/src/main/**/bin/
+!**/src/test/**/bin/
+
+### NetBeans ###
+/nbproject/private/
+/nbbuild/
+/dist/
+/nbdist/
+/.nb-gradle/
+
+### VS Code ###
+.vscode/
+
+### Mac OS ###
+.DS_Store
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/.gitignore b/project（期末项目报告）/CrawlerMain2/.idea/.gitignore
new file mode 100644
index 0000000..7d05e99
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/.gitignore
@@ -0,0 +1,10 @@
+# 默认忽略的文件
+/shelf/
+/workspace.xml
+# 基于编辑器的 HTTP 客户端请求
+/httpRequests/
+# 依赖于环境的 Maven 主目录路径
+/mavenHomeManager.xml
+# Datasource local storage ignored files
+/dataSources/
+/dataSources.local.xml
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/fastjson2_2_0_32.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/fastjson2_2_0_32.xml
new file mode 100644
index 0000000..5565c61
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/fastjson2_2_0_32.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="fastjson2-2.0.32">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/fastjson2-2.0.32.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/jcommon_1_0_24.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jcommon_1_0_24.xml
new file mode 100644
index 0000000..cef0a8d
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jcommon_1_0_24.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="jcommon-1.0.24">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/jcommon-1.0.24.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/jfreechart_1_5_3.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jfreechart_1_5_3.xml
new file mode 100644
index 0000000..6fdf9d7
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jfreechart_1_5_3.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="jfreechart-1.5.3">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/jfreechart-1.5.3.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/jsoup_1_17_2.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jsoup_1_17_2.xml
new file mode 100644
index 0000000..90ce41d
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/jsoup_1_17_2.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="jsoup-1.17.2">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/jsoup-1.17.2.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/kumo_core_1_12.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/kumo_core_1_12.xml
new file mode 100644
index 0000000..c74069d
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/kumo_core_1_12.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="kumo-core-1.12">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/kumo-core-1.12.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_classic_1_4_11.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_classic_1_4_11.xml
new file mode 100644
index 0000000..54a73cf
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_classic_1_4_11.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="logback-classic-1.4.11">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/logback-classic-1.4.11.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_core_1_4_11.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_core_1_4_11.xml
new file mode 100644
index 0000000..fbdb3a1
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/logback_core_1_4_11.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="logback-core-1.4.11">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/logback-core-1.4.11.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/libraries/slf4j_api_2_0_9.xml b/project（期末项目报告）/CrawlerMain2/.idea/libraries/slf4j_api_2_0_9.xml
new file mode 100644
index 0000000..7c49634
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/libraries/slf4j_api_2_0_9.xml
@@ -0,0 +1,9 @@
+<component name="libraryTable">
+  <library name="slf4j-api-2.0.9">
+    <CLASSES>
+      <root url="jar://$USER_HOME$/Downloads/slf4j-api-2.0.9.jar!/" />
+    </CLASSES>
+    <JAVADOC />
+    <SOURCES />
+  </library>
+</component>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/misc.xml b/project（期末项目报告）/CrawlerMain2/.idea/misc.xml
new file mode 100644
index 0000000..3653b1f
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/misc.xml
@@ -0,0 +1,6 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectRootManager" version="2" project-jdk-name="openjdk-26" project-jdk-type="JavaSDK">
+    <output url="file://$PROJECT_DIR$/out" />
+  </component>
+</project>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/.idea/modules.xml b/project（期末项目报告）/CrawlerMain2/.idea/modules.xml
new file mode 100644
index 0000000..8824534
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/.idea/modules.xml
@@ -0,0 +1,8 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="ProjectModuleManager">
+    <modules>
+      <module fileurl="file://$PROJECT_DIR$/CrawlerMain2.iml" filepath="$PROJECT_DIR$/CrawlerMain2.iml" />
+    </modules>
+  </component>
+</project>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/CrawlerMain2.iml b/project（期末项目报告）/CrawlerMain2/CrawlerMain2.iml
new file mode 100644
index 0000000..b3ea8c0
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/CrawlerMain2.iml
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="true">
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="library" name="jsoup-1.17.2" level="project" />
+    <orderEntry type="library" name="jfreechart-1.5.3" level="project" />
+    <orderEntry type="library" name="jcommon-1.0.24" level="project" />
+    <orderEntry type="library" name="kumo-core-1.12" level="project" />
+    <orderEntry type="library" name="logback-classic-1.4.11" level="project" />
+    <orderEntry type="library" name="logback-core-1.4.11" level="project" />
+    <orderEntry type="library" name="slf4j-api-2.0.9" level="project" />
+    <orderEntry type="library" name="fastjson2-2.0.32" level="project" />
+  </component>
+</module>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/README.md b/project（期末项目报告）/CrawlerMain2/README.md
new file mode 100644
index 0000000..2d7bcc1
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/README.md
@@ -0,0 +1,150 @@
+# Java爬虫综合项目（CLI + MVC + Command + 策略模式）
+## 一、项目简介
+本项目实现了一个命令行菜单式爬虫，支持爬取豆瓣电影、王者荣耀英雄、中国天气网天气等数据。采用MVC、命令模式、策略模式、多层结构，集成日志体系与异常分包，代码结构规范，易于扩展和维护。
+
+## 二、项目结构
+````
+CrawlerMain2
+├── .idea                          # IDEA 配置文件
+├── out                            # 编译输出目录
+├── src
+│   ├── command                    # 命令模式模块
+│   │   ├── AnalyzeCommand
+│   │   ├── CommandInvoker
+│   │   ├── CrawlCommand
+│   │   ├── HeroCrawlCommand
+│   │   ├── MovieCrawlCommand
+│   │   ├── WeatherAnalyzeCommand
+│   │   └── WeatherCrawlCommand
+│   ├── controller                 # 控制器与上下文
+│   │   └── CrawlerContext
+│   ├── crawler                    # 策略模式爬虫实现
+│   │   ├── BaseCrawler
+│   │   ├── Crawler
+│   │   ├── HeroCrawler
+│   │   ├── MovieCrawler
+│   │   └── WeatherCrawler
+│   ├── exception                  # 自定义异常体系
+│   ├── model                      # 数据实体类
+│   │   ├── Hero
+│   │   ├── Movie
+│   │   └── Weather
+│   ├── util                       # 工具类
+│   │   └── DataUtil               # JSON 导入导出、增量去重、文件 IO
+│   ├── view                       # 视图层（CLI 菜单交互）
+│   │   └── CrawlerView
+│   ├── CrawlerMain                # 程序入口
+│   └── logback.xml                # 日志配置
+├── .gitignore                     # Git忽略文件配置
+└── CrawlerMain2.iml               # IDEA模块配置
+````
+---
+## 三、功能介绍
+
+- 命令行菜单，操作简单，支持多种数据源抓取
+- 豆瓣电影、王者荣耀英雄、中国天气网天气数据采集
+- 本地数据存储和分析统计
+- MVC分层、命令模式、策略模式设计，扩展方便
+- try-with-resources安全IO，保证资源释放与数据安全
+- 支持JSON序列化导出（movie.json等）与数据备份
+- 历史数据导入恢复功能，支持断点/回溯操作
+- 增量抓取机制，自动去重，避免重复采集
+- 日志体系与异常处理，项目健壮可追踪
+
+---
+
+## 四、依赖说明
+
+- **JDK 8及以上版本**
+- **jsoup**（网页解析）
+- **slf4j**（日志接口）
+- **logback**（日志实现）
+- **fastjson2**（JSON处理框架，完成数据序列化/反序列化）
+- IO相关类（java.io.File、FileReader、IOException等）实现文件操作
+- 集合类（Set、HashSet、Stream），用于增量抓取与数据去重
+
+> 推荐使用 Maven（pom.xml）管理依赖，添加如下：
+
+```xml
+<dependency>
+  <groupId>org.jsoup</groupId>
+  <artifactId>jsoup</artifactId>
+  <version>1.15.3</version>
+</dependency>
+<dependency>
+  <groupId>org.slf4j</groupId>
+  <artifactId>slf4j-api</artifactId>
+  <version>1.7.36</version>
+</dependency>
+<dependency>
+  <groupId>ch.qos.logback</groupId>
+  <artifactId>logback-classic</artifactId>
+  <version>1.2.11</version>
+</dependency>
+<dependency>
+  <groupId>com.alibaba.fastjson2</groupId>
+  <artifactId>fastjson2</artifactId>
+  <version>2.0.47</version>
+</dependency>
+```
+如果不用 Maven，可手动下载相关 jar 包，并在 IDEA 的 Libraries 中添加。
+
+---
+## 五、运行说明（IntelliJ IDEA）
+1. 用 IDEA 打开项目根目录（包含 src/，logback.xml 等）。
+2. 配置 jsoup、slf4j、logback、fastjson2 依赖（建议用 Maven），或手动添加。
+3. 确认DataUtil.java里的数据存储路径有权限。如必要，修改为本地可用的目录。
+4. 右键CrawlerMain.java，选择“Run”，即可启动项目。
+5. 启动后按照命令行菜单提示输入数字操作，体验各类爬取、分析、导出、恢复功能：
+````
+  1 爬取豆瓣电影
+  2 爬取王者荣耀英雄
+  3 爬取全国天气
+  4 电影、英雄数据分析（仅统计，不存储）
+  5 天气数据分析
+  6 导入历史数据
+  0 退出程序
+````
+程序在抓取时自动生成 JSON 文件（movie.json、hero.json、weather.json），支持导入历史数据并增量去重。
+
+---
+##  六、主要设计与包说明
+| 包/类            | 功能描述                                   |
+| ---------------- | ------------------------------------------ |
+| exception        | 自定义异常体系                             |
+| command          | 命令模式相关类（命令封装、调度）           |
+| controller       | 控制器上下文（业务流转管理）               |
+| crawler          | 策略接口与具体爬虫实现（策略模式）         |
+| model            | 数据实体类（电影、英雄、天气）             |
+| util/DataUtil.java | 数据存储、历史恢复、JSON导入导出与增量去重 |
+| view             | CLI菜单视图，交互展示                      |
+---
+## 七、核心工具/新功能说明
+- DataUtil 工具类
+  - 新增方法封装 JSON 序列化与反序列化（依赖 fastjson2 的 JSON 工具类）
+  - 支持文件安全读写（采用 try‑with‑resources 结构，自动释放 IO 资源）
+  - 增量抓取与去重，采用 Set/HashSet 结构自动过滤已采集的对象
+  - 历史数据校验与导入，支持断点恢复（即重新导入 movie.json，恢复进度）
+
+---
+## 八、常见问题
+1. 存储权限报错：请修改 DataUtil 的数据路径为本机有权限的盘符或目录
+2. 依赖缺失或报错：确保 jsoup、slf4j、logback、fastjson2 已配置到项目
+3. JSON导入导出异常：检查 movie.json 是否存在且格式正确，或依赖版本是否兼容
+4. 重复抓取问题：增量去重会自动过滤相同对象，手动修改数据时注意唯一性
+5. 出现 IOException：请检查文件路径、权限是否正常
+
+---
+## 九、扩展与二次开发说明
+- 新增网站爬虫或数据实体：
+  - 在 crawler 包下新建爬虫策略类（如 NewSiteCrawler.java，继承 BaseCrawler）
+  - 新建 command 类，添加进菜单与调度
+  - 扩展 model 数据实体
+- 导出和导入功能可支持更多格式，只需扩展 DataUtil 工具类即可
+
+---
+## 十、作者信息
+- 姓名：郑诗艺
+- 学号：202401070210
+- 班级：大数据管理与应用2402班
+- 日期：2026.5.24
diff --git a/project（期末项目报告）/CrawlerMain2/src/ClassDiagram.puml b/project（期末项目报告）/CrawlerMain2/src/ClassDiagram.puml
new file mode 100644
index 0000000..40f7cca
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/ClassDiagram.puml
@@ -0,0 +1,100 @@
+@startuml 爬虫项目类图_完整竖版一张图
+' 全局设置：垂直布局、紧凑、单张大图、禁止分页
+skinparam layout topdown
+skinparam ranksep 55
+skinparam nodesep 18
+skinparam dpi 300
+skinparam page {
+    width 100%
+    height 100%
+}
+skinparam class {
+    BackgroundColor #f0f5ff
+    BorderColor #222222
+    ArrowColor #222222
+}
+skinparam package {
+    BackgroundColor #eef4ff
+    BorderColor #222222
+}
+
+' 从上到下依次排列所有包，不拆分
+package model {
+    class Hero {
+        -name: String
+        -type: String
+    }
+    class Movie {
+        -title: String
+        -rate: Double
+    }
+    class Weather {
+        -city: String
+        -temp: String
+    }
+}
+
+package crawler {
+    interface Crawler {
+        +startCrawl(): List<?>
+    }
+    abstract class BaseCrawler implements Crawler {
+        +startCrawl(): List<?>
+        +getPage(url: String): Document
+    }
+    class MovieCrawler extends BaseCrawler
+    class HeroCrawler extends BaseCrawler
+    class WeatherCrawler extends BaseCrawler
+}
+
+package exception {
+    class CrawlerException {
+        +CrawlerException(msg: String)
+    }
+    class NetworkException extends CrawlerException
+    class ParseException extends CrawlerException
+}
+
+package command {
+    interface CrawlCommand {
+        +execute(): void
+    }
+    class CommandInvoker {
+        +runCommand(cmd: CrawlCommand): void
+    }
+    class MovieCrawlCommand implements CrawlCommand
+    class HeroCrawlCommand implements CrawlCommand
+    class WeatherCrawlCommand implements CrawlCommand
+    class AnalyzeCommand implements CrawlCommand
+    class WeatherAnalyzeCommand implements CrawlCommand
+    CommandInvoker -[dashed]-> CrawlCommand : 调度
+}
+
+package controller {
+    class CrawlerContext
+}
+
+package view {
+    class CrawlerView {
+        +showMenu(): void
+    }
+}
+
+package util {
+    class DataUtil {
+        +exportJson(): void
+        +importJson(): List<?>
+        +removeDuplicate(): List<?>
+        +analyzeOnly(): void
+        +analyzeWeatherOnly(): void
+    }
+}
+
+class CrawlerMain
+
+' 依赖关系
+CrawlerMain --> CrawlerContext
+CrawlerMain --> CrawlerView
+CrawlerMain --> DataUtil
+DataUtil -[dashed]-> ParseException : throws
+
+@enduml
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/CrawlerMain.java b/project（期末项目报告）/CrawlerMain2/src/CrawlerMain.java
new file mode 100644
index 0000000..9e159f9
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/CrawlerMain.java
@@ -0,0 +1,101 @@
+import command.AnalyzeCommand;
+import command.HeroCrawlCommand;
+import command.MovieCrawlCommand;
+import command.WeatherAnalyzeCommand;
+import command.WeatherCrawlCommand;
+import command.CommandInvoker;
+import controller.CrawlerContext;
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Hero;
+import model.Movie;
+import model.Weather;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+import view.CrawlerView;
+
+import java.io.IOException;
+import java.util.List;
+
+// ===================== 10. 主程序入口 =====================
+public class CrawlerMain {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerMain.class);
+
+    public static void main(String[] args) {
+        logger.info("===== 爬虫程序启动（CLI+MVC+Command+策略模式） =====");
+        CrawlerView view = new CrawlerView();
+        CrawlerContext context = new CrawlerContext();
+        CommandInvoker invoker = new CommandInvoker();
+
+        MovieCrawlCommand movieCmd = new MovieCrawlCommand(context);
+        HeroCrawlCommand heroCmd = new HeroCrawlCommand(context);
+        WeatherCrawlCommand weatherCmd = new WeatherCrawlCommand(context);
+        AnalyzeCommand analyzeCmd = new AnalyzeCommand(movieCmd, heroCmd);
+        WeatherAnalyzeCommand weatherAnalyzeCmd = new WeatherAnalyzeCommand(weatherCmd);
+
+        DataUtil.initFolder();
+
+        while (true) {
+            try {
+                view.showMenu();
+                int op = view.getInput();
+                switch (op) {
+                    case 1:
+                        invoker.runCommand(movieCmd);
+                        view.showMsg("电影爬取完成");
+                        break;
+                    case 2:
+                        invoker.runCommand(heroCmd);
+                        view.showMsg("英雄爬取完成");
+                        break;
+                    case 3:
+                        invoker.runCommand(weatherCmd);
+                        view.showMsg("天气爬取完成");
+                        break;
+                    case 4:
+                        analyzeCmd.execute();
+                        break;
+                    case 5:
+                        weatherAnalyzeCmd.execute();
+                        break;
+                    // 第6项：导入历史数据（无fileName:，无报错）
+                    case 6:
+                        try {
+                            List<Movie> movieList = DataUtil.importJson("movie.json", Movie.class);
+                            List<Hero> heroList = DataUtil.importJson("hero.json", Hero.class);
+                            List<Weather> weatherList = DataUtil.importJson("weather.json", Weather.class);
+                            view.showMsg("✅ 历史数据导入成功！");
+                            view.showMsg("电影：" + movieList.size() + " 条");
+                            view.showMsg("英雄：" + heroList.size() + " 条");
+                            view.showMsg("天气：" + weatherList.size() + " 条");
+                        } catch (IOException e) {
+                            view.showMsg("导入失败：" + e.getMessage());
+                        }
+                        break;
+                    case 0:
+                        view.showMsg("程序退出");
+                        System.exit(0);
+                        break;
+                    default:
+                        view.showMsg("指令错误，请重新输入");
+                }
+            } catch (NumberFormatException e) {
+                view.showMsg("请输入数字！");
+            } catch (NetworkException e) {
+                logger.error("网络异常：", e);
+                view.showMsg("网络异常：" + e.getMessage());
+            } catch (ParseException e) {
+                logger.error("解析异常：", e);
+                view.showMsg("解析异常：" + e.getMessage());
+            } catch (CrawlerException e) {
+                logger.error("爬虫异常：", e);
+                view.showMsg("爬虫异常：" + e.getMessage());
+            }catch (IOException e) {
+                logger.error("IO异常：", e);
+                view.showMsg("IO异常：" + e.getMessage());
+            }
+        }
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/AnalyzeCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/AnalyzeCommand.java
new file mode 100644
index 0000000..35bc41b
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/AnalyzeCommand.java
@@ -0,0 +1,22 @@
+package command;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+
+/**
+ * Command that runs the statistics-only analysis over the movie and hero data currently
+ * held by the two crawl commands it was constructed with.
+ */
+public class AnalyzeCommand implements CrawlCommand {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+
+    private final MovieCrawlCommand movieCmd;
+    private final HeroCrawlCommand heroCmd;
+
+    /**
+     * @param movieCmd crawl command whose result supplies the movie data
+     * @param heroCmd  crawl command whose result supplies the hero data
+     */
+    public AnalyzeCommand(MovieCrawlCommand movieCmd, HeroCrawlCommand heroCmd) {
+        this.heroCmd = heroCmd;
+        this.movieCmd = movieCmd;
+    }
+
+    /**
+     * Passes the latest results of both crawl commands to {@code DataUtil.analyzeOnly}
+     * and logs completion.
+     */
+    @Override
+    public void execute() {
+        // NOTE(review): getResult() is null until the matching crawl command has run;
+        // confirm DataUtil.analyzeOnly tolerates null lists.
+        DataUtil.analyzeOnly(
+                movieCmd.getResult(),
+                heroCmd.getResult());
+        logger.info("电影&英雄数据分析命令执行完成（仅统计）");
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/CommandInvoker.java b/project（期末项目报告）/CrawlerMain2/src/command/CommandInvoker.java
new file mode 100644
index 0000000..8bda699
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/CommandInvoker.java
@@ -0,0 +1,13 @@
+package command;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+
+import java.io.IOException;
+
+/**
+ * Invoker of the command pattern: runs whichever {@link CrawlCommand} it is handed.
+ */
+public class CommandInvoker {
+    /**
+     * Executes the given command by calling its {@code execute()} method. Exceptions
+     * thrown by the command propagate to the caller unchanged.
+     *
+     * @param cmd the command to run
+     * @throws ParseException   if the command reports a parsing failure
+     * @throws NetworkException if the command reports a network failure
+     * @throws CrawlerException if the command reports any other crawler failure
+     * @throws IOException      if the command reports an I/O failure
+     */
+    public void runCommand(CrawlCommand cmd) throws ParseException, NetworkException, CrawlerException, IOException {
+        cmd.execute();
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/CrawlCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/CrawlCommand.java
new file mode 100644
index 0000000..bfcd5a2
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/CrawlCommand.java
@@ -0,0 +1,12 @@
+package command;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+
+import java.io.IOException;
+
+// ===================== 8. Command pattern =====================
+/**
+ * Command abstraction: one executable menu action.
+ */
+public interface CrawlCommand {
+    /**
+     * Performs the action this command represents.
+     *
+     * @throws ParseException   on a parsing failure
+     * @throws NetworkException on a network failure
+     * @throws CrawlerException on any other crawler failure
+     * @throws IOException      on an I/O failure
+     */
+    void execute() throws ParseException, NetworkException, CrawlerException, IOException;
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/HeroCrawlCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/HeroCrawlCommand.java
new file mode 100644
index 0000000..4125917
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/HeroCrawlCommand.java
@@ -0,0 +1,54 @@
+package command;
+
+import controller.CrawlerContext;
+import crawler.HeroCrawler;
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Hero;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class HeroCrawlCommand implements CrawlCommand {
+    private final CrawlerContext context;
+    private List<Hero> heroList;
+    private List<Hero> oldHeroList;
+
+    private static final Logger logger = LoggerFactory.getLogger(HeroCrawlCommand.class);
+
+    public HeroCrawlCommand(CrawlerContext context) {
+        this.context = context;
+    }
+
+    @Override
+    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
+        oldHeroList = DataUtil.importJson("hero.json", Hero.class);
+        logger.info("导入历史英雄数据：{}条", oldHeroList.size());
+
+        // 第四点：增量抓取，过滤重复英雄
+        Set<String> existNames = DataUtil.getExistHeroNames("hero.json");
+
+        context.setCrawlerStrategy(new HeroCrawler());
+        heroList = (List<Hero>) context.executeCrawl();
+
+        heroList = heroList.stream()
+                .filter(hero -> !existNames.contains(hero.getName()))
+                .collect(Collectors.toList());
+
+        heroList.addAll(oldHeroList);
+
+        DataUtil.addAll("英雄数据.txt", heroList);
+        DataUtil.exportJson("hero.json", heroList);
+        logger.info("英雄爬取完成，本次新增：{}条", heroList.size() - oldHeroList.size());
+    }
+
+    public List<Hero> getResult() {
+        return heroList;
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/MovieCrawlCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/MovieCrawlCommand.java
new file mode 100644
index 0000000..ceef898
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/MovieCrawlCommand.java
@@ -0,0 +1,57 @@
+package command;
+
+import controller.CrawlerContext;
+import crawler.MovieCrawler;
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Movie;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class MovieCrawlCommand implements CrawlCommand {
+    private final CrawlerContext context;
+    private List<Movie> movieList;
+    private List<Movie> oldMovieList;
+
+    private static final Logger logger = LoggerFactory.getLogger(MovieCrawlCommand.class);
+
+    public MovieCrawlCommand(CrawlerContext context) {
+        this.context = context;
+    }
+
+    @Override
+    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
+        // 第三点：导入历史数据
+        oldMovieList = DataUtil.importJson("movie.json", Movie.class);
+        logger.info("导入历史电影数据：{}条", oldMovieList.size());
+
+        // 第四点：获取已存在的电影，增量抓取，避免重复
+        Set<String> existTitles = DataUtil.getExistMovieTitles("movie.json");
+
+        context.setCrawlerStrategy(new MovieCrawler());
+        movieList = (List<Movie>) context.executeCrawl();
+
+        // 过滤掉已经存在的电影，只保留新数据
+        movieList = movieList.stream()
+                .filter(movie -> !existTitles.contains(movie.getTitle()))
+                .collect(Collectors.toList());
+
+        // 合并：新数据 + 历史数据
+        movieList.addAll(oldMovieList);
+
+        DataUtil.addAll("电影数据.txt", movieList);
+        DataUtil.exportJson("movie.json", movieList);
+        logger.info("电影爬取完成，本次新增：{}条", movieList.size() - oldMovieList.size());
+    }
+
+    public List<Movie> getResult() {
+        return movieList;
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/WeatherAnalyzeCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/WeatherAnalyzeCommand.java
new file mode 100644
index 0000000..1776add
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/WeatherAnalyzeCommand.java
@@ -0,0 +1,20 @@
+package command;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+
+/**
+ * Command that runs the statistics-only analysis over the weather data currently held
+ * by the weather crawl command it was constructed with.
+ */
+public class WeatherAnalyzeCommand implements CrawlCommand {
+    private static final Logger logger = LoggerFactory.getLogger(WeatherAnalyzeCommand.class);
+
+    private final WeatherCrawlCommand weatherCmd;
+
+    /**
+     * @param weatherCmd crawl command whose result supplies the weather data
+     */
+    public WeatherAnalyzeCommand(WeatherCrawlCommand weatherCmd) {
+        this.weatherCmd = weatherCmd;
+    }
+
+    /**
+     * Passes the latest weather crawl result to {@code DataUtil.analyzeWeatherOnly}
+     * and logs completion.
+     */
+    @Override
+    public void execute() {
+        // NOTE(review): getResult() is null until the weather crawl command has run;
+        // confirm DataUtil.analyzeWeatherOnly tolerates a null list.
+        DataUtil.analyzeWeatherOnly(
+                weatherCmd.getResult());
+        logger.info("天气数据分析命令执行完成（仅统计）");
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/command/WeatherCrawlCommand.java b/project（期末项目报告）/CrawlerMain2/src/command/WeatherCrawlCommand.java
new file mode 100644
index 0000000..08bdd3e
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/command/WeatherCrawlCommand.java
@@ -0,0 +1,54 @@
+package command;
+
+import controller.CrawlerContext;
+import crawler.WeatherCrawler;
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Weather;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import util.DataUtil;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+public class WeatherCrawlCommand implements CrawlCommand {
+    private final CrawlerContext context;
+    private List<Weather> weatherList;
+    private List<Weather> oldWeatherList;
+
+    private static final Logger logger = LoggerFactory.getLogger(WeatherCrawlCommand.class);
+
+    public WeatherCrawlCommand(CrawlerContext context) {
+        this.context = context;
+    }
+
+    @Override
+    public void execute() throws ParseException, NetworkException, CrawlerException, IOException {
+        oldWeatherList = DataUtil.importJson("weather.json", Weather.class);
+        logger.info("导入历史天气数据：{}条", oldWeatherList.size());
+
+        // 第四点：增量抓取，过滤重复城市
+        Set<String> existCities = DataUtil.getExistWeatherCities("weather.json");
+
+        context.setCrawlerStrategy(new WeatherCrawler());
+        weatherList = (List<Weather>) context.executeCrawl();
+
+        weatherList = weatherList.stream()
+                .filter(weather -> !existCities.contains(weather.getCity()))
+                .collect(Collectors.toList());
+
+        weatherList.addAll(oldWeatherList);
+
+        DataUtil.addAll("天气数据.txt", weatherList);
+        DataUtil.exportJson("weather.json", weatherList);
+        logger.info("天气爬取完成，本次新增：{}条", weatherList.size() - oldWeatherList.size());
+    }
+
+    public List<Weather> getResult() {
+        return weatherList;
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/controller/CrawlerContext.java b/project（期末项目报告）/CrawlerMain2/src/controller/CrawlerContext.java
new file mode 100644
index 0000000..fbc7975
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/controller/CrawlerContext.java
@@ -0,0 +1,28 @@
+package controller;
+
+import crawler.Crawler;
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+// ===================== 6. Strategy context =====================
+/**
+ * Strategy-pattern context: holds the currently selected {@link Crawler} and runs it on demand.
+ */
+public class CrawlerContext {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerContext.class);
+
+    private Crawler crawlerStrategy;
+
+    /**
+     * Selects the crawler that the next {@link #executeCrawl()} call will run.
+     *
+     * @param crawlerStrategy the crawler to use
+     */
+    public void setCrawlerStrategy(Crawler crawlerStrategy) {
+        this.crawlerStrategy = crawlerStrategy;
+    }
+
+    /**
+     * Runs the selected crawler and returns whatever it produced.
+     *
+     * @return the list returned by the crawler's {@code startCrawl()}
+     * @throws ParseException   if the crawler reports a parsing failure
+     * @throws NetworkException if the crawler reports a network failure
+     * @throws CrawlerException if no crawler has been selected, or on any other crawler failure
+     */
+    public List<?> executeCrawl() throws ParseException, NetworkException, CrawlerException {
+        if (crawlerStrategy != null) {
+            return crawlerStrategy.startCrawl();
+        }
+        logger.error("未设置爬取策略");
+        throw new CrawlerException("爬取策略未配置");
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/crawler/BaseCrawler.java b/project（期末项目报告）/CrawlerMain2/src/crawler/BaseCrawler.java
new file mode 100644
index 0000000..461be03
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/crawler/BaseCrawler.java
@@ -0,0 +1,64 @@
+package crawler;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import org.jsoup.Connection;
+import org.jsoup.HttpStatusException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+public abstract class BaseCrawler<T> implements Crawler {
+    protected final String baseUrl;
+    private static final Logger logger = LoggerFactory.getLogger(BaseCrawler.class);
+
+    public BaseCrawler(String baseUrl) {
+        this.baseUrl = baseUrl;
+    }
+
+    // 定义和子类匹配的抽象方法签名
+    public abstract List<T> startCrawl() throws ParseException, NetworkException, CrawlerException;
+
+    // 统一请求页面方法（加固防拦截）
+    public Document getPage(String url) throws NetworkException {
+        String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36";
+        String cookie = "ptui_loginuin=; pgv_pvid=123456; RK=randomtest; _qpsvr_localtest=; uin=;";
+
+        int retry = 3;
+        while (retry > 0) {
+            try {
+                Connection conn = Jsoup.connect(url)
+                        .userAgent(userAgent)
+                        .header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8")
+                        .header("Accept-Language", "zh-CN,zh;q=0.9")
+                        .header("Referer", url.contains("douban") ? "https://movie.douban.com/" : "https://pvp.qq.com/")
+                        .header("Connection", "keep-alive")
+                        .cookie("Cookie", cookie)
+                        .timeout(15000)
+                        .followRedirects(true);
+
+                Document doc = conn.get();
+                logger.info("第{}次请求页面：{}", 4 - retry, url);
+                return doc;
+            } catch (HttpStatusException e) {
+                retry--;
+                logger.error("请求页面失败，剩余重试次数：{}", retry, e);
+                if (retry <= 0) {
+                    throw new NetworkException("页面请求彻底失败：" + url, e);
+                }
+                try {
+                    Thread.sleep(2000);
+                } catch (InterruptedException ie) {
+                    Thread.currentThread().interrupt();
+                }
+            } catch (Exception e) {
+                throw new NetworkException("页面请求异常：" + url, e);
+            }
+        }
+        throw new NetworkException("请求超时：" + url);
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/crawler/Crawler.java b/project（期末项目报告）/CrawlerMain2/src/crawler/Crawler.java
new file mode 100644
index 0000000..de0c2a9
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/crawler/Crawler.java
@@ -0,0 +1,23 @@
+package crawler;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+
+import java.util.List;
+
+// ===================== 2. Strategy pattern: abstract strategy interface =====================
+/**
+ * Strategy contract: a single entry point that runs a crawl and hands back its results.
+ */
+public interface Crawler {
+    /**
+     * Runs the crawl.
+     *
+     * @return the collected items; the element type is left to the implementation
+     * @throws ParseException   declared for parsing failures
+     * @throws NetworkException declared for network failures
+     * @throws CrawlerException declared for any other crawl failure
+     */
+    List<?> startCrawl() throws ParseException, NetworkException, CrawlerException;
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/crawler/HeroCrawler.java b/project（期末项目报告）/CrawlerMain2/src/crawler/HeroCrawler.java
new file mode 100644
index 0000000..e304132
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/crawler/HeroCrawler.java
@@ -0,0 +1,61 @@
+package crawler;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Hero;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Crawler that collects hero names from the hero list page at pvp.qq.com.
+ */
+public class HeroCrawler extends BaseCrawler {
+    private static final Logger logger = LoggerFactory.getLogger(HeroCrawler.class);
+
+    /** Points the crawler at the hero list page. */
+    public HeroCrawler() {
+        super("https://pvp.qq.com/web201605/herolist.shtml");
+    }
+
+    /**
+     * Fetches the hero list page and builds one {@link Hero} per link with non-empty text.
+     *
+     * @return the heroes found, in page order
+     * @throws ParseException   if no hero links are found or an unexpected error occurs while parsing
+     * @throws NetworkException if the page cannot be fetched
+     * @throws CrawlerException declared to match the overridden signature
+     */
+    @Override
+    public List<Hero> startCrawl() throws ParseException, NetworkException, CrawlerException {
+        List<Hero> list = new ArrayList<>();
+        logger.info("开始爬取王者荣耀英雄数据");
+        try {
+            Document doc = getPage(baseUrl);
+            Elements heros = doc.select("ul.herolist li a");
+            if (heros.isEmpty()) {
+                throw new ParseException("页面解析失败：未找到英雄列表项");
+            }
+            for (Element h : heros) {
+                String name = h.text().trim();
+                if (!name.isEmpty()) {
+                    list.add(new Hero(name));
+                }
+            }
+            logger.info("英雄爬取完成，共{}条数据", list.size());
+        } catch (CrawlerException e) {
+            // Already a specific crawler failure (network or parse): pass it through unwrapped
+            // so the caller sees the original message instead of a generic wrapper.
+            throw e;
+        } catch (RuntimeException e) {
+            throw new ParseException("英雄数据解析异常", e);
+        }
+        return list;
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/crawler/MovieCrawler.java b/project（期末项目报告）/CrawlerMain2/src/crawler/MovieCrawler.java
new file mode 100644
index 0000000..b58611a
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/crawler/MovieCrawler.java
@@ -0,0 +1,82 @@
+package crawler;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Movie;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+// ===================== 5. Concrete strategy crawler =====================
+/**
+ * Crawler that walks the Douban Top 250 listing page by page and collects title and rating.
+ */
+public class MovieCrawler extends BaseCrawler {
+    private static final Logger logger = LoggerFactory.getLogger(MovieCrawler.class);
+    /** Upper bound (exclusive) of the {@code start} query parameter. */
+    private static final int TOTAL = 250;
+    /** Step of the {@code start} query parameter between consecutive pages. */
+    private static final int PAGE_SIZE = 25;
+    /** Pause between two page requests, in milliseconds. */
+    private static final long PAGE_DELAY_MS = 1000;
+
+    /** Points the crawler at the Top 250 listing. */
+    public MovieCrawler() {
+        super("https://movie.douban.com/top250");
+    }
+
+    /**
+     * Fetches every listing page and builds one {@link Movie} per item that has both a title and a rating.
+     *
+     * @return the movies found, in listing order
+     * @throws ParseException   if a page contains no items or an unexpected error occurs while parsing
+     * @throws NetworkException if a page cannot be fetched
+     * @throws CrawlerException if the thread is interrupted while pausing between pages
+     */
+    @Override
+    public List<Movie> startCrawl() throws ParseException, NetworkException, CrawlerException {
+        List<Movie> list = new ArrayList<>();
+        logger.info("开始爬取豆瓣电影Top250");
+        try {
+            for (int i = 0; i < TOTAL; i += PAGE_SIZE) {
+                if (i > 0) {
+                    // Pause only between requests; there is nothing to wait for after the last page.
+                    Thread.sleep(PAGE_DELAY_MS);
+                }
+                Document doc = getPage(baseUrl + "?start=" + i);
+                Elements items = doc.select(".item");
+                if (items.isEmpty()) {
+                    throw new ParseException("页面解析失败：未找到电影列表项");
+                }
+                for (Element e : items) {
+                    Element titleEle = e.select(".title").first();
+                    Element ratingEle = e.select(".rating_num").first();
+                    if (titleEle == null || ratingEle == null) {
+                        logger.warn("单条电影数据解析失败，跳过");
+                        continue;
+                    }
+                    String title = titleEle.text().split("/")[0].trim();
+                    String rating = ratingEle.text();
+                    list.add(new Movie(title, rating));
+                }
+            }
+            logger.info("豆瓣电影爬取完成，共{}条数据", list.size());
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so code further up the stack can still observe it.
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("爬取被中断", e);
+        } catch (CrawlerException e) {
+            // Network and parse failures already carry a specific message: do not re-wrap them.
+            throw e;
+        } catch (RuntimeException e) {
+            throw new ParseException("电影数据解析异常", e);
+        }
+        return list;
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/crawler/WeatherCrawler.java b/project（期末项目报告）/CrawlerMain2/src/crawler/WeatherCrawler.java
new file mode 100644
index 0000000..5a59b8e
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/crawler/WeatherCrawler.java
@@ -0,0 +1,106 @@
+package crawler;
+
+import exception.CrawlerException;
+import exception.NetworkException;
+import exception.ParseException;
+import model.Weather;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Crawler that reads the first forecast entry of each configured city from weather.com.cn.
+ */
+public class WeatherCrawler extends BaseCrawler {
+    private static final Logger logger = LoggerFactory.getLogger(WeatherCrawler.class);
+    /** Pause between two city requests, in milliseconds. */
+    private static final long CITY_DELAY_MS = 500;
+    /** One row per city: {province, city name, city code used in the page URL}. */
+    private static final String[][] cities = {
+            {"北京", "北京", "101010100"}, {"上海", "上海", "101020100"}, {"天津", "天津", "101030100"}, {"重庆", "重庆", "101040100"},
+            {"河北", "石家庄", "101090101"}, {"山西", "太原", "101100101"}, {"辽宁", "沈阳", "101070101"}, {"吉林", "长春", "101060101"},
+            {"黑龙江", "哈尔滨", "101050101"}, {"江苏", "南京", "101190101"}, {"浙江", "杭州", "101210101"}, {"安徽", "合肥", "101220101"},
+            {"福建", "福州", "101230101"}, {"江西", "南昌", "101240101"}, {"山东", "济南", "101120101"}, {"河南", "郑州", "101180101"},
+            {"湖北", "武汉", "101200101"}, {"湖南", "长沙", "101250101"}, {"广东", "广州", "101280101"}, {"海南", "海口", "101310101"},
+            {"四川", "成都", "101270101"}, {"贵州", "贵阳", "101260101"}, {"云南", "昆明", "101290101"}, {"陕西", "西安", "101110101"},
+            {"甘肃", "兰州", "101160101"}, {"青海", "西宁", "101150101"}, {"内蒙古", "呼和浩特", "101080101"}, {"广西", "南宁", "101300101"},
+            {"西藏", "拉萨", "101140101"}, {"宁夏", "银川", "101170101"}, {"新疆", "乌鲁木齐", "101130101"},
+            {"香港", "香港", "101320101"}, {"澳门", "澳门", "101330101"}, {"台湾", "台北", "101340101"}
+    };
+
+    /** Sets the forecast URL prefix; the city code and ".shtml" are appended per request. */
+    public WeatherCrawler() {
+        super("https://www.weather.com.cn/weather/");
+    }
+
+    /**
+     * Fetches the forecast page of every configured city and records its first forecast entry.
+     *
+     * @return one {@link Weather} per city, in table order
+     * @throws ParseException   if a page has no forecast entry or an unexpected error occurs while parsing
+     * @throws NetworkException if a page cannot be fetched
+     * @throws CrawlerException if the thread is interrupted while pausing between cities
+     */
+    @Override
+    public List<Weather> startCrawl() throws ParseException, NetworkException, CrawlerException {
+        List<Weather> list = new ArrayList<>();
+        logger.info("开始爬取全国城市实时温度数据");
+        try {
+            boolean first = true;
+            for (String[] city : cities) {
+                if (!first) {
+                    // Pause only between requests, not after the last city.
+                    Thread.sleep(CITY_DELAY_MS);
+                }
+                first = false;
+                String province = city[0];
+                String cityName = city[1];
+                String code = city[2];
+                Document doc = getPage(baseUrl + code + ".shtml");
+
+                // The first <li> of the forecast list is taken as today's entry.
+                Element today = doc.select("ul.t li").first();
+                if (today == null) {
+                    throw new ParseException("实时天气解析失败：" + cityName);
+                }
+
+                String tempStr = today.select(".tem").text();
+                String weaStr = today.select(".wea").text();
+                list.add(new Weather(province, cityName, weaStr, leadingTemperature(tempStr)));
+            }
+            logger.info("实时天气爬取完成，共{}条数据", list.size());
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so code further up the stack can still observe it.
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("爬取线程被中断", e);
+        } catch (CrawlerException e) {
+            // Network and parse failures already carry a specific message: do not re-wrap them.
+            throw e;
+        } catch (RuntimeException e) {
+            throw new ParseException("天气数据解析异常", e);
+        }
+        return list;
+    }
+
+    /**
+     * Returns the text before the first "/" (or, when there is no "/", before the first "~"), which the
+     * original author's note identifies as the high temperature. Text without a separator is returned unchanged.
+     *
+     * @param tempStr temperature text of a forecast entry
+     * @return the leading temperature part, possibly empty
+     */
+    private static String leadingTemperature(String tempStr) {
+        for (String separator : new String[] {"/", "~"}) {
+            int idx = tempStr.indexOf(separator);
+            if (idx >= 0) {
+                // substring (unlike split(...)[0]) cannot fail when the text is just the separator.
+                return tempStr.substring(0, idx);
+            }
+        }
+        return tempStr;
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/exception/CrawlerException.java b/project（期末项目报告）/CrawlerMain2/src/exception/CrawlerException.java
new file mode 100644
index 0000000..628602b
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/exception/CrawlerException.java
@@ -0,0 +1,20 @@
+package exception;
+
+/**
+ * Checked exception type that {@code NetworkException} and {@code ParseException} extend.
+ */
+public class CrawlerException extends Exception {
+    /**
+     * @param message description of the failure
+     */
+    public CrawlerException(String message) {
+        super(message);
+    }
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public CrawlerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/exception/NetworkException.java b/project（期末项目报告）/CrawlerMain2/src/exception/NetworkException.java
new file mode 100644
index 0000000..aa62667
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/exception/NetworkException.java
@@ -0,0 +1,20 @@
+package exception;
+
+/**
+ * Crawler exception used for failures while requesting a page.
+ */
+public class NetworkException extends CrawlerException {
+    /**
+     * @param message description of the failure
+     */
+    public NetworkException(String message) {
+        super(message);
+    }
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public NetworkException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/exception/ParseException.java b/project（期末项目报告）/CrawlerMain2/src/exception/ParseException.java
new file mode 100644
index 0000000..6092fa4
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/exception/ParseException.java
@@ -0,0 +1,20 @@
+package exception;
+
+/**
+ * Crawler exception used for failures while parsing fetched pages or imported data.
+ */
+public class ParseException extends CrawlerException {
+    /**
+     * @param message description of the failure
+     */
+    public ParseException(String message) {
+        super(message);
+    }
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public ParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/logback.xml b/project（期末项目报告）/CrawlerMain2/src/logback.xml
new file mode 100644
index 0000000..0c498dc
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/logback.xml
@@ -0,0 +1,13 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<configuration>
+    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
+        <encoder>
+            <!-- Output pattern: time + log level + logger name + message -->
+            <pattern>%d{HH:mm:ss.SSS} [%level] %logger - %msg%n</pattern>
+            <charset>UTF-8</charset>
+        </encoder>
+    </appender>
+    <root level="INFO">
+        <appender-ref ref="CONSOLE"/>
+    </root>
+</configuration>
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/model/Hero.java b/project（期末项目报告）/CrawlerMain2/src/model/Hero.java
new file mode 100644
index 0000000..69d82bd
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/model/Hero.java
@@ -0,0 +1,30 @@
+package model;
+
+/**
+ * Immutable value object holding a hero's name.
+ */
+public class Hero {
+    private final String name;
+
+    /**
+     * @param name the hero's name
+     */
+    public Hero(String name) {
+        this.name = name;
+    }
+
+    /**
+     * @return the name given at construction
+     */
+    public String getName() {
+        return this.name;
+    }
+
+    /**
+     * @return the name prefixed with the label "英雄："
+     */
+    @Override
+    public String toString() {
+        return String.format("英雄：%s", name);
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/model/Movie.java b/project（期末项目报告）/CrawlerMain2/src/model/Movie.java
new file mode 100644
index 0000000..3d40e7f
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/model/Movie.java
@@ -0,0 +1,60 @@
+package model;
+
+// ===================== 4. MVC-Model: entity class =====================
+/**
+ * Immutable value object for a movie title and its rating as text.
+ */
+public class Movie {
+    private final String title;
+    private final String rating;
+
+    /**
+     * @param title  the movie title
+     * @param rating the rating text, expected to be a decimal number
+     */
+    public Movie(String title, String rating) {
+        this.title = title;
+        this.rating = rating;
+    }
+
+    /**
+     * @return the title given at construction
+     */
+    public String getTitle() {
+        return title;
+    }
+
+    /**
+     * Parses the rating text as a number.
+     *
+     * @return the rating as a {@code double}
+     * @throws IllegalArgumentException if the rating is {@code null} or not a valid decimal number
+     */
+    public double getRatingDouble() {
+        // Double.parseDouble(null) throws NullPointerException, not NumberFormatException,
+        // so a missing rating must be rejected explicitly to honour the documented exception type.
+        if (rating == null) {
+            throw new IllegalArgumentException("评分格式错误：" + rating);
+        }
+        try {
+            return Double.parseDouble(rating);
+        } catch (NumberFormatException e) {
+            throw new IllegalArgumentException("评分格式错误：" + rating, e);
+        }
+    }
+
+    /**
+     * @return the rating text given at construction
+     */
+    public String getRating() {
+        return rating;
+    }
+
+    /**
+     * @return title and rating in the form {@code 电影：《title》 | 评分：rating}
+     */
+    @Override
+    public String toString() {
+        return "电影：《" + title + "》 | 评分：" + rating;
+    }
+}
diff --git a/project（期末项目报告）/CrawlerMain2/src/model/Weather.java b/project（期末项目报告）/CrawlerMain2/src/model/Weather.java
new file mode 100644
index 0000000..d8d0c79
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/model/Weather.java
@@ -0,0 +1,67 @@
+package model;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * Immutable value object for one city's weather reading.
+ */
+public class Weather {
+    /** First run of digits in a text, with an optional leading minus sign. */
+    private static final Pattern FIRST_INT = Pattern.compile("-?\\d+");
+
+    private final String province;
+    private final String city;
+    private final String condition;
+    private final String temperature; // temperature as text
+
+    /**
+     * @param province    province name
+     * @param city        city name
+     * @param condition   weather condition text
+     * @param temperature temperature text; digits may be followed by a unit
+     */
+    public Weather(String province, String city, String condition, String temperature) {
+        this.province = province;
+        this.city = city;
+        this.condition = condition;
+        this.temperature = temperature;
+    }
+
+    public String getProvince() { return province; }
+    public String getCity() { return city; }
+    public String getCondition() { return condition; }
+    public String getTemperature() { return temperature; }
+
+    /**
+     * Extracts the temperature as a number, for sorting and statistics.
+     *
+     * <p>The first integer in the text is used and a leading minus sign is kept, so below-zero
+     * values stay negative.
+     *
+     * @return the parsed temperature, or {@link Integer#MIN_VALUE} when the text is {@code null},
+     *         contains no digits, or does not fit in an {@code int}
+     */
+    public int getTempNum() {
+        if (temperature == null) {
+            return Integer.MIN_VALUE;
+        }
+        Matcher m = FIRST_INT.matcher(temperature);
+        if (!m.find()) {
+            return Integer.MIN_VALUE;
+        }
+        try {
+            return Integer.parseInt(m.group());
+        } catch (NumberFormatException e) {
+            return Integer.MIN_VALUE; // digit run too long for an int
+        }
+    }
+
+    /**
+     * @return all four fields on one line, separated by " | "
+     */
+    @Override
+    public String toString() {
+        return "省份：" + province + " | 城市：" + city + " | 天气：" + condition + " | 实时温度：" + temperature;
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/util/DataUtil.java b/project（期末项目报告）/CrawlerMain2/src/util/DataUtil.java
new file mode 100644
index 0000000..597f35f
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/util/DataUtil.java
@@ -0,0 +1,265 @@
+package util;
+
+import com.alibaba.fastjson2.JSON;
+import com.alibaba.fastjson2.JSONWriter;
+import exception.ParseException;
+import model.Hero;
+import model.Movie;
+import model.Weather;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileReader;
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.IntSummaryStatistics;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+// ===================== 7. MVC-Repository: data repository =====================
+/**
+ * File persistence and console statistics for the crawled data.
+ *
+ * <p>Files live in a fixed data directory and are written and read as UTF-8.
+ */
+public final class DataUtil {
+    private static final String PATH = "D:\\Java爬虫\\";
+    private static final Logger logger = LoggerFactory.getLogger(DataUtil.class);
+
+    private DataUtil() {
+    }
+
+    /** Creates the data directory, including missing parents, when it does not exist yet. */
+    public static void initFolder() {
+        File dir = new File(PATH);
+        if (dir.exists()) {
+            return;
+        }
+        if (dir.mkdirs()) {
+            logger.info("创建目录：{}", PATH);
+        } else {
+            logger.error("目录创建失败：{}", PATH);
+        }
+    }
+
+    /**
+     * Writes text to a file in the data directory, replacing previous content.
+     * Null or blank content is skipped with a warning.
+     *
+     * @param fileName name of the file inside the data directory
+     * @param content  text to write
+     * @throws IllegalArgumentException if {@code fileName} is null or blank
+     * @throws IOException              if the file cannot be written
+     */
+    public static void saveText(String fileName, String content) throws IOException {
+        if (fileName == null || fileName.isBlank()) {
+            throw new IllegalArgumentException("文件名不能为空");
+        }
+        if (content == null || content.isBlank()) {
+            logger.warn("保存文件内容为空，跳过：{}", fileName);
+            return;
+        }
+        // Explicit charset: the platform default varies between systems and JDK versions.
+        try (FileWriter fw = new FileWriter(PATH + fileName, StandardCharsets.UTF_8)) {
+            fw.write(content);
+        }
+        logger.info("文件保存成功：{}", fileName);
+    }
+
+    /**
+     * Saves a list as text, one {@code toString()} line per non-null element, each ended by CRLF.
+     * An empty list is skipped with a warning.
+     *
+     * @param fileName name of the file inside the data directory
+     * @param dataList items to save
+     * @throws NullPointerException if {@code dataList} is null
+     * @throws IOException          if the file cannot be written
+     */
+    public static <T> void addAll(String fileName, List<T> dataList) throws IOException {
+        if (dataList == null) {
+            throw new NullPointerException("待保存数据列表不能为null");
+        }
+        if (dataList.isEmpty()) {
+            logger.warn("批量数据为空，跳过保存：{}", fileName);
+            return;
+        }
+        StringBuilder sb = new StringBuilder();
+        for (T item : dataList) {
+            if (item != null) {
+                sb.append(item).append("\r\n");
+            }
+        }
+        saveText(fileName, sb.toString());
+    }
+
+    /**
+     * Exports a list as pretty-printed JSON. A null or empty list is skipped with a warning.
+     *
+     * @param fileName name of the file inside the data directory
+     * @param dataList items to serialize
+     * @throws IOException if the file cannot be written
+     */
+    public static <T> void exportJson(String fileName, List<T> dataList) throws IOException {
+        if (dataList == null || dataList.isEmpty()) {
+            logger.warn("JSON导出：数据为空，跳过");
+            return;
+        }
+        // Serialize before opening the file so a serialization failure cannot truncate an existing export.
+        String jsonStr = JSON.toJSONString(dataList, JSONWriter.Feature.PrettyFormat);
+        try (FileWriter fw = new FileWriter(PATH + fileName, StandardCharsets.UTF_8)) {
+            fw.write(jsonStr);
+        }
+        logger.info("✅ JSON文件导出成功：{}", fileName);
+    }
+
+    /**
+     * Loads a JSON array file from the data directory.
+     *
+     * @param fileName name of the file inside the data directory
+     * @param clazz    element type to deserialize into
+     * @return the parsed items; an empty list when the file is missing or yields no array
+     * @throws IOException    if the file cannot be read as UTF-8 text
+     * @throws ParseException if the content is not valid JSON for {@code clazz}
+     */
+    public static <T> List<T> importJson(String fileName, Class<T> clazz) throws IOException, ParseException {
+        File file = new File(PATH + fileName);
+        if (!file.exists()) {
+            logger.warn("导入文件不存在：{}", fileName);
+            return Collections.emptyList();
+        }
+        // Read the whole file with an explicit charset. A single read() into a buffer sized by
+        // byte length may stop short and leaves trailing NUL characters for multi-byte text.
+        String jsonStr = Files.readString(file.toPath(), StandardCharsets.UTF_8);
+        try {
+            List<T> parsed = JSON.parseArray(jsonStr, clazz);
+            return parsed != null ? parsed : Collections.emptyList();
+        } catch (com.alibaba.fastjson2.JSONException e) {
+            logger.error("JSON格式解析错误：{}", e.getMessage());
+            // Keep the cause so the original stack trace is not lost.
+            throw new ParseException("数据格式错误，解析失败：" + e.getMessage(), e);
+        }
+    }
+
+    /**
+     * Collects the titles of the movies stored in a JSON export, for incremental crawling.
+     *
+     * @param fileName name of the JSON file inside the data directory
+     * @return the stored titles; an empty set when the file is missing or cannot be loaded
+     */
+    public static Set<String> getExistMovieTitles(String fileName) {
+        return existingKeys(fileName, Movie.class, Movie::getTitle, "读取历史电影数据失败，将全量抓取");
+    }
+
+    /**
+     * Collects the names of the heroes stored in a JSON export, for incremental crawling.
+     *
+     * @param fileName name of the JSON file inside the data directory
+     * @return the stored names; an empty set when the file is missing or cannot be loaded
+     */
+    public static Set<String> getExistHeroNames(String fileName) {
+        return existingKeys(fileName, Hero.class, Hero::getName, "读取历史英雄数据失败，将全量抓取");
+    }
+
+    /**
+     * Collects the city names of the weather items stored in a JSON export, for incremental crawling.
+     *
+     * @param fileName name of the JSON file inside the data directory
+     * @return the stored city names; an empty set when the file is missing or cannot be loaded
+     */
+    public static Set<String> getExistWeatherCities(String fileName) {
+        return existingKeys(fileName, Weather.class, Weather::getCity, "读取历史天气数据失败，将全量抓取");
+    }
+
+    /** Loads a JSON export and collects one key per item; logs a warning and yields an empty set on failure. */
+    private static <T> Set<String> existingKeys(
+            String fileName, Class<T> clazz, Function<T, String> keyOf, String failureMessage) {
+        try {
+            return importJson(fileName, clazz).stream().map(keyOf).collect(Collectors.toSet());
+        } catch (IOException | ParseException e) {
+            logger.warn(failureMessage, e);
+            return new HashSet<>();
+        }
+    }
+
+    /**
+     * Prints the average rating, the number of movies rated 8.5 or higher and the hero count.
+     * Movies whose rating cannot be parsed are skipped with a warning. Nothing is done when either
+     * list is null, and only an error is logged when no rating is valid.
+     *
+     * @param movieList movies to analyze
+     * @param heroList  heroes to count
+     */
+    public static void analyzeOnly(List<Movie> movieList, List<Hero> heroList) {
+        if (movieList == null || heroList == null) {
+            return;
+        }
+        logger.info("===== 电影&英雄数据分析（仅统计，不存储） =====");
+        double sum = 0;
+        int validCount = 0;
+        long highScoreCount = 0;
+        for (Movie movie : movieList) {
+            try {
+                double score = movie.getRatingDouble();
+                sum += score;
+                validCount++;
+                if (score >= 8.5) {
+                    highScoreCount++;
+                }
+            } catch (IllegalArgumentException e) {
+                logger.warn("电影评分解析失败，跳过：{}", movie.getTitle(), e);
+            }
+        }
+        if (validCount == 0) {
+            logger.error("无有效电影评分数据");
+            return;
+        }
+        double avg = sum / validCount;
+        System.out.println("电影平均评分：" + String.format("%.2f", avg));
+        System.out.println("8.5分以上电影数量：" + highScoreCount);
+        System.out.println("英雄总数量：" + heroList.size());
+        logger.info("电影&英雄数据分析结束");
+    }
+
+    /**
+     * Prints how many items have each weather condition, plus the highest, lowest and average temperature.
+     * Items without a parsable temperature are left out of the temperature figures.
+     *
+     * @param weatherList weather items to analyze; a null or empty list is skipped with a warning
+     */
+    public static void analyzeWeatherOnly(List<Weather> weatherList) {
+        if (weatherList == null || weatherList.isEmpty()) {
+            logger.warn("天气数据为空，无法统计");
+            return;
+        }
+        logger.info("===== 全国天气数据分析（仅统计，不存储） =====");
+
+        Map<String, Long> weatherTypeCount = weatherList.stream()
+                .collect(Collectors.groupingBy(Weather::getCondition, Collectors.counting()));
+        System.out.println("\n各天气类型数量：");
+        weatherTypeCount.forEach((type, count) -> System.out.println("  " + type + "：" + count + "个"));
+
+        // getTempNum() reports Integer.MIN_VALUE for an unparsable temperature.
+        IntSummaryStatistics temps = weatherList.stream()
+                .mapToInt(Weather::getTempNum)
+                .filter(t -> t != Integer.MIN_VALUE)
+                .summaryStatistics();
+
+        if (temps.getCount() > 0) {
+            System.out.println("\n温度统计（最高温）：");
+            System.out.println("  最高温度：" + temps.getMax() + "℃");
+            System.out.println("  最低温度：" + temps.getMin() + "℃");
+            System.out.println("  平均温度：" + String.format("%.1f", temps.getAverage()) + "℃");
+        } else {
+            System.out.println("无有效温度数据");
+        }
+        logger.info("天气数据分析结束");
+    }
+}
\ No newline at end of file
diff --git a/project（期末项目报告）/CrawlerMain2/src/view/CrawlerView.java b/project（期末项目报告）/CrawlerMain2/src/view/CrawlerView.java
new file mode 100644
index 0000000..01584cd
--- /dev/null
+++ b/project（期末项目报告）/CrawlerMain2/src/view/CrawlerView.java
@@ -0,0 +1,51 @@
+package view;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Scanner;
+
+// ===================== 9. MVC-View: view layer =====================
+/**
+ * Console view: prints the menu, reads the user's choice and echoes messages.
+ */
+public class CrawlerView {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerView.class);
+    /** Menu entries, printed one per line in this order. */
+    private static final String[] MENU_LINES = {
+            "\n===== 爬虫CLI交互菜单 =====",
+            "1. 爬取豆瓣电影",
+            "2. 爬取王者荣耀英雄",
+            "3. 爬取全国天气",
+            "4. 电影&英雄数据分析（仅统计，不存储）",
+            "5. 天气数据分析（天气类型、最高/最低/平均温）",
+            "6. 导入历史数据",
+            "0. 退出程序"
+    };
+    private final Scanner scanner = new Scanner(System.in);
+
+    /** Prints the menu followed by the input prompt; the prompt has no trailing newline. */
+    public void showMenu() {
+        for (String line : MENU_LINES) {
+            System.out.println(line);
+        }
+        System.out.print("请输入操作指令：");
+    }
+
+    /**
+     * Reads one line from standard input and parses it as an integer.
+     * Surrounding whitespace is ignored, so input such as " 1 " is accepted.
+     *
+     * @return the number entered
+     * @throws NumberFormatException if the line is not an integer
+     */
+    public int getInput() {
+        return Integer.parseInt(scanner.nextLine().trim());
+    }
+
+    /** Prints the message to standard output and also logs it at INFO level. */
+    public void showMsg(String msg) {
+        System.out.println(msg);
+        logger.info(msg);
+    }
+}