From e29de1d130ec4947a0551adb08997e0a0d963c9a Mon Sep 17 00:00:00 2001
From: Linyating <2704777949@qq.com>
Date: Thu, 21 May 2026 10:27:57 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20'W11'?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" line and the first "diff --git" line is
not part of the commit):
- Every header and hunk line is back on its own line; the patch had been
  collapsed onto a few very long lines.
- BaseCommand now declares the type parameter T that its fields and methods
  already used.
- Javadoc was added to the three Java files, so their hunk sizes, the diffstat
  and the insertion total were recounted. The "index" blob ids are unchanged
  from the original patch and no longer describe those three files.
- W11/.classpath and W11/logback.xml appear to have lost their XML markup
  before this review; only the text that survived is reproduced below.

 W11/.classpath         |   7 +++
 W11/AnsiColorUtil.java |  69 ++++++++++++++++++++++
 W11/BaseCommand.java   |  53 +++++++++++++++++
 W11/BaseCrawler.java   |  62 ++++++++++++++++++++
 W11/README.md          | 126 +++++++++++++++++++++++++++++++++++++++++
 W11/logback.xml        |  22 +++++++
 6 files changed, 339 insertions(+)
 create mode 100644 W11/.classpath
 create mode 100644 W11/AnsiColorUtil.java
 create mode 100644 W11/BaseCommand.java
 create mode 100644 W11/BaseCrawler.java
 create mode 100644 W11/README.md
 create mode 100644 W11/logback.xml

diff --git a/W11/.classpath b/W11/.classpath
new file mode 100644
index 0000000..a325f62
--- /dev/null
+++ b/W11/.classpath
@@ -0,0 +1,7 @@
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/W11/AnsiColorUtil.java b/W11/AnsiColorUtil.java
new file mode 100644
index 0000000..1030738
--- /dev/null
+++ b/W11/AnsiColorUtil.java
@@ -0,0 +1,69 @@
+package com.crawler.common;
+
+/**
+ * ANSI escape sequences for coloring console text, plus helpers that wrap a
+ * string in a color and terminate it with {@link #RESET}.
+ */
+public final class AnsiColorUtil {
+    /** Not instantiable: this class only holds constants and static helpers. */
+    private AnsiColorUtil() {}
+
+    /** Appended after colored text by the helpers below to end the coloring. */
+    public static final String RESET = "\u001B[0m";
+
+    // Regular foreground colors (codes 30-37).
+    public static final String BLACK = "\u001B[30m";
+    public static final String RED = "\u001B[31m";
+    public static final String GREEN = "\u001B[32m";
+    public static final String YELLOW = "\u001B[33m";
+    public static final String BLUE = "\u001B[34m";
+    public static final String PURPLE = "\u001B[35m";
+    public static final String CYAN = "\u001B[36m";
+    public static final String WHITE = "\u001B[37m";
+
+    // Bold variants: the same codes prefixed with attribute 1.
+    public static final String BLACK_BOLD = "\u001B[1;30m";
+    public static final String RED_BOLD = "\u001B[1;31m";
+    public static final String GREEN_BOLD = "\u001B[1;32m";
+    public static final String YELLOW_BOLD = "\u001B[1;33m";
+    public static final String BLUE_BOLD = "\u001B[1;34m";
+    public static final String PURPLE_BOLD = "\u001B[1;35m";
+    public static final String CYAN_BOLD = "\u001B[1;36m";
+    public static final String WHITE_BOLD = "\u001B[1;37m";
+
+    /**
+     * Wraps {@code text} in {@code color} and appends {@link #RESET}.
+     *
+     * @param text  text to color
+     * @param color escape sequence to prepend, e.g. {@link #RED}
+     * @return {@code color + text + RESET}
+     */
+    public static String colorize(String text, String color) {
+        return color + text + RESET;
+    }
+
+    /** Returns {@code text} wrapped in {@link #GREEN_BOLD}. */
+    public static String success(String text) {
+        return colorize(text, GREEN_BOLD);
+    }
+
+    /** Returns {@code text} wrapped in {@link #RED_BOLD}. */
+    public static String error(String text) {
+        return colorize(text, RED_BOLD);
+    }
+
+    /** Returns {@code text} wrapped in {@link #YELLOW_BOLD}. */
+    public static String warning(String text) {
+        return colorize(text, YELLOW_BOLD);
+    }
+
+    /** Returns {@code text} wrapped in {@link #BLUE_BOLD}. */
+    public static String info(String text) {
+        return colorize(text, BLUE_BOLD);
+    }
+
+    /** Returns {@code text} wrapped in {@link #CYAN_BOLD}. */
+    public static String highlight(String text) {
+        return colorize(text, CYAN_BOLD);
+    }
+}
\ No newline at end of file
diff --git a/W11/BaseCommand.java b/W11/BaseCommand.java
new file mode 100644
index 0000000..f97afcf
--- /dev/null
+++ b/W11/BaseCommand.java
@@ -0,0 +1,53 @@
+package com.crawler.common;
+
+/**
+ * Base class for commands that hold a model and a {@link ConsoleView}.
+ * Both are stored as given; this class adds no behavior of its own.
+ *
+ * @param <T> type of the model held by the command
+ */
+public abstract class BaseCommand<T> implements Command {
+    protected T model;
+    protected ConsoleView view;
+
+    /**
+     * Creates a command with both the model and the view set.
+     *
+     * @param model model to store
+     * @param view  view to store
+     */
+    public BaseCommand(T model, ConsoleView view) {
+        this.model = model;
+        this.view = view;
+    }
+
+    /**
+     * Creates a command without a model; {@link #getModel()} returns
+     * {@code null} until {@code setModel} is called.
+     *
+     * @param view view to store
+     */
+    public BaseCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** Returns the stored model, or {@code null} if none has been set. */
+    public T getModel() {
+        return model;
+    }
+
+    /** Replaces the stored model. */
+    public void setModel(T model) {
+        this.model = model;
+    }
+
+    /** Returns the stored view. */
+    public ConsoleView getView() {
+        return view;
+    }
+
+    /** Replaces the stored view. */
+    public void setView(ConsoleView view) {
+        this.view = view;
+    }
+}
\ No newline at end of file
diff --git a/W11/BaseCrawler.java b/W11/BaseCrawler.java
new file mode 100644
index 0000000..87ac4b7
--- /dev/null
+++ b/W11/BaseCrawler.java
@@ -0,0 +1,62 @@
+package com.crawler.common;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Base class for crawlers: subclasses produce records with {@link #crawl()}
+ * and supply the conversions used to store them in and load them from CSV.
+ *
+ * <p>NOTE(review): every {@code List} here is a raw type. The element types
+ * are not visible in this patch, so they are left as-is; confirm them against
+ * {@code CsvUtil} and the subclasses before parameterizing.
+ */
+public abstract class BaseCrawler {
+
+    /** Returns the crawled records. */
+    public abstract List crawl();
+
+    /**
+     * Saves {@code data} to {@code filePath} as CSV. When {@code data} is
+     * {@code null} or empty, logs a warning and writes nothing.
+     *
+     * @param data     records to save
+     * @param filePath destination CSV path
+     * @throws IOException declared for the CsvUtil calls; presumably an I/O failure
+     */
+    public void saveToCSV(List data, String filePath) throws IOException {
+        if (data == null || data.isEmpty()) {
+            LoggerUtil.warn("没有数据需要保存");
+            return;
+        }
+
+        CsvUtil.ensureDirectoryExists(filePath);
+        List csvData = convertToCsvFormat(data);
+        CsvUtil.write(filePath, csvData);
+    }
+
+    /**
+     * Loads records from the CSV at {@code filePath}.
+     *
+     * @param filePath CSV path to read
+     * @return the converted rows, or a new empty list (after logging a
+     *         warning) when {@code CsvUtil.fileExists} returns false
+     * @throws IOException declared for the CsvUtil calls; presumably an I/O failure
+     */
+    public List loadFromCSV(String filePath) throws IOException {
+        if (!CsvUtil.fileExists(filePath)) {
+            LoggerUtil.warn("文件不存在: {}", filePath);
+            return new ArrayList<>();
+        }
+
+        List csvData = CsvUtil.read(filePath);
+        return convertFromCsvFormat(csvData);
+    }
+
+    /** Converts records to the row form passed to {@code CsvUtil.write}. */
+    protected abstract List convertToCsvFormat(List data);
+
+    /** Converts rows returned by {@code CsvUtil.read} back to records. */
+    protected abstract List convertFromCsvFormat(List csvData);
+}
\ No newline at end of file
diff --git a/W11/README.md b/W11/README.md
new file mode 100644
index 0000000..7ff0b68
--- /dev/null
+++ b/W11/README.md
@@ -0,0 +1,126 @@
+# CrawlerProject - Java爬虫项目
+
+一个基于Java的爬虫项目,包含豆瓣电影Top250、网易云音乐热搜、北京天气预报三个模块。
+
+## 项目结构
+
+```
+CrawlerProject/
+├── pom.xml                              # Maven配置文件
+├── README.md                            # 项目说明
+├── output/                              # 输出目录(运行时自动创建)
+│   ├── douban_top250.csv                # 豆瓣电影数据
+│   ├── netease_top150.csv               # 网易云音乐数据
+│   └── beijing_weather_30days.csv       # 北京天气数据
+└── src/main/
+    ├── java/com/crawler/
+    │   ├── common/                      # 公共工具类
+    │   │   ├── AnsiColorUtil.java       # ANSI颜色码管理
+    │   │   ├── BaseCommand.java         # 命令抽象类
+    │   │   ├── BaseCrawler.java         # 爬虫抽象类
+    │   │   ├── Command.java             # 命令接口
+    │   │   ├── ConsoleView.java         # 统一输出类
+    │   │   ├── CsvUtil.java             # CSV读写工具
+    │   │   ├── LoggerUtil.java          # 日志工具类
+    │   │   └── MainController.java      # 主控制器
+    │   ├── douban/                      # 豆瓣模块
+    │   ├── netease/                     # 网易云模块
+    │   └── weather/                     # 天气模块
+    └── resources/
+        └── logback.xml                  # 日志配置
+```
+
+## 技术栈
+
+- Java 11
+- Maven 3.8+
+- Jsoup 1.17.2 - HTML解析
+- OpenCSV 5.9 - CSV处理
+- SLF4J + Logback - 日志框架
+
+## 运行方式
+
+### 方式一:使用Maven运行
+
+```bash
+cd CrawlerProject
+mvn clean compile
+mvn exec:java
+```
+
+### 方式二:打包后运行
+
+```bash
+cd CrawlerProject
+mvn clean package
+java -jar target/CrawlerProject-1.0.0.jar
+```
+
+## 使用说明
+
+运行后会显示主菜单:
+
+```
+========== 爬虫系统 ==========
+1. 豆瓣电影Top250
+2. 网易云音乐热搜
+3. 北京天气预报
+0. 退出
+请选择:
+```
+
+### 豆瓣电影Top250模块
+
+进入模块后支持以下命令:
+- `help` - 显示帮助信息
+- `list` - 列出已爬取的电影
+- `crawl` - 爬取豆瓣电影Top250数据
+- `exit` - 退出模块
+
+### 网易云音乐热搜模块
+
+进入模块后支持以下命令:
+- `help` - 显示帮助信息
+- `list` - 列出已爬取的歌曲
+- `crawl` - 爬取网易云音乐热搜数据
+- `exit` - 退出模块
+
+**注意**:网易云反爬严格,如无法获取真实数据将使用模拟数据。
+
+### 北京天气预报模块
+
+进入模块后支持以下命令:
+- `help` - 显示帮助信息
+- `list` - 列出已爬取的天气数据
+- `crawl` - 爬取北京30天天气预报
+- `exit` - 退出模块
+
+**API配置**:如需获取真实天气数据,请在 `WeatherCrawler.java` 中配置和风天气API Key:
+```java
+private static final String QWEATHER_API_KEY = "YOUR_API_KEY";
+```
+注册地址:https://devapi.qweather.com/
+
+## 输出文件
+
+- `output/douban_top250.csv` - 豆瓣电影Top250数据
+- `output/netease_top150.csv` - 网易云音乐热搜榜数据
+- `output/beijing_weather_30days.csv` - 北京30天天气预报数据
+
+## 注意事项
+
+1. **网络请求**:爬虫会访问外部网站,请确保网络连接正常
+2. **反爬机制**:部分网站有反爬机制,爬取时会有随机延迟
+3. **数据保存**:所有爬取数据会自动保存到CSV文件
+4. **模拟数据**:当无法获取真实数据时,会使用模拟数据展示
+
+## 代码规范
+
+- 所有输出均通过 `ConsoleView` 类,禁止直接调用 `System.out`
+- ANSI颜色码统一在 `AnsiColorUtil` 类中定义
+- 每个模块包含完整的MVC结构
+- 使用命令模式实现模块功能
+
+## 许可证
+
+MIT License
diff --git a/W11/logback.xml b/W11/logback.xml
new file mode 100644
index 0000000..597efec
--- /dev/null
+++ b/W11/logback.xml
@@ -0,0 +1,22 @@
+
+
+
+
+%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+logs/crawler.log
+
+%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file