Browse Source

上传文件至 'project/src/main/java/com/example'

main
Xingzhimeng 3 weeks ago
parent
commit
adca923eef
  1. 375
      project/src/main/java/com/example/App.java

375
project/src/main/java/com/example/App.java

@ -0,0 +1,375 @@
package com.example;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
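/**
 * Multi-site crawler system - main program entry point.
 * Uses an MVC architecture and supports control via command-line arguments.
 * Course topics covered: inheritance, polymorphism, encapsulation, enum classes,
 * command-line arguments, the Command pattern, the Strategy pattern, and the exception hierarchy.
 */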
public class App {
/**
 * The crawler selections a user can request, each paired with a
 * human-readable label that is shown in the execution report.
 */
public enum CrawlerType {
    STEAM("Steam游戏爬虫"),
    MOVIE("豆瓣电影爬虫"),
    BOOK("豆瓣读书爬虫"),
    ALL("全部爬虫");

    /** Display label for this selection. */
    private final String label;

    private CrawlerType(String label) {
        this.label = label;
    }

    /**
     * @return the display label given to this constant at declaration
     */
    public String getDescription() {
        return this.label;
    }
}
/**
 * The supported output formats. Every constant carries a display label and
 * the {@link SaveStrategy} instance that is constructed together with the constant.
 */
public enum SaveStrategyType {
    TEXT("文本格式", new TextSaveStrategy()),
    JSON("JSON格式", new JsonSaveStrategy()),
    CSV("CSV格式", new CsvSaveStrategy());

    /** Display label for this format. */
    private final String label;
    /** Strategy object bound to this format. */
    private final SaveStrategy delegate;

    private SaveStrategyType(String label, SaveStrategy delegate) {
        this.label = label;
        this.delegate = delegate;
    }

    /**
     * @return the display label given to this constant at declaration
     */
    public String getDescription() {
        return this.label;
    }

    /**
     * @return the strategy instance bound to this constant (the same object on every call)
     */
    public SaveStrategy getStrategy() {
        return this.delegate;
    }
}
/**
 * Program entry point: parses the command-line flags, checks connectivity,
 * runs the selected crawlers and prints an execution report.
 *
 * <p>The network is probed once at startup and that single result is reused in
 * the report, so the "network status" and "data source" lines always agree
 * with each other and with the startup message, and no extra probes are made.
 *
 * <p>When database persistence is requested, the connection is closed in a
 * {@code finally} block so it is released even if crawler execution throws.
 *
 * @param args command-line arguments (crawler type, save strategy, optional flags)
 */
public static void main(String[] args) {
    System.out.println("========== 多网站爬虫系统启动 ==========\n");

    // Parse command-line arguments.
    CrawlerType crawlerType = parseCrawlerType(args);
    SaveStrategyType strategyType = parseSaveStrategy(args);
    boolean saveToDb = shouldSaveToDatabase(args);
    boolean mergeOutput = shouldMergeOutput(args);

    // Probe the network exactly once; the result is reused for the report below.
    System.out.println("🔍 正在检查网络连接状态...");
    boolean networkAvailable = HttpCrawler.isNetworkAvailable();
    if (!networkAvailable) {
        System.err.println("⚠️ 警告:当前网络不可用!");
        System.err.println("⚠️ 将使用默认数据运行,部分功能可能受限");
        System.err.println("⚠️ 建议检查网络连接后重新运行");
        System.out.println();
    } else {
        System.out.println("✅ 网络连接正常!");
        System.out.println();
    }

    // Initialise the database up front when persistence was requested.
    if (saveToDb) {
        System.out.println("📦 正在初始化数据库...");
        DatabaseManager.getInstance();
    }

    try {
        executeWithCommandPattern(crawlerType, strategyType, saveToDb, mergeOutput);
    } finally {
        // Release the connection even when execution fails unexpectedly.
        if (saveToDb) {
            DatabaseManager.getInstance().close();
        }
    }

    printDivider();
    System.out.println("🎉 所有爬虫执行完毕!");
    printDivider();

    // Execution report (the header previously contained a mis-encoded character).
    System.out.println("\n📊 执行报告");
    System.out.println("─────────────────────────────────────────────────────");
    System.out.printf(" %-12s %s%n", "网络状态:", networkAvailable ? "✅ 正常" : "❌ 不可用");
    System.out.printf(" %-12s %s%n", "爬虫类型:", crawlerType.getDescription());
    System.out.printf(" %-12s %s%n", "保存策略:", strategyType.getDescription());
    System.out.printf(" %-12s %s%n", "数据来源:", networkAvailable ? "实时爬取" : "默认缓存");
    System.out.printf(" %-12s %s%n", "数据库:", saveToDb ? "✅ 已保存" : "❌ 未启用");
    System.out.printf(" %-12s %s%n", "合并输出:", mergeOutput ? "✅ 已启用" : "❌ 未启用");
    System.out.println("─────────────────────────────────────────────────────");
    System.out.println("\n💡 提示:如需获取最新数据,请确保网络连接后重新运行");
    printDivider();
}
/**
 * Writes the decorative separator used between report sections to standard
 * output: an empty line followed by a single box-drawing border line.
 */
private static void printDivider() {
    final String border = "\n╔═══════════════════════════════════════════════════╗";
    System.out.println(border);
}
/**
 * Determines which crawler(s) to run from the command-line arguments.
 *
 * <p>Arguments are scanned in order and the first crawler-type flag wins, so the
 * type flag no longer has to be the first argument. Flags handled by
 * {@code parseSaveStrategy}, {@code shouldSaveToDatabase} and
 * {@code shouldMergeOutput} are skipped instead of being rejected, which lets
 * invocations such as {@code java App -json} fall back to {@link CrawlerType#ALL}.
 * Arguments after the first crawler-type flag are not validated here.
 *
 * <p>A help flag prints the usage text and exits with status 0; an unrecognised
 * argument prints an error plus the usage text and exits with status 1.
 *
 * @param args command-line arguments
 * @return the requested crawler type, or {@link CrawlerType#ALL} when none is given
 */
private static CrawlerType parseCrawlerType(String[] args) {
    for (String rawArg : args) {
        // Locale.ROOT keeps matching independent of the default locale
        // (e.g. "-MOVIE" would not lower-case to "-movie" under a Turkish locale).
        String arg = rawArg.toLowerCase(Locale.ROOT);
        switch (arg) {
            case "-steam":
            case "-s":
                return CrawlerType.STEAM;
            case "-movie":
            case "-m":
                return CrawlerType.MOVIE;
            case "-book":
            case "-b":
                return CrawlerType.BOOK;
            case "-all":
            case "-a":
                return CrawlerType.ALL;
            case "-h":
            case "--help":
                printUsage();
                System.exit(0);
                return CrawlerType.ALL; // unreachable; satisfies the compiler
            case "-text":
            case "-t":
            case "-json":
            case "-j":
            case "-csv":
            case "-c":
            case "-db":
            case "--database":
            case "-merge":
            case "--merge-output":
                // Valid flags owned by the other parsers; keep scanning.
                break;
            default:
                System.out.println("❌ 未知参数: " + arg);
                printUsage();
                System.exit(1);
                return CrawlerType.ALL; // unreachable; satisfies the compiler
        }
    }
    // No crawler-type flag present: run everything.
    return CrawlerType.ALL;
}
/**
 * Picks the output format from the command-line arguments.
 * Arguments are examined in order and the first format flag found decides the result.
 *
 * @param args command-line arguments
 * @return the format named by the first matching flag, or
 *         {@link SaveStrategyType#TEXT} when no format flag is present
 */
private static SaveStrategyType parseSaveStrategy(String[] args) {
    for (int i = 0; i < args.length; i++) {
        final String flag = args[i].toLowerCase();
        if ("-text".equals(flag) || "-t".equals(flag)) {
            return SaveStrategyType.TEXT;
        }
        if ("-json".equals(flag) || "-j".equals(flag)) {
            return SaveStrategyType.JSON;
        }
        if ("-csv".equals(flag) || "-c".equals(flag)) {
            return SaveStrategyType.CSV;
        }
    }
    // Nothing matched: fall back to plain text.
    return SaveStrategyType.TEXT;
}
/**
 * Reports whether database persistence was requested on the command line.
 *
 * @param args command-line arguments
 * @return {@code true} if any argument equals {@code -db} or {@code --database},
 *         ignoring case; {@code false} otherwise
 */
private static boolean shouldSaveToDatabase(String[] args) {
    boolean requested = false;
    // Stop scanning as soon as one matching flag has been seen.
    for (int i = 0; i < args.length && !requested; i++) {
        final String candidate = args[i];
        requested = "-db".equalsIgnoreCase(candidate) || "--database".equalsIgnoreCase(candidate);
    }
    return requested;
}
/**
 * Prints the command-line help text to standard output, one entry per line.
 */
private static void printUsage() {
    final String[] helpLines = {
        "\n📖 使用方法:",
        " java App [爬虫类型] [保存策略] [可选参数]",
        "\n爬虫类型:",
        " -steam, -s Steam游戏爬虫",
        " -movie, -m 豆瓣电影爬虫",
        " -book, -b 豆瓣读书爬虫",
        " -all, -a 所有爬虫(默认)",
        "\n保存策略:",
        " -text, -t 文本格式(默认)",
        " -json, -j JSON格式",
        " -csv, -c CSV格式",
        "\n可选参数:",
        " -db, --database 保存数据到SQLite数据库",
        " -merge 将所有数据合并保存到一个文件",
        "\n示例:",
        " java App -movie -json # 爬取电影并保存为JSON格式",
        " java App -book -csv # 爬取书籍并保存为CSV格式",
        " java App -all -db # 爬取所有数据并保存到数据库",
        " java App -all -merge # 爬取所有数据合并保存到一个文件",
        " java App -all -json -merge # 合并保存为JSON格式"
    };
    for (String helpLine : helpLines) {
        System.out.println(helpLine);
    }
}
/**
 * Reports whether the results should be merged into a single output file.
 *
 * @param args command-line arguments
 * @return {@code true} if any argument equals {@code -merge} or
 *         {@code --merge-output}, ignoring case; {@code false} otherwise
 */
private static boolean shouldMergeOutput(String[] args) {
    int position = 0;
    while (position < args.length) {
        final String candidate = args[position];
        final boolean shortForm = "-merge".equalsIgnoreCase(candidate);
        if (shortForm || "--merge-output".equalsIgnoreCase(candidate)) {
            return true;
        }
        position++;
    }
    return false;
}
/**
 * Runs the selected crawlers one after another through a {@code CommandInvoker}.
 *
 * <p>For each crawler a {@code CrawlerCommand} is created and executed. On success
 * the crawler's data is optionally collected for the merged output and optionally
 * saved to the database. A {@code CrawlerException} from one crawler is reported on
 * standard error and does not stop the remaining crawlers. After the loop the merged
 * data (if any) is written and the invoker's command history is shown.
 *
 * @param crawlerType  which crawler(s) to run
 * @param strategyType output format; used for per-crawler file names and the merged file
 * @param saveToDb     whether each crawler should also save to the database
 * @param mergeOutput  whether results are gathered and written to a single merged file
 */
private static void executeWithCommandPattern(CrawlerType crawlerType, SaveStrategyType strategyType, boolean saveToDb, boolean mergeOutput) {
    CommandInvoker invoker = new CommandInvoker();
    List<Crawler> crawlers = createCrawlers(crawlerType);

    // Accumulates section headers and records when merging is enabled.
    List<Object> mergedRecords = new ArrayList<>();

    final int total = crawlers.size();
    for (int position = 0; position < total; position++) {
        Crawler crawler = crawlers.get(position);

        // NOTE(review): in merge mode every command receives the same "merged_output"
        // name; what CrawlerCommand does with it is not visible here - confirm.
        String filename;
        if (mergeOutput) {
            filename = "merged_output";
        } else {
            filename = getFilename(crawler, strategyType);
        }
        Command command = new CrawlerCommand(crawler, filename);

        try {
            invoker.execute(command);

            if (mergeOutput) {
                if (crawler instanceof SteamCrawler) {
                    // NOTE(review): the crawler object itself is appended here, whereas the
                    // Douban branches append the lists they return - confirm this is intended.
                    mergedRecords.add("=== Steam游戏 ===");
                    mergedRecords.add((SteamCrawler) crawler);
                } else if (crawler instanceof DoubanMovieCrawler) {
                    mergedRecords.add("=== 豆瓣电影 ===");
                    mergedRecords.addAll(((DoubanMovieCrawler) crawler).getMovies());
                } else if (crawler instanceof DoubanBookCrawler) {
                    mergedRecords.add("=== 豆瓣书籍 ===");
                    mergedRecords.addAll(((DoubanBookCrawler) crawler).getBooks());
                }
            }

            if (saveToDb) {
                crawler.saveToDatabase();
            }

            // Blank line between crawlers, but not after the last one.
            boolean moreToCome = position + 1 < total;
            if (moreToCome) {
                System.out.println();
            }
        } catch (CrawlerException e) {
            System.err.println("❌ 命令执行失败: " + e.getMessage());
            Throwable cause = e.getCause();
            if (cause != null) {
                cause.printStackTrace();
            }
        }
    }

    if (mergeOutput && !mergedRecords.isEmpty()) {
        saveMergedData(mergedRecords, strategyType);
    }

    invoker.showHistory();
}

/**
 * Builds the list of crawler instances for the given selection, in run order.
 */
private static List<Crawler> createCrawlers(CrawlerType crawlerType) {
    List<Crawler> selected = new ArrayList<>();
    switch (crawlerType) {
        case ALL:
            selected.add(new SteamCrawler());
            selected.add(new DoubanMovieCrawler());
            selected.add(new DoubanBookCrawler());
            break;
        case STEAM:
            selected.add(new SteamCrawler());
            break;
        case MOVIE:
            selected.add(new DoubanMovieCrawler());
            break;
        case BOOK:
            selected.add(new DoubanBookCrawler());
            break;
    }
    return selected;
}
/**
 * Writes all collected crawler data to one timestamped file in the
 * {@code Desktop} directory under the user's home directory.
 *
 * <p>The target path is assembled with {@link Paths#get(String, String...)} so the
 * platform's own separator is used; the previous hard-coded backslashes only
 * produced a valid path on Windows. Path construction happens inside the
 * {@code try} so that a malformed home directory is reported like any other
 * save failure instead of escaping as an uncaught exception.
 *
 * @param data         all crawler data gathered for the merged output
 * @param strategyType format whose strategy performs the write and which decides the file extension
 */
private static void saveMergedData(List<Object> data, SaveStrategyType strategyType) {
    String extension = strategyType == SaveStrategyType.JSON ? "json" :
            strategyType == SaveStrategyType.CSV ? "csv" : "txt";
    try {
        String leafName = "crawler_merged_" + System.currentTimeMillis() + "." + extension;
        String filename = Paths.get(System.getProperty("user.home"), "Desktop", leafName).toString();

        SaveStrategy strategy = strategyType.getStrategy();
        strategy.save(data, filename);
        System.out.println("\n📥 已将所有数据合并保存到桌面文件:");
        System.out.println(" " + filename);
    } catch (Exception e) {
        // Best effort: a failed merged save is reported but does not abort the run.
        System.err.println("❌ 合并保存失败: " + e.getMessage());
    }
}
/**
 * Builds the output file name for a single crawler: a base name chosen by the
 * crawler's concrete class followed by the extension of the selected format.
 *
 * @param crawler      the crawler whose output is being named
 * @param strategyType the selected output format
 * @return base name plus {@code .json}, {@code .csv} or {@code .txt}
 */
private static String getFilename(Crawler crawler, SaveStrategyType strategyType) {
    final String stem = crawler instanceof SteamCrawler ? "steam_game_info"
            : crawler instanceof DoubanMovieCrawler ? "douban_movies"
            : crawler instanceof DoubanBookCrawler ? "douban_books"
            : "crawler_output";

    switch (strategyType) {
        case JSON:
            return stem + ".json";
        case CSV:
            return stem + ".csv";
        default:
            // TEXT and anything else fall back to plain text.
            return stem + ".txt";
    }
}
/**
 * Adds two integers; a small demonstration of a method definition.
 *
 * @param a first operand
 * @param b second operand
 * @return the {@code int} sum of {@code a} and {@code b}
 */
public int add(int a, int b) {
    final int sum = a + b;
    return sum;
}
}
Loading…
Cancel
Save