1 changed files with 375 additions and 0 deletions
@ -0,0 +1,375 @@ |
|||
package com.example; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
/** |
|||
* 多网站爬虫系统 - 主程序入口 |
|||
* 采用MVC架构,支持命令行参数控制 |
|||
* 课堂知识点:继承、多态、封装、枚举类、命令行参数、命令模式、策略模式、异常体系 |
|||
*/ |
|||
public class App { |
|||
|
|||
/**
 * Selects which site crawler(s) a run should execute.
 *
 * <p>Every constant carries a human-readable description string.
 */
public enum CrawlerType {
    STEAM("Steam游戏爬虫"),
    MOVIE("豆瓣电影爬虫"),
    BOOK("豆瓣读书爬虫"),
    ALL("全部爬虫");

    /** Display text attached to this constant. */
    private final String description;

    CrawlerType(String text) {
        description = text;
    }

    /**
     * Returns the display text of this crawler selection.
     *
     * @return the description passed to the constant's constructor
     */
    public String getDescription() {
        return this.description;
    }
}
|||
|
|||
/** |
|||
* 保存策略枚举 - 课堂知识点:枚举类、策略模式 |
|||
*/ |
|||
public enum SaveStrategyType { |
|||
TEXT("文本格式", new TextSaveStrategy()), |
|||
JSON("JSON格式", new JsonSaveStrategy()), |
|||
CSV("CSV格式", new CsvSaveStrategy()); |
|||
|
|||
private final String description; |
|||
private final SaveStrategy strategy; |
|||
|
|||
SaveStrategyType(String description, SaveStrategy strategy) { |
|||
this.description = description; |
|||
this.strategy = strategy; |
|||
} |
|||
|
|||
public String getDescription() { |
|||
return description; |
|||
} |
|||
|
|||
public SaveStrategy getStrategy() { |
|||
return strategy; |
|||
} |
|||
} |
|||
|
|||
public static void main(String[] args) { |
|||
System.out.println("========== 多网站爬虫系统启动 ==========\n"); |
|||
|
|||
// 解析命令行参数 - 课堂知识点:命令行参数
|
|||
CrawlerType crawlerType = parseCrawlerType(args); |
|||
SaveStrategyType strategyType = parseSaveStrategy(args); |
|||
boolean saveToDb = shouldSaveToDatabase(args); |
|||
boolean mergeOutput = shouldMergeOutput(args); |
|||
|
|||
// 启动时检查网络状态
|
|||
System.out.println("🔍 正在检查网络连接状态..."); |
|||
if (!HttpCrawler.isNetworkAvailable()) { |
|||
System.err.println("⚠️ 警告:当前网络不可用!"); |
|||
System.err.println("⚠️ 将使用默认数据运行,部分功能可能受限"); |
|||
System.err.println("⚠️ 建议检查网络连接后重新运行"); |
|||
System.out.println(); |
|||
} else { |
|||
System.out.println("✅ 网络连接正常!"); |
|||
System.out.println(); |
|||
} |
|||
|
|||
// 如果需要保存到数据库,先初始化数据库
|
|||
if (saveToDb) { |
|||
System.out.println("📦 正在初始化数据库..."); |
|||
DatabaseManager.getInstance(); // 初始化数据库
|
|||
} |
|||
|
|||
// 使用命令模式执行爬虫
|
|||
executeWithCommandPattern(crawlerType, strategyType, saveToDb, mergeOutput); |
|||
|
|||
// 如果使用了数据库,关闭连接
|
|||
if (saveToDb) { |
|||
DatabaseManager.getInstance().close(); |
|||
} |
|||
|
|||
// 打印精美分隔线
|
|||
printDivider(); |
|||
System.out.println("🎉 所有爬虫执行完毕!"); |
|||
printDivider(); |
|||
|
|||
System.out.println("\n� 执行报告"); |
|||
System.out.println("─────────────────────────────────────────────────────"); |
|||
System.out.printf(" %-12s %s%n", "网络状态:", HttpCrawler.isNetworkAvailable() ? "✅ 正常" : "❌ 不可用"); |
|||
System.out.printf(" %-12s %s%n", "爬虫类型:", crawlerType.getDescription()); |
|||
System.out.printf(" %-12s %s%n", "保存策略:", strategyType.getDescription()); |
|||
System.out.printf(" %-12s %s%n", "数据来源:", HttpCrawler.isNetworkAvailable() ? "实时爬取" : "默认缓存"); |
|||
System.out.printf(" %-12s %s%n", "数据库:", saveToDb ? "✅ 已保存" : "❌ 未启用"); |
|||
System.out.printf(" %-12s %s%n", "合并输出:", mergeOutput ? "✅ 已启用" : "❌ 未启用"); |
|||
System.out.println("─────────────────────────────────────────────────────"); |
|||
System.out.println("\n💡 提示:如需获取最新数据,请确保网络连接后重新运行"); |
|||
printDivider(); |
|||
} |
|||
|
|||
/** |
|||
* 打印精美分隔线 |
|||
*/ |
|||
private static void printDivider() { |
|||
System.out.println("\n╔═══════════════════════════════════════════════════╗"); |
|||
} |
|||
|
|||
/** |
|||
* 解析爬虫类型参数 |
|||
* @param args 命令行参数数组 |
|||
* @return 爬虫类型枚举 |
|||
*/ |
|||
private static CrawlerType parseCrawlerType(String[] args) { |
|||
if (args.length == 0) { |
|||
return CrawlerType.ALL; |
|||
} |
|||
|
|||
String arg = args[0].toLowerCase(); |
|||
switch (arg) { |
|||
case "-steam": |
|||
case "-s": |
|||
return CrawlerType.STEAM; |
|||
case "-movie": |
|||
case "-m": |
|||
return CrawlerType.MOVIE; |
|||
case "-book": |
|||
case "-b": |
|||
return CrawlerType.BOOK; |
|||
case "-all": |
|||
case "-a": |
|||
return CrawlerType.ALL; |
|||
case "-h": |
|||
case "--help": |
|||
printUsage(); |
|||
System.exit(0); |
|||
return CrawlerType.ALL; |
|||
default: |
|||
System.out.println("❌ 未知参数: " + arg); |
|||
printUsage(); |
|||
System.exit(1); |
|||
return CrawlerType.ALL; |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 解析保存策略参数 |
|||
* @param args 命令行参数数组 |
|||
* @return 保存策略枚举 |
|||
*/ |
|||
private static SaveStrategyType parseSaveStrategy(String[] args) { |
|||
for (String arg : args) { |
|||
switch (arg.toLowerCase()) { |
|||
case "-text": |
|||
case "-t": |
|||
return SaveStrategyType.TEXT; |
|||
case "-json": |
|||
case "-j": |
|||
return SaveStrategyType.JSON; |
|||
case "-csv": |
|||
case "-c": |
|||
return SaveStrategyType.CSV; |
|||
} |
|||
} |
|||
return SaveStrategyType.TEXT; // 默认文本格式
|
|||
} |
|||
|
|||
/** |
|||
* 判断是否需要保存到数据库 |
|||
* @param args 命令行参数数组 |
|||
* @return 是否保存到数据库 |
|||
*/ |
|||
private static boolean shouldSaveToDatabase(String[] args) { |
|||
for (String arg : args) { |
|||
if ("-db".equalsIgnoreCase(arg) || "--database".equalsIgnoreCase(arg)) { |
|||
return true; |
|||
} |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
/** |
|||
* 打印使用说明 |
|||
*/ |
|||
private static void printUsage() { |
|||
System.out.println("\n📖 使用方法:"); |
|||
System.out.println(" java App [爬虫类型] [保存策略] [可选参数]"); |
|||
System.out.println("\n爬虫类型:"); |
|||
System.out.println(" -steam, -s Steam游戏爬虫"); |
|||
System.out.println(" -movie, -m 豆瓣电影爬虫"); |
|||
System.out.println(" -book, -b 豆瓣读书爬虫"); |
|||
System.out.println(" -all, -a 所有爬虫(默认)"); |
|||
System.out.println("\n保存策略:"); |
|||
System.out.println(" -text, -t 文本格式(默认)"); |
|||
System.out.println(" -json, -j JSON格式"); |
|||
System.out.println(" -csv, -c CSV格式"); |
|||
System.out.println("\n可选参数:"); |
|||
System.out.println(" -db, --database 保存数据到SQLite数据库"); |
|||
System.out.println(" -merge 将所有数据合并保存到一个文件"); |
|||
System.out.println("\n示例:"); |
|||
System.out.println(" java App -movie -json # 爬取电影并保存为JSON格式"); |
|||
System.out.println(" java App -book -csv # 爬取书籍并保存为CSV格式"); |
|||
System.out.println(" java App -all -db # 爬取所有数据并保存到数据库"); |
|||
System.out.println(" java App -all -merge # 爬取所有数据合并保存到一个文件"); |
|||
System.out.println(" java App -all -json -merge # 合并保存为JSON格式"); |
|||
} |
|||
|
|||
/** |
|||
* 判断是否需要合并输出到一个文件 |
|||
* @param args 命令行参数数组 |
|||
* @return 是否合并输出 |
|||
*/ |
|||
private static boolean shouldMergeOutput(String[] args) { |
|||
for (String arg : args) { |
|||
if ("-merge".equalsIgnoreCase(arg) || "--merge-output".equalsIgnoreCase(arg)) { |
|||
return true; |
|||
} |
|||
} |
|||
return false; |
|||
} |
|||
|
|||
/** |
|||
* 使用命令模式执行爬虫 - 课堂知识点:命令模式、策略模式、异常体系、数据库持久化 |
|||
*/ |
|||
private static void executeWithCommandPattern(CrawlerType crawlerType, SaveStrategyType strategyType, boolean saveToDb, boolean mergeOutput) { |
|||
// 创建命令调用器
|
|||
CommandInvoker invoker = new CommandInvoker(); |
|||
|
|||
// 创建爬虫列表
|
|||
List<Crawler> crawlers = new ArrayList<>(); |
|||
switch (crawlerType) { |
|||
case STEAM: |
|||
crawlers.add(new SteamCrawler()); |
|||
break; |
|||
case MOVIE: |
|||
crawlers.add(new DoubanMovieCrawler()); |
|||
break; |
|||
case BOOK: |
|||
crawlers.add(new DoubanBookCrawler()); |
|||
break; |
|||
case ALL: |
|||
crawlers.add(new SteamCrawler()); |
|||
crawlers.add(new DoubanMovieCrawler()); |
|||
crawlers.add(new DoubanBookCrawler()); |
|||
break; |
|||
} |
|||
|
|||
// 如果合并输出,收集所有数据
|
|||
List<Object> allData = new ArrayList<>(); |
|||
|
|||
// 为每个爬虫创建命令并执行
|
|||
int index = 1; |
|||
for (Crawler crawler : crawlers) { |
|||
String filename = mergeOutput ? "merged_output" : getFilename(crawler, strategyType); |
|||
|
|||
// 创建命令(命令模式)
|
|||
Command command = new CrawlerCommand(crawler, filename); |
|||
|
|||
try { |
|||
// 执行命令
|
|||
invoker.execute(command); |
|||
|
|||
// 收集数据用于合并
|
|||
if (mergeOutput) { |
|||
if (crawler instanceof SteamCrawler) { |
|||
SteamCrawler sc = (SteamCrawler) crawler; |
|||
allData.add("=== Steam游戏 ==="); |
|||
allData.add(sc); |
|||
} else if (crawler instanceof DoubanMovieCrawler) { |
|||
DoubanMovieCrawler mc = (DoubanMovieCrawler) crawler; |
|||
allData.add("=== 豆瓣电影 ==="); |
|||
allData.addAll(mc.getMovies()); |
|||
} else if (crawler instanceof DoubanBookCrawler) { |
|||
DoubanBookCrawler bc = (DoubanBookCrawler) crawler; |
|||
allData.add("=== 豆瓣书籍 ==="); |
|||
allData.addAll(bc.getBooks()); |
|||
} |
|||
} |
|||
|
|||
// 如果需要保存到数据库
|
|||
if (saveToDb) { |
|||
crawler.saveToDatabase(); |
|||
} |
|||
|
|||
if (index < crawlers.size()) { |
|||
System.out.println(); |
|||
} |
|||
} catch (CrawlerException e) { |
|||
System.err.println("❌ 命令执行失败: " + e.getMessage()); |
|||
if (e.getCause() != null) { |
|||
e.getCause().printStackTrace(); |
|||
} |
|||
} |
|||
index++; |
|||
} |
|||
|
|||
// 如果合并输出,保存到一个文件
|
|||
if (mergeOutput && !allData.isEmpty()) { |
|||
saveMergedData(allData, strategyType); |
|||
} |
|||
|
|||
// 显示命令历史
|
|||
invoker.showHistory(); |
|||
} |
|||
|
|||
/** |
|||
* 将所有数据合并保存到一个文件(保存到桌面) |
|||
* @param data 所有爬虫数据 |
|||
* @param strategyType 保存策略 |
|||
*/ |
|||
private static void saveMergedData(List<Object> data, SaveStrategyType strategyType) { |
|||
// 获取桌面路径
|
|||
String desktopPath = System.getProperty("user.home") + "\\Desktop"; |
|||
String extension = strategyType == SaveStrategyType.JSON ? "json" : |
|||
strategyType == SaveStrategyType.CSV ? "csv" : "txt"; |
|||
String filename = desktopPath + "\\crawler_merged_" + System.currentTimeMillis() + "." + extension; |
|||
|
|||
try { |
|||
SaveStrategy strategy = strategyType.getStrategy(); |
|||
strategy.save(data, filename); |
|||
System.out.println("\n📥 已将所有数据合并保存到桌面文件:"); |
|||
System.out.println(" " + filename); |
|||
} catch (Exception e) { |
|||
System.err.println("❌ 合并保存失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 根据爬虫类型和策略获取文件名 |
|||
*/ |
|||
private static String getFilename(Crawler crawler, SaveStrategyType strategyType) { |
|||
String baseName; |
|||
if (crawler instanceof SteamCrawler) { |
|||
baseName = "steam_game_info"; |
|||
} else if (crawler instanceof DoubanMovieCrawler) { |
|||
baseName = "douban_movies"; |
|||
} else if (crawler instanceof DoubanBookCrawler) { |
|||
baseName = "douban_books"; |
|||
} else { |
|||
baseName = "crawler_output"; |
|||
} |
|||
|
|||
String extension; |
|||
switch (strategyType) { |
|||
case JSON: |
|||
extension = ".json"; |
|||
break; |
|||
case CSV: |
|||
extension = ".csv"; |
|||
break; |
|||
case TEXT: |
|||
default: |
|||
extension = ".txt"; |
|||
break; |
|||
} |
|||
|
|||
return baseName + extension; |
|||
} |
|||
|
|||
/** |
|||
* 简单加法方法 - 演示方法定义 |
|||
*/ |
|||
public int add(int a, int b) { |
|||
return a + b; |
|||
} |
|||
} |
|||
Loading…
Reference in new issue