diff --git a/project/command/BaseCommand.java b/project/command/BaseCommand.java
new file mode 100644
index 0000000..130e51f
--- /dev/null
+++ b/project/command/BaseCommand.java
@@ -0,0 +1,42 @@
+package com.crawler.command;
+
+import java.util.Scanner;
+
+/**
+ * Base class for interactive commands: supplies console input and the shared command history.
+ */
+public abstract class BaseCommand implements Command {
+    /**
+     * The one scanner over {@code System.in} used by every command. Each Scanner buffers ahead
+     * on its source, so several scanners on the same stream can swallow each other's input.
+     */
+    private static final Scanner SHARED_SCANNER = new Scanner(System.in);
+
+    protected Scanner scanner;
+    protected CommandHistory history;
+
+    public BaseCommand() {
+        this.scanner = SHARED_SCANNER;
+        this.history = CommandHistory.getInstance();
+    }
+
+    /**
+     * Returns the console scanner shared by all commands and the controller.
+     *
+     * @return the process-wide scanner over {@code System.in}
+     */
+    protected static Scanner sharedScanner() {
+        return SHARED_SCANNER;
+    }
+
+    /**
+     * Prints {@code prompt} without a line break and reads one line from the console.
+     *
+     * @param prompt text shown before the user's input
+     * @return the entered line with surrounding whitespace removed
+     */
+    protected String readInput(String prompt) {
+        System.out.print(prompt);
+        return scanner.nextLine().trim();
+    }
+}
\ No newline at end of file
diff --git a/project/command/CacheCommand.java b/project/command/CacheCommand.java
new file mode 100644
index 0000000..968786f
--- /dev/null
+++ b/project/command/CacheCommand.java
@@ -0,0 +1,395 @@
+package com.crawler.command;
+
+import com.crawler.exception.CacheException;
+import com.crawler.model.CrawlerData;
+import com.crawler.util.JsonSerializer;
+import com.crawler.view.CrawlerView;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+import java.util.Locale;
+import java.util.Scanner;
+
+/**
+ * The {@code cache} command: lists, loads and deletes JSON files in the data directory and
+ * saves crawled data into it.
+ */
+public class CacheCommand extends BaseCommand {
+
+    private static final String DEFAULT_CACHE_DIR = "data";
+    private static final String DEFAULT_CACHE_FILE = DEFAULT_CACHE_DIR + "/crawler_data.json";
+    private static final String SEPARATOR = "========================================";
+
+    private final CrawlerView view;
+
+    static {
+        initCacheDir();
+    }
+
+    public CacheCommand() {
+        this.view = new CrawlerView();
+    }
+
+    /**
+     * Builds the default save location for a crawler's results.
+     *
+     * @param crawlerName crawler name; characters unsafe for file names are replaced by {@code _}
+     * @return {@code data/<sanitized name>.json}
+     */
+    public static String getDefaultCacheFile(String crawlerName) {
+        String safeName = sanitizeFileName(crawlerName);
+        return DEFAULT_CACHE_DIR + "/" + safeName + ".json";
+    }
+
+    /** Replaces everything except ASCII letters, digits, U+4E00..U+9FA5, '-' and '_' with '_'. */
+    private static String sanitizeFileName(String name) {
+        return name.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5-_]", "_");
+    }
+
+    /** Creates the data directory when it is missing; runs once, when the class is initialized. */
+    private static void initCacheDir() {
+        File cacheDir = new File(DEFAULT_CACHE_DIR);
+        if (cacheDir.exists()) {
+            return;
+        }
+        if (cacheDir.mkdirs()) {
+            System.out.println("已创建数据目录: " + DEFAULT_CACHE_DIR);
+        } else if (!cacheDir.isDirectory()) {
+            // Report the failure instead of ignoring it; later saves would fail otherwise.
+            System.err.println("无法创建数据目录: " + DEFAULT_CACHE_DIR);
+        }
+    }
+
+    @Override
+    public String getName() {
+        return "cache";
+    }
+
+    @Override
+    public String getDescription() {
+        return "缓存操作: save - 保存数据, load - 读取数据, list - 查看缓存文件, delete - 删除缓存文件";
+    }
+
+    /** Asks which cache operation to run and dispatches to it. */
+    @Override
+    public void execute() {
+        // Locale.ROOT: default-locale lower-casing can change letters (e.g. Turkish dotless i).
+        String action = readInput("请输入缓存操作 (save/load/list/delete): ").toLowerCase(Locale.ROOT);
+
+        switch (action) {
+            case "save":
+                saveData();
+                break;
+            case "load":
+                loadDataWithRetry();
+                break;
+            case "list":
+                listCacheFiles();
+                break;
+            case "delete":
+                deleteCacheFile();
+                break;
+            default:
+                System.out.println("未知操作: " + action);
+                System.out.println("可用操作: save, load, list, delete");
+        }
+    }
+
+    /**
+     * Prompts for a target path and writes {@code dataList} to it as JSON.
+     *
+     * @param dataList    records to save
+     * @param crawlerName used to derive the default file name; may be {@code null} or empty
+     * @throws CacheException    if there is nothing to save or writing fails
+     * @throws SecurityException if the chosen path lies outside the data directory
+     */
+    private void saveData(List<CrawlerData> dataList, String crawlerName) throws CacheException {
+        if (dataList == null || dataList.isEmpty()) {
+            throw new CacheException("没有数据可保存");
+        }
+
+        String defaultFile = (crawlerName != null && !crawlerName.isEmpty())
+                ? getDefaultCacheFile(crawlerName)
+                : DEFAULT_CACHE_FILE;
+
+        String filePath = readInputWithRetry("请输入保存路径 (默认: " + defaultFile + "): ");
+        if (filePath.isEmpty()) {
+            filePath = defaultFile;
+        }
+
+        validatePath(filePath);
+
+        try {
+            JsonSerializer.serializeToFile(dataList, filePath);
+            view.showCacheSuccess(filePath, dataList.size(), new File(filePath).length());
+        } catch (IOException e) {
+            throw new CacheException("保存失败: " + e.getMessage(), filePath, e);
+        }
+    }
+
+    /** Same as {@link #saveData(List, String)} with the generic default file name. */
+    private void saveData(List<CrawlerData> dataList) throws CacheException {
+        saveData(dataList, null);
+    }
+
+    /** Handles the interactive {@code save} action, which has no crawled data to work with. */
+    private void saveData() {
+        System.out.println("注意: 当前没有爬取的数据。请先使用 crawl 命令爬取数据。");
+        System.out.println("或者使用 crawl 命令爬取后,在提示时选择保存。");
+    }
+
+    /**
+     * Rejects paths outside the data directory.
+     *
+     * @throws SecurityException if {@code filePath} is empty or resolves outside the data directory
+     */
+    private static void validatePath(String filePath) {
+        if (!isPathAllowed(filePath)) {
+            throw new SecurityException("访问被拒绝: 无权访问该路径");
+        }
+    }
+
+    /** Returns whether {@code filePath} resolves to the data directory or something inside it. */
+    private static boolean isPathAllowed(String filePath) {
+        if (filePath == null || filePath.isEmpty()) {
+            return false;
+        }
+        try {
+            File target = new File(filePath).getCanonicalFile();
+            File dataDir = new File(DEFAULT_CACHE_DIR).getCanonicalFile();
+            // Compare whole path elements. A String.startsWith on the canonical paths would
+            // also accept siblings such as "data_old/x.json" or "database.json".
+            return target.toPath().startsWith(dataDir.toPath());
+        } catch (IOException e) {
+            // A path that cannot be resolved cannot be proven safe.
+            return false;
+        }
+    }
+
+    /** Repeats {@link #loadData()} until it succeeds or the user declines to retry. */
+    private void loadDataWithRetry() {
+        while (true) {
+            try {
+                loadData();
+                return;
+            } catch (CacheException e) {
+                System.err.println(e.getMessage());
+                if (!isYes(readInput("是否重新输入路径? (y/n): "))) {
+                    return;
+                }
+            }
+        }
+    }
+
+    /**
+     * Prompts for a file, deserializes it and shows the records.
+     *
+     * @throws CacheException if the file is missing, not a regular readable file, or cannot be read
+     */
+    private void loadData() throws CacheException {
+        // NOTE(review): unlike saving, loading is not limited to the data directory - confirm intended.
+        String filePath = readInput("请输入读取路径 (默认: " + DEFAULT_CACHE_FILE + "): ");
+        if (filePath.isEmpty()) {
+            filePath = DEFAULT_CACHE_FILE;
+        }
+
+        File file = new File(filePath);
+        if (!file.exists()) {
+            throw new CacheException("文件不存在", filePath);
+        }
+        if (!file.isFile()) {
+            throw new CacheException("路径不是有效的文件", filePath);
+        }
+        if (!file.canRead()) {
+            throw new CacheException("文件不可读取", filePath);
+        }
+
+        try {
+            List<CrawlerData> dataList = JsonSerializer.deserializeFromFile(filePath);
+            view.showData(dataList);
+        } catch (IOException e) {
+            throw new CacheException("读取失败: " + e.getMessage(), filePath, e);
+        }
+    }
+
+    /**
+     * Finds the {@code .json} files in the data directory, sorted by path.
+     *
+     * @return the files, or an empty array after telling the user why there are none
+     */
+    private File[] findCacheFiles() {
+        File cacheDir = new File(DEFAULT_CACHE_DIR);
+        if (!cacheDir.isDirectory()) {
+            System.out.println("数据目录不存在");
+            return new File[0];
+        }
+
+        File[] files = cacheDir.listFiles((dir, name) -> name.endsWith(".json"));
+        if (files == null || files.length == 0) {
+            System.out.println("数据目录中没有JSON文件");
+            return new File[0];
+        }
+
+        // listFiles() guarantees no order; sort so the numbering shown to the user is stable.
+        Arrays.sort(files);
+        return files;
+    }
+
+    /** Prints the name and size of every JSON file in the data directory. */
+    private void listCacheFiles() {
+        File[] files = findCacheFiles();
+        if (files.length == 0) {
+            return;
+        }
+
+        System.out.println(SEPARATOR);
+        System.out.println("数据文件列表:");
+        System.out.println(SEPARATOR);
+        for (File file : files) {
+            System.out.println("- " + file.getName() + " (" + file.length() + " bytes)");
+        }
+    }
+
+    /** Shows a numbered file list and deletes the chosen file, or all files, after confirmation. */
+    private void deleteCacheFile() {
+        File[] files = findCacheFiles();
+        if (files.length == 0) {
+            return;
+        }
+
+        System.out.println(SEPARATOR);
+        System.out.println("可选删除的文件:");
+        System.out.println(SEPARATOR);
+        for (int i = 0; i < files.length; i++) {
+            System.out.println("[" + (i + 1) + "] " + files[i].getName() + " (" + files[i].length() + " bytes)");
+        }
+        System.out.println("[all] 删除所有文件");
+        System.out.println(SEPARATOR);
+
+        String input = readInput("请输入要删除的文件序号或 'all': ");
+        if (input.equalsIgnoreCase("all")) {
+            deleteAll(files);
+        } else {
+            deleteByNumber(files, input);
+        }
+    }
+
+    /** Deletes every file in {@code files} once the user confirms, then reports the count. */
+    private void deleteAll(File[] files) {
+        if (!isYes(readInput("确定要删除所有文件吗? (y/n): "))) {
+            System.out.println("取消删除");
+            return;
+        }
+
+        int count = 0;
+        for (File file : files) {
+            if (file.delete()) {
+                count++;
+            }
+        }
+        System.out.println("已删除 " + count + " 个文件");
+    }
+
+    /** Deletes the file whose 1-based list number is {@code input} once the user confirms. */
+    private void deleteByNumber(File[] files, String input) {
+        int index;
+        try {
+            index = Integer.parseInt(input) - 1;
+        } catch (NumberFormatException e) {
+            System.out.println("无效输入,请输入数字序号或 'all'");
+            return;
+        }
+
+        if (index < 0 || index >= files.length) {
+            System.out.println("无效的序号");
+            return;
+        }
+
+        File file = files[index];
+        if (!isYes(readInput("确定要删除 '" + file.getName() + "' 吗? (y/n): "))) {
+            System.out.println("取消删除");
+            return;
+        }
+
+        if (file.delete()) {
+            System.out.println("已删除: " + file.getName());
+        } else {
+            System.out.println("删除失败");
+        }
+    }
+
+    /**
+     * Reads a path and warns when its parent directory does not exist. Despite the name,
+     * nothing is retried and no {@code CacheException} is thrown here.
+     */
+    private String readInputWithRetry(String prompt) throws CacheException {
+        String input = readInput(prompt);
+        if (input.isEmpty()) {
+            return input;
+        }
+
+        File parentDir = new File(input).getParentFile();
+        if (parentDir != null && !parentDir.exists()) {
+            System.out.println("警告: 父目录不存在,将自动创建");
+        }
+        return input;
+    }
+
+    /**
+     * Offers to save {@code dataList}; on a rejected path or write failure the user may retry.
+     *
+     * @param dataList    crawled records; nothing happens when {@code null} or empty
+     * @param crawlerName used to derive the default file name; may be {@code null} or empty
+     */
+    public static void saveDataWithPrompt(List<CrawlerData> dataList, String crawlerName) {
+        if (dataList == null || dataList.isEmpty()) {
+            return;
+        }
+
+        String defaultFile = (crawlerName != null && !crawlerName.isEmpty())
+                ? getDefaultCacheFile(crawlerName)
+                : DEFAULT_CACHE_FILE;
+
+        CrawlerView view = new CrawlerView();
+        // Use the shared scanner: a fresh Scanner on System.in could lose already-buffered input.
+        Scanner scanner = sharedScanner();
+        if (!isYes(readInputWithScanner(scanner, "\n是否保存爬取结果? (y/n): "))) {
+            return;
+        }
+
+        while (true) {
+            String filePath = readInputWithScanner(scanner, "请输入保存路径 (默认: " + defaultFile + "): ");
+            if (filePath.isEmpty()) {
+                filePath = defaultFile;
+            }
+
+            try {
+                validatePath(filePath);
+                JsonSerializer.serializeToFile(dataList, filePath);
+                view.showCacheSuccess(filePath, dataList.size(), new File(filePath).length());
+                return;
+            } catch (SecurityException e) {
+                System.err.println("安全错误: " + e.getMessage());
+            } catch (IOException e) {
+                System.err.println("保存失败: " + e.getMessage());
+            }
+
+            if (!isYes(readInputWithScanner(scanner, "是否重新输入路径? (y/n): "))) {
+                return;
+            }
+        }
+    }
+
+    /** Returns whether {@code answer} is "y" or "yes" in any letter case. */
+    private static boolean isYes(String answer) {
+        return "y".equalsIgnoreCase(answer) || "yes".equalsIgnoreCase(answer);
+    }
+
+    /** Prints {@code prompt} and returns the next line from {@code scanner}, trimmed. */
+    private static String readInputWithScanner(Scanner scanner, String prompt) {
+        System.out.print(prompt);
+        return scanner.nextLine().trim();
+    }
+}
diff --git a/project/command/Command.java b/project/command/Command.java
new file mode 100644
index 0000000..24e912f
--- /dev/null
+++ b/project/command/Command.java
@@ -0,0 +1,15 @@
+package com.crawler.command;
+
+/**
+ * A named action that can be invoked from the interactive console.
+ */
+public interface Command {
+    /** Returns the keyword that selects this command. */
+    String getName();
+
+    /** Returns a short description of what the command does. */
+    String getDescription();
+
+    /** Runs the command. */
+    void execute();
+}
\ No newline at end of file
diff --git a/project/command/CommandController.java b/project/command/CommandController.java
new file mode 100644
index 0000000..f4fab0a
--- /dev/null
+++ b/project/command/CommandController.java
@@ -0,0 +1,82 @@
+package com.crawler.command;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Locale;
+import java.util.Scanner;
+
+/**
+ * Console loop: reads one command name per line and runs the registered {@link Command}
+ * whose name matches it.
+ */
+public class CommandController {
+    private final List<Command> commands;
+    private final Scanner scanner;
+    private final CommandHistory history;
+
+    public CommandController() {
+        commands = new ArrayList<>();
+        // Share the commands' scanner; a second Scanner on System.in would steal buffered input.
+        scanner = BaseCommand.sharedScanner();
+        history = CommandHistory.getInstance();
+        initCommands();
+    }
+
+    /** Registers the built-in commands; the help command is constructed with the command list. */
+    private void initCommands() {
+        HelpCommand helpCmd = new HelpCommand(commands);
+        commands.add(helpCmd);
+        commands.add(new ListCommand());
+        commands.add(new CrawlCommand());
+        commands.add(new CacheCommand());
+        commands.add(new ExitCommand());
+    }
+
+    /**
+     * Prints the banner and processes input lines until standard input ends.
+     * Each non-blank line is trimmed, lower-cased, recorded in the history and dispatched.
+     */
+    public void start() {
+        System.out.println("========================================");
+        System.out.println("Java爬虫框架 - 命令行模式");
+        System.out.println("========================================");
+        System.out.println("输入 'help' 查看可用指令");
+        System.out.println("========================================");
+
+        while (true) {
+            System.out.print("> ");
+            if (!scanner.hasNextLine()) {
+                // End of input (Ctrl-D or a closed pipe): stop instead of letting
+                // nextLine() throw NoSuchElementException.
+                return;
+            }
+
+            // Locale.ROOT keeps command matching independent of the user's locale.
+            String input = scanner.nextLine().trim().toLowerCase(Locale.ROOT);
+            if (input.isEmpty()) {
+                continue;
+            }
+
+            history.add(input);
+            executeCommand(input);
+        }
+    }
+
+    /** Runs the first registered command named {@code commandName}, or reports it as unknown. */
+    private void executeCommand(String commandName) {
+        for (Command cmd : commands) {
+            if (cmd.getName().equals(commandName)) {
+                cmd.execute();
+                return;
+            }
+        }
+
+        System.out.println("未知指令: " + commandName);
+        System.out.println("输入 'help' 查看可用指令");
+    }
+
+    /** Closes the console scanner, which also closes {@code System.in}. */
+    public void stop() {
+        scanner.close();
+    }
+}
\ No newline at end of file
diff --git a/project/command/CommandHistory.java b/project/command/CommandHistory.java
new file mode 100644
index 0000000..1109113
--- /dev/null
+++ b/project/command/CommandHistory.java
@@ -0,0 +1,43 @@
+package com.crawler.command;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Singleton list of the command lines entered during this run.
+ */
+public class CommandHistory {
+    // Created eagerly by class initialization, so getInstance() needs no lazy null check.
+    private static final CommandHistory INSTANCE = new CommandHistory();
+
+    private final List<String> history;
+
+    private CommandHistory() {
+        history = new ArrayList<>();
+    }
+
+    /** Returns the single shared history. */
+    public static CommandHistory getInstance() {
+        return INSTANCE;
+    }
+
+    /** Appends {@code command} to the history. */
+    public void add(String command) {
+        history.add(command);
+    }
+
+    /** Returns a copy of the entries, oldest first; changes to it do not affect the history. */
+    public List<String> getHistory() {
+        return new ArrayList<>(history);
+    }
+
+    /** Removes all entries. */
+    public void clear() {
+        history.clear();
+    }
+
+    /** Returns the number of recorded entries. */
+    public int size() {
+        return history.size();
+    }
+}
\ No newline at end of file