5 changed files with 487 additions and 0 deletions
@ -0,0 +1,18 @@ |
|||
package com.crawler.command; |
|||
|
|||
import java.util.Scanner; |
|||
|
|||
public abstract class BaseCommand implements Command { |
|||
protected Scanner scanner; |
|||
protected CommandHistory history; |
|||
|
|||
public BaseCommand() { |
|||
this.scanner = new Scanner(System.in); |
|||
this.history = CommandHistory.getInstance(); |
|||
} |
|||
|
|||
protected String readInput(String prompt) { |
|||
System.out.print(prompt); |
|||
return scanner.nextLine().trim(); |
|||
} |
|||
} |
|||
@ -0,0 +1,363 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.exception.CacheException; |
|||
import com.crawler.model.CrawlerData; |
|||
import com.crawler.util.JsonSerializer; |
|||
import com.crawler.view.CrawlerView; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
|
|||
public class CacheCommand extends BaseCommand { |
|||
|
|||
private static final String DEFAULT_CACHE_DIR = "data"; |
|||
private static final String DEFAULT_CACHE_FILE = DEFAULT_CACHE_DIR + "/crawler_data.json"; |
|||
|
|||
public static String getDefaultCacheFile(String crawlerName) { |
|||
String safeName = sanitizeFileName(crawlerName); |
|||
return DEFAULT_CACHE_DIR + "/" + safeName + ".json"; |
|||
} |
|||
|
|||
private static String sanitizeFileName(String name) { |
|||
return name.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5-_]", "_"); |
|||
} |
|||
|
|||
private CrawlerView view; |
|||
|
|||
static { |
|||
initCacheDir(); |
|||
} |
|||
|
|||
private static void initCacheDir() { |
|||
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR); |
|||
if (!cacheDir.exists()) { |
|||
boolean created = cacheDir.mkdirs(); |
|||
if (created) { |
|||
System.out.println("已创建数据目录: " + DEFAULT_CACHE_DIR); |
|||
} |
|||
} |
|||
} |
|||
|
|||
public CacheCommand() { |
|||
this.view = new CrawlerView(); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "cache"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "缓存操作: save - 保存数据, load - 读取数据, list - 查看缓存文件, delete - 删除缓存文件"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
String action = readInput("请输入缓存操作 (save/load/list/delete): ").toLowerCase(); |
|||
|
|||
switch (action) { |
|||
case "save": |
|||
saveData(); |
|||
break; |
|||
case "load": |
|||
loadDataWithRetry(); |
|||
break; |
|||
case "list": |
|||
listCacheFiles(); |
|||
break; |
|||
case "delete": |
|||
deleteCacheFile(); |
|||
break; |
|||
default: |
|||
System.out.println("未知操作: " + action); |
|||
System.out.println("可用操作: save, load, list, delete"); |
|||
} |
|||
} |
|||
|
|||
private void saveData(List<CrawlerData> dataList, String crawlerName) throws CacheException { |
|||
if (dataList == null || dataList.isEmpty()) { |
|||
throw new CacheException("没有数据可保存"); |
|||
} |
|||
|
|||
String defaultFile = (crawlerName != null && !crawlerName.isEmpty()) |
|||
? getDefaultCacheFile(crawlerName) |
|||
: DEFAULT_CACHE_FILE; |
|||
|
|||
String filePath = readInputWithRetry("请输入保存路径 (默认: " + defaultFile + "): "); |
|||
if (filePath.isEmpty()) { |
|||
filePath = defaultFile; |
|||
} |
|||
|
|||
validatePath(filePath); |
|||
|
|||
try { |
|||
JsonSerializer.serializeToFile(dataList, filePath); |
|||
|
|||
java.io.File savedFile = new java.io.File(filePath); |
|||
view.showCacheSuccess(filePath, dataList.size(), savedFile.length()); |
|||
} catch (IOException e) { |
|||
throw new CacheException("保存失败: " + e.getMessage(), filePath, e); |
|||
} |
|||
} |
|||
|
|||
private void saveData(List<CrawlerData> dataList) throws CacheException { |
|||
saveData(dataList, null); |
|||
} |
|||
|
|||
private void validatePath(String filePath) { |
|||
if (!isPathAllowed(filePath)) { |
|||
throw new SecurityException("访问被拒绝: 无权访问该路径"); |
|||
} |
|||
} |
|||
|
|||
private boolean isPathAllowed(String filePath) { |
|||
if (filePath == null || filePath.isEmpty()) { |
|||
return false; |
|||
} |
|||
|
|||
java.io.File file = new java.io.File(filePath); |
|||
String canonicalPath; |
|||
try { |
|||
canonicalPath = file.getCanonicalPath(); |
|||
} catch (IOException e) { |
|||
return false; |
|||
} |
|||
|
|||
String dataDirPath; |
|||
try { |
|||
dataDirPath = new java.io.File(DEFAULT_CACHE_DIR).getCanonicalPath(); |
|||
} catch (IOException e) { |
|||
return false; |
|||
} |
|||
|
|||
return canonicalPath.startsWith(dataDirPath); |
|||
} |
|||
|
|||
private void saveData() { |
|||
System.out.println("注意: 当前没有爬取的数据。请先使用 crawl 命令爬取数据。"); |
|||
System.out.println("或者使用 crawl 命令爬取后,在提示时选择保存。"); |
|||
} |
|||
|
|||
private void loadDataWithRetry() { |
|||
while (true) { |
|||
try { |
|||
loadData(); |
|||
break; |
|||
} catch (CacheException e) { |
|||
System.err.println(e.getMessage()); |
|||
String retry = readInput("是否重新输入路径? (y/n): ").toLowerCase(); |
|||
if (!retry.equals("y") && !retry.equals("yes")) { |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
private void loadData() throws CacheException { |
|||
String filePath = readInput("请输入读取路径 (默认: " + DEFAULT_CACHE_FILE + "): "); |
|||
if (filePath.isEmpty()) { |
|||
filePath = DEFAULT_CACHE_FILE; |
|||
} |
|||
|
|||
java.io.File file = new java.io.File(filePath); |
|||
if (!file.exists()) { |
|||
throw new CacheException("文件不存在", filePath); |
|||
} |
|||
|
|||
if (!file.isFile()) { |
|||
throw new CacheException("路径不是有效的文件", filePath); |
|||
} |
|||
|
|||
if (!file.canRead()) { |
|||
throw new CacheException("文件不可读取", filePath); |
|||
} |
|||
|
|||
try { |
|||
List<CrawlerData> dataList = JsonSerializer.deserializeFromFile(filePath); |
|||
view.showData(dataList); |
|||
} catch (IOException e) { |
|||
throw new CacheException("读取失败: " + e.getMessage(), filePath, e); |
|||
} |
|||
} |
|||
|
|||
private void listCacheFiles() { |
|||
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR); |
|||
if (!cacheDir.exists() || !cacheDir.isDirectory()) { |
|||
System.out.println("数据目录不存在"); |
|||
return; |
|||
} |
|||
|
|||
java.io.File[] files = cacheDir.listFiles((dir, name) -> name.endsWith(".json")); |
|||
if (files == null || files.length == 0) { |
|||
System.out.println("数据目录中没有JSON文件"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("========================================"); |
|||
System.out.println("数据文件列表:"); |
|||
System.out.println("========================================"); |
|||
for (java.io.File file : files) { |
|||
System.out.println("- " + file.getName() + " (" + file.length() + " bytes)"); |
|||
} |
|||
} |
|||
|
|||
private void deleteCacheFile() { |
|||
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR); |
|||
if (!cacheDir.exists() || !cacheDir.isDirectory()) { |
|||
System.out.println("数据目录不存在"); |
|||
return; |
|||
} |
|||
|
|||
java.io.File[] files = cacheDir.listFiles((dir, name) -> name.endsWith(".json")); |
|||
if (files == null || files.length == 0) { |
|||
System.out.println("数据目录中没有JSON文件"); |
|||
return; |
|||
} |
|||
|
|||
System.out.println("========================================"); |
|||
System.out.println("可选删除的文件:"); |
|||
System.out.println("========================================"); |
|||
for (int i = 0; i < files.length; i++) { |
|||
System.out.println("[" + (i + 1) + "] " + files[i].getName() + " (" + files[i].length() + " bytes)"); |
|||
} |
|||
System.out.println("[all] 删除所有文件"); |
|||
System.out.println("========================================"); |
|||
|
|||
String input = readInput("请输入要删除的文件序号或 'all': "); |
|||
|
|||
if (input.equalsIgnoreCase("all")) { |
|||
System.out.print("确定要删除所有文件吗? (y/n): "); |
|||
String confirm = scanner.nextLine().trim().toLowerCase(); |
|||
if (confirm.equals("y") || confirm.equals("yes")) { |
|||
int count = 0; |
|||
for (java.io.File file : files) { |
|||
if (file.delete()) { |
|||
count++; |
|||
} |
|||
} |
|||
System.out.println("已删除 " + count + " 个文件"); |
|||
} else { |
|||
System.out.println("取消删除"); |
|||
} |
|||
} else { |
|||
try { |
|||
int index = Integer.parseInt(input) - 1; |
|||
if (index >= 0 && index < files.length) { |
|||
java.io.File file = files[index]; |
|||
System.out.print("确定要删除 '" + file.getName() + "' 吗? (y/n): "); |
|||
String confirm = scanner.nextLine().trim().toLowerCase(); |
|||
if (confirm.equals("y") || confirm.equals("yes")) { |
|||
if (file.delete()) { |
|||
System.out.println("已删除: " + file.getName()); |
|||
} else { |
|||
System.out.println("删除失败"); |
|||
} |
|||
} else { |
|||
System.out.println("取消删除"); |
|||
} |
|||
} else { |
|||
System.out.println("无效的序号"); |
|||
} |
|||
} catch (NumberFormatException e) { |
|||
System.out.println("无效输入,请输入数字序号或 'all'"); |
|||
} |
|||
} |
|||
} |
|||
|
|||
private String readInputWithRetry(String prompt) throws CacheException { |
|||
String input = readInput(prompt); |
|||
if (input == null || input.trim().isEmpty()) { |
|||
return input; |
|||
} |
|||
|
|||
java.io.File file = new java.io.File(input); |
|||
java.io.File parentDir = file.getParentFile(); |
|||
|
|||
if (parentDir != null && !parentDir.exists()) { |
|||
System.out.println("警告: 父目录不存在,将自动创建"); |
|||
} |
|||
|
|||
return input; |
|||
} |
|||
|
|||
public static void saveDataWithPrompt(List<CrawlerData> dataList, String crawlerName) { |
|||
if (dataList == null || dataList.isEmpty()) { |
|||
return; |
|||
} |
|||
|
|||
String defaultFile = (crawlerName != null && !crawlerName.isEmpty()) |
|||
? getDefaultCacheFile(crawlerName) |
|||
: DEFAULT_CACHE_FILE; |
|||
|
|||
CrawlerView view = new CrawlerView(); |
|||
java.util.Scanner scanner = new java.util.Scanner(System.in); |
|||
System.out.print("\n是否保存爬取结果? (y/n): "); |
|||
String input = scanner.nextLine().trim().toLowerCase(); |
|||
|
|||
if (input.equals("y") || input.equals("yes")) { |
|||
while (true) { |
|||
String filePath = readInputWithScanner(scanner, "请输入保存路径 (默认: " + defaultFile + "): "); |
|||
if (filePath.isEmpty()) { |
|||
filePath = defaultFile; |
|||
} |
|||
|
|||
try { |
|||
validatePathStatic(filePath); |
|||
|
|||
JsonSerializer.serializeToFile(dataList, filePath); |
|||
|
|||
java.io.File savedFile = new java.io.File(filePath); |
|||
view.showCacheSuccess(filePath, dataList.size(), savedFile.length()); |
|||
break; |
|||
} catch (SecurityException e) { |
|||
System.err.println("安全错误: " + e.getMessage()); |
|||
String retry = readInputWithScanner(scanner, "是否重新输入路径? (y/n): "); |
|||
if (!retry.equals("y") && !retry.equals("yes")) { |
|||
break; |
|||
} |
|||
} catch (IOException e) { |
|||
System.err.println("保存失败: " + e.getMessage()); |
|||
String retry = readInputWithScanner(scanner, "是否重新输入路径? (y/n): "); |
|||
if (!retry.equals("y") && !retry.equals("yes")) { |
|||
break; |
|||
} |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
private static void validatePathStatic(String filePath) { |
|||
if (!isPathAllowedStatic(filePath)) { |
|||
throw new SecurityException("访问被拒绝: 无权访问该路径"); |
|||
} |
|||
} |
|||
|
|||
private static boolean isPathAllowedStatic(String filePath) { |
|||
if (filePath == null || filePath.isEmpty()) { |
|||
return false; |
|||
} |
|||
|
|||
java.io.File file = new java.io.File(filePath); |
|||
String canonicalPath; |
|||
try { |
|||
canonicalPath = file.getCanonicalPath(); |
|||
} catch (IOException e) { |
|||
return false; |
|||
} |
|||
|
|||
String dataDirPath; |
|||
try { |
|||
dataDirPath = new java.io.File(DEFAULT_CACHE_DIR).getCanonicalPath(); |
|||
} catch (IOException e) { |
|||
return false; |
|||
} |
|||
|
|||
return canonicalPath.startsWith(dataDirPath); |
|||
} |
|||
|
|||
private static String readInputWithScanner(java.util.Scanner scanner, String prompt) { |
|||
System.out.print(prompt); |
|||
return scanner.nextLine().trim(); |
|||
} |
|||
} |
|||
@ -0,0 +1,7 @@ |
|||
package com.crawler.command; |
|||
|
|||
/**
 * A named action that can be run from the interactive command line.
 */
public interface Command {

    /**
     * Returns the keyword that selects this command.
     *
     * @return the command name
     */
    String getName();

    /**
     * Returns a short, human-readable explanation of what the command does.
     *
     * @return the description text
     */
    String getDescription();

    /** Runs the command. */
    void execute();
}
|||
@ -0,0 +1,63 @@ |
|||
package com.crawler.command; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
public class CommandController { |
|||
private List<Command> commands; |
|||
private Scanner scanner; |
|||
private CommandHistory history; |
|||
|
|||
public CommandController() { |
|||
commands = new ArrayList<>(); |
|||
scanner = new Scanner(System.in); |
|||
history = CommandHistory.getInstance(); |
|||
initCommands(); |
|||
} |
|||
|
|||
private void initCommands() { |
|||
HelpCommand helpCmd = new HelpCommand(commands); |
|||
commands.add(helpCmd); |
|||
commands.add(new ListCommand()); |
|||
commands.add(new CrawlCommand()); |
|||
commands.add(new CacheCommand()); |
|||
commands.add(new ExitCommand()); |
|||
} |
|||
|
|||
public void start() { |
|||
System.out.println("========================================"); |
|||
System.out.println("Java爬虫框架 - 命令行模式"); |
|||
System.out.println("========================================"); |
|||
System.out.println("输入 'help' 查看可用指令"); |
|||
System.out.println("========================================"); |
|||
|
|||
while (true) { |
|||
System.out.print("> "); |
|||
String input = scanner.nextLine().trim().toLowerCase(); |
|||
|
|||
if (input.isEmpty()) { |
|||
continue; |
|||
} |
|||
|
|||
history.add(input); |
|||
executeCommand(input); |
|||
} |
|||
} |
|||
|
|||
private void executeCommand(String commandName) { |
|||
for (Command cmd : commands) { |
|||
if (cmd.getName().equals(commandName)) { |
|||
cmd.execute(); |
|||
return; |
|||
} |
|||
} |
|||
|
|||
System.out.println("未知指令: " + commandName); |
|||
System.out.println("输入 'help' 查看可用指令"); |
|||
} |
|||
|
|||
public void stop() { |
|||
scanner.close(); |
|||
} |
|||
} |
|||
@ -0,0 +1,36 @@ |
|||
package com.crawler.command; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
/**
 * Process-wide, in-memory record of the commands entered by the user.
 *
 * <p>The single instance is created during class initialisation, so
 * {@link #getInstance()} always returns the same object without a check-then-create
 * step. The history list itself is not synchronised.
 */
public class CommandHistory {

    /** The one and only instance. */
    private static final CommandHistory INSTANCE = new CommandHistory();

    /** Entered commands, oldest first. */
    private final List<String> history;

    private CommandHistory() {
        history = new ArrayList<>();
    }

    /**
     * Returns the shared history.
     *
     * @return the singleton instance, never {@code null}
     */
    public static CommandHistory getInstance() {
        return INSTANCE;
    }

    /**
     * Appends a command to the end of the history.
     *
     * @param command the command text to record
     */
    public void add(String command) {
        history.add(command);
    }

    /**
     * Returns a snapshot of the history.
     *
     * @return a new list holding the recorded commands, oldest first; changes to
     *         the returned list do not affect the history
     */
    public List<String> getHistory() {
        return new ArrayList<>(history);
    }

    /** Removes every recorded command. */
    public void clear() {
        history.clear();
    }

    /**
     * Returns the number of recorded commands.
     *
     * @return the history size
     */
    public int size() {
        return history.size();
    }
}
|||
Loading…
Reference in new issue