// You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
//
// 363 lines
// 13 KiB

package com.crawler.command;
import com.crawler.exception.CacheException;
import com.crawler.model.CrawlerData;
import com.crawler.util.JsonSerializer;
import com.crawler.view.CrawlerView;
import java.io.IOException;
import java.util.List;
/**
 * Interactive {@code cache} command: saves crawled data to JSON files, loads and
 * displays previously saved data, and lists or deletes the JSON files kept in
 * the data directory.
 *
 * <p>All file paths entered by the user are confined to the data directory
 * ({@code data}, resolved against the working directory). Paths that resolve
 * outside of it are rejected.
 *
 * <p>Console input is read through {@code readInput(...)} and the {@code scanner}
 * field inherited from {@code BaseCommand}; their exact behaviour (trimming,
 * null on end-of-input) is not visible from this class, so input is normalised
 * defensively here.
 */
public class CacheCommand extends BaseCommand {

    private static final String DEFAULT_CACHE_DIR = "data";
    private static final String DEFAULT_CACHE_FILE = DEFAULT_CACHE_DIR + "/crawler_data.json";

    private static final String ACCESS_DENIED_MESSAGE = "访问被拒绝: 无权访问该路径";
    private static final String RETRY_PROMPT = "是否重新输入路径? (y/n): ";
    private static final String SEPARATOR_LINE = "========================================";

    /**
     * Matches every character that is NOT an ASCII letter, a digit, a CJK
     * ideograph in the range U+4E00..U+9FA5, an underscore or a hyphen.
     * Compiled once because it is reused for every file-name sanitisation.
     */
    private static final java.util.regex.Pattern UNSAFE_FILE_NAME_CHARS =
            java.util.regex.Pattern.compile("[^a-zA-Z0-9\\u4e00-\\u9fa5_-]");

    private final CrawlerView view;

    static {
        // Make sure the data directory exists as soon as the class is loaded.
        initCacheDir();
    }

    /** Creates a command instance with its own {@link CrawlerView}. */
    public CacheCommand() {
        this.view = new CrawlerView();
    }

    /**
     * Builds the default cache file path for a crawler.
     *
     * @param crawlerName crawler name; characters that are unsafe in a file name
     *                    are replaced by {@code _}
     * @return {@code data/<sanitised name>.json}, or the generic default cache
     *         file when {@code crawlerName} is {@code null} or empty
     */
    public static String getDefaultCacheFile(String crawlerName) {
        if (crawlerName == null || crawlerName.isEmpty()) {
            return DEFAULT_CACHE_FILE;
        }
        return DEFAULT_CACHE_DIR + "/" + sanitizeFileName(crawlerName) + ".json";
    }

    /** Replaces every character that is not allowed in a cache file name with {@code _}. */
    private static String sanitizeFileName(String name) {
        return UNSAFE_FILE_NAME_CHARS.matcher(name).replaceAll("_");
    }

    /** Creates the data directory if it is missing and reports when it was created. */
    private static void initCacheDir() {
        java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR);
        if (!cacheDir.exists() && cacheDir.mkdirs()) {
            System.out.println("已创建数据目录: " + DEFAULT_CACHE_DIR);
        }
    }

    @Override
    public String getName() {
        return "cache";
    }

    @Override
    public String getDescription() {
        return "缓存操作: save - 保存数据, load - 读取数据, list - 查看缓存文件, delete - 删除缓存文件";
    }

    /** Asks which cache operation to run and dispatches to it. */
    @Override
    public void execute() {
        String action = normalize(readInput("请输入缓存操作 (save/load/list/delete): "));
        switch (action) {
            case "save":
                saveData();
                break;
            case "load":
                loadDataWithRetry();
                break;
            case "list":
                listCacheFiles();
                break;
            case "delete":
                deleteCacheFile();
                break;
            default:
                System.out.println("未知操作: " + action);
                System.out.println("可用操作: save, load, list, delete");
        }
    }

    /**
     * Prompts for a target path and saves {@code dataList} to it as JSON.
     *
     * @param dataList    data to save; must be non-empty
     * @param crawlerName used to derive the default file name; may be {@code null}
     * @throws CacheException if there is nothing to save, the path lies outside
     *                        the data directory, or writing fails
     */
    private void saveData(List<CrawlerData> dataList, String crawlerName) throws CacheException {
        if (dataList == null || dataList.isEmpty()) {
            throw new CacheException("没有数据可保存");
        }
        String defaultFile = getDefaultCacheFile(crawlerName);
        String filePath = readInputWithRetry("请输入保存路径 (默认: " + defaultFile + "): ");
        if (filePath == null || filePath.trim().isEmpty()) {
            filePath = defaultFile;
        }
        try {
            writeCache(dataList, filePath, view);
        } catch (SecurityException e) {
            // Surface the rejection through the declared failure channel of this method.
            throw new CacheException(e.getMessage(), filePath, e);
        } catch (IOException e) {
            throw new CacheException("保存失败: " + e.getMessage(), filePath, e);
        }
    }

    /** Saves {@code dataList} using the generic default file name as the suggestion. */
    private void saveData(List<CrawlerData> dataList) throws CacheException {
        saveData(dataList, null);
    }

    /**
     * Handles the bare {@code save} action: this command holds no crawled data
     * itself, so it only tells the user how to obtain and save some.
     */
    private void saveData() {
        System.out.println("注意: 当前没有爬取的数据。请先使用 crawl 命令爬取数据。");
        System.out.println("或者使用 crawl 命令爬取后,在提示时选择保存。");
    }

    /**
     * Validates the target, creates its parent directory if needed, writes the
     * data and reports success through {@code view}.
     *
     * @throws SecurityException if {@code filePath} resolves outside the data directory
     * @throws IOException       if the parent directory cannot be created or writing fails
     */
    private static void writeCache(List<CrawlerData> dataList, String filePath, CrawlerView view)
            throws IOException {
        validatePath(filePath);
        ensureParentDirectory(filePath);
        JsonSerializer.serializeToFile(dataList, filePath);
        java.io.File savedFile = new java.io.File(filePath);
        view.showCacheSuccess(filePath, dataList.size(), savedFile.length());
    }

    /**
     * Creates the parent directory of {@code filePath} when it does not exist yet.
     * The save prompt announces that a missing parent directory will be created;
     * doing it here keeps that promise regardless of what the serializer does.
     */
    private static void ensureParentDirectory(String filePath) throws IOException {
        java.io.File parentDir = new java.io.File(filePath).getAbsoluteFile().getParentFile();
        if (parentDir == null || parentDir.isDirectory()) {
            return;
        }
        // Re-check after a failed mkdirs(): another process may have created it meanwhile.
        if (!parentDir.mkdirs() && !parentDir.isDirectory()) {
            throw new IOException("无法创建目录: " + parentDir.getPath());
        }
    }

    /**
     * Rejects paths that resolve outside the data directory.
     *
     * @throws SecurityException if the path is not allowed
     */
    private static void validatePath(String filePath) {
        if (!isPathAllowed(filePath)) {
            throw new SecurityException(ACCESS_DENIED_MESSAGE);
        }
    }

    /**
     * Tells whether {@code filePath} resolves to the data directory or to a
     * location below it. Both sides are canonicalised first so that {@code ..}
     * segments and symbolic links cannot be used to escape.
     *
     * @return {@code false} for {@code null}/empty paths, for paths that cannot
     *         be canonicalised, and for paths outside the data directory
     */
    private static boolean isPathAllowed(String filePath) {
        if (filePath == null || filePath.isEmpty()) {
            return false;
        }
        try {
            String target = new java.io.File(filePath).getCanonicalPath();
            String base = new java.io.File(DEFAULT_CACHE_DIR).getCanonicalPath();
            if (target.equals(base)) {
                return true;
            }
            // Compare against "<base><separator>" rather than "<base>": a plain
            // prefix test would also accept siblings such as "data_backup".
            String basePrefix = base.endsWith(java.io.File.separator)
                    ? base
                    : base + java.io.File.separator;
            return target.startsWith(basePrefix);
        } catch (IOException e) {
            // A path that cannot be resolved cannot be proven safe.
            return false;
        }
    }

    /** Runs {@link #loadData()} until it succeeds or the user declines to retry. */
    private void loadDataWithRetry() {
        while (true) {
            try {
                loadData();
                return;
            } catch (CacheException e) {
                System.err.println(e.getMessage());
                if (!isYes(readInput(RETRY_PROMPT))) {
                    return;
                }
            }
        }
    }

    /**
     * Prompts for a file inside the data directory, reads it and shows its content.
     *
     * @throws CacheException if the path lies outside the data directory, does
     *                        not denote a readable regular file, or reading fails
     */
    private void loadData() throws CacheException {
        String filePath = readInput("请输入读取路径 (默认: " + DEFAULT_CACHE_FILE + "): ");
        if (filePath == null || filePath.isEmpty()) {
            filePath = DEFAULT_CACHE_FILE;
        }
        // Apply the same directory restriction that saving uses.
        if (!isPathAllowed(filePath)) {
            throw new CacheException(ACCESS_DENIED_MESSAGE, filePath);
        }
        java.io.File file = new java.io.File(filePath);
        if (!file.exists()) {
            throw new CacheException("文件不存在", filePath);
        }
        if (!file.isFile()) {
            throw new CacheException("路径不是有效的文件", filePath);
        }
        if (!file.canRead()) {
            throw new CacheException("文件不可读取", filePath);
        }
        try {
            List<CrawlerData> dataList = JsonSerializer.deserializeFromFile(filePath);
            view.showData(dataList);
        } catch (IOException e) {
            throw new CacheException("读取失败: " + e.getMessage(), filePath, e);
        }
    }

    /** Prints the name and size of every JSON file in the data directory. */
    private void listCacheFiles() {
        java.io.File[] files = findCacheFiles();
        if (files.length == 0) {
            return;
        }
        System.out.println(SEPARATOR_LINE);
        System.out.println("数据文件列表:");
        System.out.println(SEPARATOR_LINE);
        for (java.io.File file : files) {
            System.out.println("- " + file.getName() + " (" + file.length() + " bytes)");
        }
    }

    /**
     * Shows a numbered list of the JSON files in the data directory and deletes
     * either the selected one or, for {@code all}, every listed file, after
     * asking for confirmation.
     */
    private void deleteCacheFile() {
        java.io.File[] files = findCacheFiles();
        if (files.length == 0) {
            return;
        }
        System.out.println(SEPARATOR_LINE);
        System.out.println("可选删除的文件:");
        System.out.println(SEPARATOR_LINE);
        for (int i = 0; i < files.length; i++) {
            System.out.println("[" + (i + 1) + "] " + files[i].getName()
                    + " (" + files[i].length() + " bytes)");
        }
        System.out.println("[all] 删除所有文件");
        System.out.println(SEPARATOR_LINE);

        String input = readInput("请输入要删除的文件序号或 'all': ");
        String choice = (input == null) ? "" : input.trim();
        if (choice.equalsIgnoreCase("all")) {
            deleteAllFiles(files);
        } else {
            deleteSingleFile(files, choice);
        }
    }

    /** Deletes every file in {@code files} after confirmation and reports how many were removed. */
    private void deleteAllFiles(java.io.File[] files) {
        if (!confirm("确定要删除所有文件吗? (y/n): ")) {
            System.out.println("取消删除");
            return;
        }
        int deleted = 0;
        for (java.io.File file : files) {
            if (file.delete()) {
                deleted++;
            }
        }
        System.out.println("已删除 " + deleted + " 个文件");
    }

    /**
     * Deletes the file whose 1-based menu number is given in {@code choice},
     * after confirmation.
     */
    private void deleteSingleFile(java.io.File[] files, String choice) {
        int number;
        try {
            number = Integer.parseInt(choice);
        } catch (NumberFormatException e) {
            System.out.println("无效输入,请输入数字序号或 'all'");
            return;
        }
        if (number < 1 || number > files.length) {
            System.out.println("无效的序号");
            return;
        }
        java.io.File file = files[number - 1];
        if (!confirm("确定要删除 '" + file.getName() + "' 吗? (y/n): ")) {
            System.out.println("取消删除");
            return;
        }
        if (file.delete()) {
            System.out.println("已删除: " + file.getName());
        } else {
            System.out.println("删除失败");
        }
    }

    /**
     * Collects the regular {@code .json} files of the data directory, sorted by
     * path so that menu numbers are stable between runs ({@code listFiles} makes
     * no ordering guarantee).
     *
     * @return the files, or an empty array after printing the reason when the
     *         directory is missing or holds no JSON file
     */
    private java.io.File[] findCacheFiles() {
        java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR);
        if (!cacheDir.isDirectory()) {
            System.out.println("数据目录不存在");
            return new java.io.File[0];
        }
        java.io.File[] files = cacheDir.listFiles(
                (java.io.FileFilter) candidate ->
                        candidate.isFile() && candidate.getName().endsWith(".json"));
        if (files == null || files.length == 0) {
            System.out.println("数据目录中没有JSON文件");
            return new java.io.File[0];
        }
        java.util.Arrays.sort(files);
        return files;
    }

    /** Prints {@code prompt} and returns whether the next input line is {@code y} or {@code yes}. */
    private boolean confirm(String prompt) {
        System.out.print(prompt);
        return isYes(scanner.nextLine());
    }

    /**
     * Reads a save path and warns when its parent directory does not exist yet.
     * Despite the name, no retry happens here; the input is returned unchanged.
     *
     * @return the raw input, possibly {@code null} or blank
     */
    private String readInputWithRetry(String prompt) throws CacheException {
        String input = readInput(prompt);
        if (input == null || input.trim().isEmpty()) {
            return input;
        }
        java.io.File parentDir = new java.io.File(input).getParentFile();
        if (parentDir != null && !parentDir.exists()) {
            System.out.println("警告: 父目录不存在,将自动创建");
        }
        return input;
    }

    /**
     * Asks on the console whether freshly crawled data should be saved and, if
     * so, prompts for a path inside the data directory until saving succeeds or
     * the user gives up. Does nothing when {@code dataList} is {@code null} or empty.
     *
     * @param dataList    crawled data to save
     * @param crawlerName used to derive the suggested file name; may be {@code null}
     */
    public static void saveDataWithPrompt(List<CrawlerData> dataList, String crawlerName) {
        if (dataList == null || dataList.isEmpty()) {
            return;
        }
        String defaultFile = getDefaultCacheFile(crawlerName);
        CrawlerView view = new CrawlerView();
        // Not closed here because closing a Scanner also closes System.in.
        // NOTE(review): this is a second Scanner on System.in next to whatever
        // BaseCommand uses; confirm the two cannot steal buffered input from each other.
        java.util.Scanner scanner = new java.util.Scanner(System.in);

        if (!isYes(readInputWithScanner(scanner, "\n是否保存爬取结果? (y/n): "))) {
            return;
        }
        while (true) {
            String filePath = readInputWithScanner(scanner, "请输入保存路径 (默认: " + defaultFile + "): ");
            if (filePath.isEmpty()) {
                filePath = defaultFile;
            }
            try {
                writeCache(dataList, filePath, view);
                return;
            } catch (SecurityException e) {
                System.err.println("安全错误: " + e.getMessage());
            } catch (IOException e) {
                System.err.println("保存失败: " + e.getMessage());
            }
            if (!isYes(readInputWithScanner(scanner, RETRY_PROMPT))) {
                return;
            }
        }
    }

    /** Prints {@code prompt} and returns the next line from {@code scanner}, trimmed. */
    private static String readInputWithScanner(java.util.Scanner scanner, String prompt) {
        System.out.print(prompt);
        return scanner.nextLine().trim();
    }

    /** Tells whether {@code answer} is {@code y} or {@code yes}, ignoring case and surrounding blanks. */
    private static boolean isYes(String answer) {
        String normalized = normalize(answer);
        return normalized.equals("y") || normalized.equals("yes");
    }

    /**
     * Trims and lower-cases console input; {@code null} becomes the empty string.
     * Uses {@code Locale.ROOT} so keywords still match under locales with special
     * case rules (for example Turkish dotless i).
     */
    private static String normalize(String input) {
        return (input == null) ? "" : input.trim().toLowerCase(java.util.Locale.ROOT);
    }
}