You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
363 lines
13 KiB
363 lines
13 KiB
package com.crawler.command;
|
|
|
|
import com.crawler.exception.CacheException;
|
|
import com.crawler.model.CrawlerData;
|
|
import com.crawler.util.JsonSerializer;
|
|
import com.crawler.view.CrawlerView;
|
|
|
|
import java.io.IOException;
|
|
import java.util.List;
|
|
|
|
public class CacheCommand extends BaseCommand {
|
|
|
|
private static final String DEFAULT_CACHE_DIR = "data";
|
|
private static final String DEFAULT_CACHE_FILE = DEFAULT_CACHE_DIR + "/crawler_data.json";
|
|
|
|
public static String getDefaultCacheFile(String crawlerName) {
|
|
String safeName = sanitizeFileName(crawlerName);
|
|
return DEFAULT_CACHE_DIR + "/" + safeName + ".json";
|
|
}
|
|
|
|
private static String sanitizeFileName(String name) {
|
|
return name.replaceAll("[^a-zA-Z0-9\\u4e00-\\u9fa5-_]", "_");
|
|
}
|
|
|
|
private CrawlerView view;
|
|
|
|
static {
|
|
initCacheDir();
|
|
}
|
|
|
|
private static void initCacheDir() {
|
|
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR);
|
|
if (!cacheDir.exists()) {
|
|
boolean created = cacheDir.mkdirs();
|
|
if (created) {
|
|
System.out.println("已创建数据目录: " + DEFAULT_CACHE_DIR);
|
|
}
|
|
}
|
|
}
|
|
|
|
public CacheCommand() {
|
|
this.view = new CrawlerView();
|
|
}
|
|
|
|
@Override
|
|
public String getName() {
|
|
return "cache";
|
|
}
|
|
|
|
@Override
|
|
public String getDescription() {
|
|
return "缓存操作: save - 保存数据, load - 读取数据, list - 查看缓存文件, delete - 删除缓存文件";
|
|
}
|
|
|
|
@Override
|
|
public void execute() {
|
|
String action = readInput("请输入缓存操作 (save/load/list/delete): ").toLowerCase();
|
|
|
|
switch (action) {
|
|
case "save":
|
|
saveData();
|
|
break;
|
|
case "load":
|
|
loadDataWithRetry();
|
|
break;
|
|
case "list":
|
|
listCacheFiles();
|
|
break;
|
|
case "delete":
|
|
deleteCacheFile();
|
|
break;
|
|
default:
|
|
System.out.println("未知操作: " + action);
|
|
System.out.println("可用操作: save, load, list, delete");
|
|
}
|
|
}
|
|
|
|
private void saveData(List<CrawlerData> dataList, String crawlerName) throws CacheException {
|
|
if (dataList == null || dataList.isEmpty()) {
|
|
throw new CacheException("没有数据可保存");
|
|
}
|
|
|
|
String defaultFile = (crawlerName != null && !crawlerName.isEmpty())
|
|
? getDefaultCacheFile(crawlerName)
|
|
: DEFAULT_CACHE_FILE;
|
|
|
|
String filePath = readInputWithRetry("请输入保存路径 (默认: " + defaultFile + "): ");
|
|
if (filePath.isEmpty()) {
|
|
filePath = defaultFile;
|
|
}
|
|
|
|
validatePath(filePath);
|
|
|
|
try {
|
|
JsonSerializer.serializeToFile(dataList, filePath);
|
|
|
|
java.io.File savedFile = new java.io.File(filePath);
|
|
view.showCacheSuccess(filePath, dataList.size(), savedFile.length());
|
|
} catch (IOException e) {
|
|
throw new CacheException("保存失败: " + e.getMessage(), filePath, e);
|
|
}
|
|
}
|
|
|
|
private void saveData(List<CrawlerData> dataList) throws CacheException {
|
|
saveData(dataList, null);
|
|
}
|
|
|
|
private void validatePath(String filePath) {
|
|
if (!isPathAllowed(filePath)) {
|
|
throw new SecurityException("访问被拒绝: 无权访问该路径");
|
|
}
|
|
}
|
|
|
|
private boolean isPathAllowed(String filePath) {
|
|
if (filePath == null || filePath.isEmpty()) {
|
|
return false;
|
|
}
|
|
|
|
java.io.File file = new java.io.File(filePath);
|
|
String canonicalPath;
|
|
try {
|
|
canonicalPath = file.getCanonicalPath();
|
|
} catch (IOException e) {
|
|
return false;
|
|
}
|
|
|
|
String dataDirPath;
|
|
try {
|
|
dataDirPath = new java.io.File(DEFAULT_CACHE_DIR).getCanonicalPath();
|
|
} catch (IOException e) {
|
|
return false;
|
|
}
|
|
|
|
return canonicalPath.startsWith(dataDirPath);
|
|
}
|
|
|
|
private void saveData() {
|
|
System.out.println("注意: 当前没有爬取的数据。请先使用 crawl 命令爬取数据。");
|
|
System.out.println("或者使用 crawl 命令爬取后,在提示时选择保存。");
|
|
}
|
|
|
|
private void loadDataWithRetry() {
|
|
while (true) {
|
|
try {
|
|
loadData();
|
|
break;
|
|
} catch (CacheException e) {
|
|
System.err.println(e.getMessage());
|
|
String retry = readInput("是否重新输入路径? (y/n): ").toLowerCase();
|
|
if (!retry.equals("y") && !retry.equals("yes")) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private void loadData() throws CacheException {
|
|
String filePath = readInput("请输入读取路径 (默认: " + DEFAULT_CACHE_FILE + "): ");
|
|
if (filePath.isEmpty()) {
|
|
filePath = DEFAULT_CACHE_FILE;
|
|
}
|
|
|
|
java.io.File file = new java.io.File(filePath);
|
|
if (!file.exists()) {
|
|
throw new CacheException("文件不存在", filePath);
|
|
}
|
|
|
|
if (!file.isFile()) {
|
|
throw new CacheException("路径不是有效的文件", filePath);
|
|
}
|
|
|
|
if (!file.canRead()) {
|
|
throw new CacheException("文件不可读取", filePath);
|
|
}
|
|
|
|
try {
|
|
List<CrawlerData> dataList = JsonSerializer.deserializeFromFile(filePath);
|
|
view.showData(dataList);
|
|
} catch (IOException e) {
|
|
throw new CacheException("读取失败: " + e.getMessage(), filePath, e);
|
|
}
|
|
}
|
|
|
|
private void listCacheFiles() {
|
|
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR);
|
|
if (!cacheDir.exists() || !cacheDir.isDirectory()) {
|
|
System.out.println("数据目录不存在");
|
|
return;
|
|
}
|
|
|
|
java.io.File[] files = cacheDir.listFiles((dir, name) -> name.endsWith(".json"));
|
|
if (files == null || files.length == 0) {
|
|
System.out.println("数据目录中没有JSON文件");
|
|
return;
|
|
}
|
|
|
|
System.out.println("========================================");
|
|
System.out.println("数据文件列表:");
|
|
System.out.println("========================================");
|
|
for (java.io.File file : files) {
|
|
System.out.println("- " + file.getName() + " (" + file.length() + " bytes)");
|
|
}
|
|
}
|
|
|
|
private void deleteCacheFile() {
|
|
java.io.File cacheDir = new java.io.File(DEFAULT_CACHE_DIR);
|
|
if (!cacheDir.exists() || !cacheDir.isDirectory()) {
|
|
System.out.println("数据目录不存在");
|
|
return;
|
|
}
|
|
|
|
java.io.File[] files = cacheDir.listFiles((dir, name) -> name.endsWith(".json"));
|
|
if (files == null || files.length == 0) {
|
|
System.out.println("数据目录中没有JSON文件");
|
|
return;
|
|
}
|
|
|
|
System.out.println("========================================");
|
|
System.out.println("可选删除的文件:");
|
|
System.out.println("========================================");
|
|
for (int i = 0; i < files.length; i++) {
|
|
System.out.println("[" + (i + 1) + "] " + files[i].getName() + " (" + files[i].length() + " bytes)");
|
|
}
|
|
System.out.println("[all] 删除所有文件");
|
|
System.out.println("========================================");
|
|
|
|
String input = readInput("请输入要删除的文件序号或 'all': ");
|
|
|
|
if (input.equalsIgnoreCase("all")) {
|
|
System.out.print("确定要删除所有文件吗? (y/n): ");
|
|
String confirm = scanner.nextLine().trim().toLowerCase();
|
|
if (confirm.equals("y") || confirm.equals("yes")) {
|
|
int count = 0;
|
|
for (java.io.File file : files) {
|
|
if (file.delete()) {
|
|
count++;
|
|
}
|
|
}
|
|
System.out.println("已删除 " + count + " 个文件");
|
|
} else {
|
|
System.out.println("取消删除");
|
|
}
|
|
} else {
|
|
try {
|
|
int index = Integer.parseInt(input) - 1;
|
|
if (index >= 0 && index < files.length) {
|
|
java.io.File file = files[index];
|
|
System.out.print("确定要删除 '" + file.getName() + "' 吗? (y/n): ");
|
|
String confirm = scanner.nextLine().trim().toLowerCase();
|
|
if (confirm.equals("y") || confirm.equals("yes")) {
|
|
if (file.delete()) {
|
|
System.out.println("已删除: " + file.getName());
|
|
} else {
|
|
System.out.println("删除失败");
|
|
}
|
|
} else {
|
|
System.out.println("取消删除");
|
|
}
|
|
} else {
|
|
System.out.println("无效的序号");
|
|
}
|
|
} catch (NumberFormatException e) {
|
|
System.out.println("无效输入,请输入数字序号或 'all'");
|
|
}
|
|
}
|
|
}
|
|
|
|
private String readInputWithRetry(String prompt) throws CacheException {
|
|
String input = readInput(prompt);
|
|
if (input == null || input.trim().isEmpty()) {
|
|
return input;
|
|
}
|
|
|
|
java.io.File file = new java.io.File(input);
|
|
java.io.File parentDir = file.getParentFile();
|
|
|
|
if (parentDir != null && !parentDir.exists()) {
|
|
System.out.println("警告: 父目录不存在,将自动创建");
|
|
}
|
|
|
|
return input;
|
|
}
|
|
|
|
public static void saveDataWithPrompt(List<CrawlerData> dataList, String crawlerName) {
|
|
if (dataList == null || dataList.isEmpty()) {
|
|
return;
|
|
}
|
|
|
|
String defaultFile = (crawlerName != null && !crawlerName.isEmpty())
|
|
? getDefaultCacheFile(crawlerName)
|
|
: DEFAULT_CACHE_FILE;
|
|
|
|
CrawlerView view = new CrawlerView();
|
|
java.util.Scanner scanner = new java.util.Scanner(System.in);
|
|
System.out.print("\n是否保存爬取结果? (y/n): ");
|
|
String input = scanner.nextLine().trim().toLowerCase();
|
|
|
|
if (input.equals("y") || input.equals("yes")) {
|
|
while (true) {
|
|
String filePath = readInputWithScanner(scanner, "请输入保存路径 (默认: " + defaultFile + "): ");
|
|
if (filePath.isEmpty()) {
|
|
filePath = defaultFile;
|
|
}
|
|
|
|
try {
|
|
validatePathStatic(filePath);
|
|
|
|
JsonSerializer.serializeToFile(dataList, filePath);
|
|
|
|
java.io.File savedFile = new java.io.File(filePath);
|
|
view.showCacheSuccess(filePath, dataList.size(), savedFile.length());
|
|
break;
|
|
} catch (SecurityException e) {
|
|
System.err.println("安全错误: " + e.getMessage());
|
|
String retry = readInputWithScanner(scanner, "是否重新输入路径? (y/n): ");
|
|
if (!retry.equals("y") && !retry.equals("yes")) {
|
|
break;
|
|
}
|
|
} catch (IOException e) {
|
|
System.err.println("保存失败: " + e.getMessage());
|
|
String retry = readInputWithScanner(scanner, "是否重新输入路径? (y/n): ");
|
|
if (!retry.equals("y") && !retry.equals("yes")) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
private static void validatePathStatic(String filePath) {
|
|
if (!isPathAllowedStatic(filePath)) {
|
|
throw new SecurityException("访问被拒绝: 无权访问该路径");
|
|
}
|
|
}
|
|
|
|
private static boolean isPathAllowedStatic(String filePath) {
|
|
if (filePath == null || filePath.isEmpty()) {
|
|
return false;
|
|
}
|
|
|
|
java.io.File file = new java.io.File(filePath);
|
|
String canonicalPath;
|
|
try {
|
|
canonicalPath = file.getCanonicalPath();
|
|
} catch (IOException e) {
|
|
return false;
|
|
}
|
|
|
|
String dataDirPath;
|
|
try {
|
|
dataDirPath = new java.io.File(DEFAULT_CACHE_DIR).getCanonicalPath();
|
|
} catch (IOException e) {
|
|
return false;
|
|
}
|
|
|
|
return canonicalPath.startsWith(dataDirPath);
|
|
}
|
|
|
|
private static String readInputWithScanner(java.util.Scanner scanner, String prompt) {
|
|
System.out.print(prompt);
|
|
return scanner.nextLine().trim();
|
|
}
|
|
}
|
|
|