commit a52792f019da9b32c3bf67a09bb9b8760ee400d7
Author: Chengwuyi <3394813085@qq.com>
Date: Sat May 30 21:44:26 2026 +0800
提交项目源码
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..8b21d0d
Binary files /dev/null and b/.gitignore differ
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..f796551
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,45 @@
+{
+ "version": "0.2.0",
+ "configurations": [
+ {
+ "type": "java",
+ "name": "CrawlerScheduler",
+ "request": "launch",
+ "mainClass": "com.ski.crawler.spider.CrawlerScheduler",
+ "projectName": "crawler"
+ },
+ {
+ "type": "java",
+ "name": "Main (crawl skiresort via proxy 127.0.0.1:7890)",
+ "request": "launch",
+ "mainClass": "com.ski.crawler.Main",
+ "args": [
+ "crawl", "--site skiresort", "--limit 5", "--proxy 127.0.0.1:7890", "--timeout 60000", "--retry 2", "--show-failures"
+ ]
+ },
+ {
+ "type": "java",
+ "name": "Main (real, Clash proxy)",
+ "request": "launch",
+ "mainClass": "com.ski.crawler.Main",
+ "vmArgs": [
+ "-Djava.net.useSystemProxies=true",
+ "-Dhttp.proxyHost=127.0.0.1",
+ "-Dhttp.proxyPort=7890",
+ "-Dhttps.proxyHost=127.0.0.1",
+ "-Dhttps.proxyPort=7890"
+ ]
+ },
+ {
+ "type": "java",
+ "name": "Main (real, Clash SOCKS5)",
+ "request": "launch",
+ "mainClass": "com.ski.crawler.Main",
+ "vmArgs": [
+ "-Djava.net.useSystemProxies=true",
+ "-DsocksProxyHost=127.0.0.1",
+ "-DsocksProxyPort=7891"
+ ]
+ }
+ ]
+}
diff --git a/pom.xml b/pom.xml
new file mode 100644
index 0000000..ea28f6c
--- /dev/null
+++ b/pom.xml
@@ -0,0 +1,132 @@
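+<?xml version="1.0" encoding="UTF-8"?>
+<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+
+    <groupId>com.ski</groupId>
+    <artifactId>crawler</artifactId>
+    <version>1.0.0</version>
+    <packaging>jar</packaging>
+
+    <name>Web Crawler</name>
+    <description>A Java web crawler project</description>
+
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
+        <jsoup.version>1.15.3</jsoup.version>
+        <httpclient.version>4.5.13</httpclient.version>
+        <jackson.version>2.15.2</jackson.version>
+        <poi.version>5.2.5</poi.version>
+    </properties>
+
+    <dependencies>
+
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>${jsoup.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>org.apache.httpcomponents</groupId>
+            <artifactId>httpclient</artifactId>
+            <version>${httpclient.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>${jackson.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-core</artifactId>
+            <version>${jackson.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-annotations</artifactId>
+            <version>${jackson.version}</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>org.projectlombok</groupId>
+            <artifactId>lombok</artifactId>
+            <version>1.18.28</version>
+            <scope>provided</scope>
+        </dependency>
+
+
+        <dependency>
+            <groupId>org.slf4j</groupId>
+            <artifactId>slf4j-api</artifactId>
+            <version>1.7.36</version>
+        </dependency>
+
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.2.12</version>
+        </dependency>
+
+
+        <dependency>
+            <groupId>junit</groupId>
+            <artifactId>junit</artifactId>
+            <version>4.13.2</version>
+            <scope>test</scope>
+        </dependency>
+
+        <dependency>
+            <groupId>org.apache.poi</groupId>
+            <artifactId>poi-ooxml</artifactId>
+            <version>${poi.version}</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.11.0</version>
+                <configuration>
+                    <source>11</source>
+                    <target>11</target>
+                </configuration>
+            </plugin>
+
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-shade-plugin</artifactId>
+                <version>3.4.1</version>
+                <executions>
+                    <execution>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>shade</goal>
+                        </goals>
+                        <configuration>
+                            <transformers>
+                                <transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
+                                    <mainClass>com.ski.crawler.Main</mainClass>
+                                </transformer>
+                            </transformers>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>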
diff --git a/result.jsonl b/result.jsonl
new file mode 100644
index 0000000..a1550e6
--- /dev/null
+++ b/result.jsonl
@@ -0,0 +1,20 @@
+{"id":null,"name":"Thredbo","country":"Australia","region":"Oceania","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/1121#ski-map-42368","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Valle Nevado","country":"Chile","region":"Americas","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/1144#ski-map-42367","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Las Leñas","country":"Argentina","region":"Americas","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/1129#ski-map-42366","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Damüls-Mellau Au, Damüls, Mellau","country":"Vorarlberg","region":"Austria","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/2700#ski-map-37810","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Appalachian Ski Mtn.","country":"North Carolina","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/285#ski-map-34865","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Las Leñas","country":"Argentina","region":"Americas","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/1129#ski-map-42365","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Blue Mountain","country":"Ontario","region":"Canada","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/113#ski-map-39542","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Smugglers' Notch Resort","country":"Vermont","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/209#ski-map-6815","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Granlibakken Ski Resort","country":"California","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/535#ski-map-40733","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Magic Mountain","country":"Vermont","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/201#ski-map-7492","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Bromley Mountain","country":"Vermont","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/217#ski-map-4224","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Magic Mountain","country":"Vermont","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/201#ski-map-6965","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Wurmberg","country":"Central Uplands","region":"Germany","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/4190#ski-map-7596","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Vail","country":"Colorado","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/507#ski-map-2580","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"King Pine Ski Area","country":"New Hampshire","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/354#ski-map-11664","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Pigeon Mountain","country":"Alberta","region":"Canada","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/2131#ski-map-23689","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"The Pines","country":"New York","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/4872#ski-map-10199","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Ski Cooper","country":"Colorado","region":"United States","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/512#ski-map-6863","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Staller Sattel","country":"Tyrol","region":"Austria","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/12393#ski-map-17983","sourceSite":"skimap","crawlTime":null}
+{"id":null,"name":"Val Neigette","country":"Quebec","region":"Canada","latitude":null,"longitude":null,"altitudeMin":null,"altitudeMax":null,"totalKm":null,"slopeCount":null,"liftCount":null,"ticketPriceMin":null,"ticketPriceMax":null,"currency":null,"openTime":null,"snowDepthCm":null,"temperatureC":null,"nearbyHotels":null,"rentalShops":null,"url":"https://skimap.org/skiareas/view/2205#ski-map-2834","sourceSite":"skimap","crawlTime":null}
diff --git a/src/main/java/com/ski/crawler/Main.java b/src/main/java/com/ski/crawler/Main.java
new file mode 100644
index 0000000..99c4ea1
--- /dev/null
+++ b/src/main/java/com/ski/crawler/Main.java
@@ -0,0 +1,40 @@
+package com.ski.crawler;
+
+import com.ski.crawler.command.CrawlCommand;
+import com.ski.crawler.command.ExportCommand;
+import com.ski.crawler.command.FilterCommand;
+import com.ski.crawler.command.HelpCommand;
+import com.ski.crawler.command.ListCommand;
+import com.ski.crawler.command.ResumeCommand;
+import com.ski.crawler.command.SitesCommand;
+import com.ski.crawler.command.StatsCommand;
+import com.ski.crawler.controller.CrawlerContext;
+import com.ski.crawler.controller.CrawlerController;
+import com.ski.crawler.factory.StrategyFactory;
+import com.ski.crawler.repository.SkiResortRepository;
+import com.ski.crawler.service.ScraperService;
+
+/**
+ * Command-line entry point. Wires the repository, strategy factory and scraper service into a
+ * {@link CrawlerContext}, registers every command with a {@link CrawlerController} and hands the
+ * raw arguments to it.
+ */
+public class Main {
+    /**
+     * Builds the shared context and dispatches {@code args} to the controller.
+     *
+     * <p>Any exception that escapes the controller is reported on stderr and the JVM terminates
+     * with exit status 1, so callers (shell scripts, CI) can detect the failure.
+     *
+     * @param args raw command-line arguments, forwarded unchanged to the controller
+     */
+    public static void main(String[] args) {
+        try {
+            SkiResortRepository repo = new SkiResortRepository();
+            StrategyFactory factory = new StrategyFactory();
+            ScraperService service = new ScraperService();
+            CrawlerContext context = new CrawlerContext(repo, factory, service);
+
+            CrawlerController controller = new CrawlerController(
+                    new CrawlCommand(),
+                    new ListCommand(),
+                    new FilterCommand(),
+                    new ExportCommand(),
+                    new ResumeCommand(),
+                    new StatsCommand(),
+                    new SitesCommand(),
+                    new HelpCommand()
+            );
+            controller.run(args, context);
+        } catch (Exception e) {
+            // getMessage() is null for many exceptions (e.g. NullPointerException); fall back to
+            // toString() so the report always names what went wrong.
+            String detail = e.getMessage() == null ? e.toString() : e.getMessage();
+            System.err.println("Crawler failed: " + detail);
+            // Signal the failure to the calling process instead of exiting with status 0.
+            System.exit(1);
+        }
+    }
+}
diff --git a/src/main/java/com/ski/crawler/command/Command.java b/src/main/java/com/ski/crawler/command/Command.java
new file mode 100644
index 0000000..699ea81
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/Command.java
@@ -0,0 +1,10 @@
+package com.ski.crawler.command;
+
+import com.ski.crawler.controller.CrawlerContext;
+
+/**
+ * A single CLI sub-command (implementations in this package include crawl, list, filter,
+ * export, resume and help).
+ */
+public interface Command {
+ /**
+  * Returns the keyword identifying this command, e.g. {@code "crawl"} or {@code "export"}.
+  * NOTE(review): presumably the controller matches this against the first CLI argument —
+  * confirm against CrawlerController.
+  */
+ String name();
+
+ /**
+  * Runs the command.
+  *
+  * @param args    the command-line arguments; implementations in this package parse options
+  *                starting at index 1 (presumably index 0 is the command name — TODO confirm)
+  * @param context shared access to the repository, strategy factory and scraper service
+  * @throws Exception if the command fails
+  */
+ void execute(String[] args, CrawlerContext context) throws Exception;
+}
+
diff --git a/src/main/java/com/ski/crawler/command/CrawlCommand.java b/src/main/java/com/ski/crawler/command/CrawlCommand.java
new file mode 100644
index 0000000..5b122e9
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/CrawlCommand.java
@@ -0,0 +1,253 @@
+package com.ski.crawler.command;
+
+import com.ski.crawler.controller.CrawlerContext;
+import com.ski.crawler.exception.NetworkException;
+import com.ski.crawler.factory.StrategyFactory;
+import com.ski.crawler.repository.SkiResortRepository;
+import com.ski.crawler.service.ScraperService;
+import com.ski.crawler.strategy.CrawlStrategy;
+import com.ski.crawler.util.CliArgs;
+import com.ski.crawler.util.ExcelUtil;
+import com.ski.crawler.utils.CrawlerHttp;
+import com.ski.crawler.view.ConsoleView;
+
+import java.util.Arrays;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
+
+public class CrawlCommand implements Command {
+ @Override
+ public String name() {
+ return "crawl";
+ }
+
+ @Override
+ public void execute(String[] args, CrawlerContext context) throws Exception {
+ Map opts = CliArgs.parseOptions(args, 1);
+
+ String siteId = normalizeSite(opts.getOrDefault("site", "skiresort"));
+ int limit = parseLimit(opts.get("limit"), 100);
+ int threads = CliArgs.parseInt(opts.get("threads"), 3);
+ int timeoutMs = CliArgs.parseInt(opts.get("timeout"), 20000);
+ int retry = CliArgs.parseInt(opts.get("retry"), 3);
+ long retrySleep = CliArgs.parseInt(opts.get("retry-sleep"), 1000);
+ boolean dryRun = CliArgs.parseBoolean(opts.get("dry-run"));
+ boolean full = CliArgs.parseBoolean(opts.get("full"));
+ boolean incremental = !full;
+ boolean noProxy = CliArgs.parseBoolean(opts.get("no-proxy"));
+ boolean color = CliArgs.parseBoolean(opts.get("color"));
+ boolean showFailures = CliArgs.parseBoolean(opts.get("show-failures"));
+ Integer widthArg = CliArgs.parseNullableInt(opts.get("width"));
+
+ String country = opts.get("country");
+ String startUrl = opts.get("start-url");
+ String outRaw = opts.get("out");
+ String out = (outRaw == null || outRaw.trim().isEmpty()) ? null : outRaw.trim();
+ String outJsonl = out;
+ String outXlsx = null;
+ if (out != null && out.toLowerCase(Locale.ROOT).endsWith(".xlsx")) {
+ outXlsx = out;
+ outJsonl = null;
+ }
+
+ String userAgent = opts.getOrDefault("ua", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36");
+
+ String proxyHost = opts.getOrDefault("proxy-host", "127.0.0.1");
+ int proxyPort = CliArgs.parseInt(opts.get("proxy-port"), 7890);
+ boolean proxyEnabled = !noProxy;
+ String proxy = opts.get("proxy");
+ if (proxy != null && !proxy.isEmpty()) {
+ String p = proxy.trim();
+ if (p.equalsIgnoreCase("none") || p.equalsIgnoreCase("off") || p.equalsIgnoreCase("false")) {
+ proxyEnabled = false;
+ } else {
+ int idx = p.lastIndexOf(':');
+ if (idx > 0 && idx < p.length() - 1) {
+ proxyHost = p.substring(0, idx);
+ proxyPort = CliArgs.parseInt(p.substring(idx + 1), proxyPort);
+ } else {
+ proxyHost = p;
+ }
+ }
+ }
+
+ CrawlerHttp http = new CrawlerHttp(userAgent, proxyHost, proxyPort, proxyEnabled, timeoutMs);
+ int width = resolveWidth(widthArg);
+ ConsoleView view = new ConsoleView(width, color);
+
+ StrategyFactory factory = context.strategies();
+ SkiResortRepository repo = context.repository();
+ ScraperService svc = context.scraper();
+
+ ScraperService.CrawlReport report;
+ if (siteId.equals("all")) {
+ if (outJsonl != null) {
+ System.err.println("When --site all, JSONL --out is not supported. Use --out result.xlsx or omit --out.");
+ return;
+ }
+ report = crawlAll(factory, svc, startUrl, limit, threads, country, http, repo, incremental, view, showFailures, dryRun, retry, retrySleep);
+ } else {
+ CrawlStrategy strategy = factory.create(siteId);
+ try {
+ report = svc.crawl(strategy, startUrl, limit, threads, country, http, repo, incremental, outJsonl, view, showFailures, dryRun, retry, retrySleep);
+ } catch (NetworkException e) {
+ throw e;
+ }
+ }
+
+ if (outXlsx != null) {
+ if (dryRun) {
+ System.err.println("dry-run is enabled, skip writing: " + outXlsx);
+ } else {
+ ExcelUtil.exportResortsBySiteToXlsx(repo.getAll(), outXlsx);
+ System.err.println("Excel exported: " + repo.getAll().size() + " -> " + outXlsx);
+ }
+ }
+
+ Map summary = new LinkedHashMap<>();
+ summary.put("site", report.site);
+ summary.put("total", report.total);
+ summary.put("success", report.success);
+ summary.put("filteredOut", report.filteredOut);
+ summary.put("skipped", report.skipped);
+ summary.put("failed", report.failed);
+ if (outXlsx != null && !dryRun) {
+ summary.put("out", outXlsx);
+ } else if (outJsonl != null && !dryRun) {
+ summary.put("out", outJsonl);
+ }
+
+ view.printSummary(summary, sortByValueDesc(report.byCountry), showFailures ? report.failures : null);
+ }
+
+ private String normalizeSite(String raw) {
+ if (raw == null) {
+ return "skiresort";
+ }
+ String t = raw.trim().toLowerCase(Locale.ROOT);
+ if (t.equals("wiki")) {
+ return "wikipedia";
+ }
+ return t;
+ }
+
+ private ScraperService.CrawlReport crawlAll(
+ StrategyFactory factory,
+ ScraperService svc,
+ String startUrl,
+ int limit,
+ int threads,
+ String countryFilter,
+ CrawlerHttp http,
+ SkiResortRepository repo,
+ boolean incremental,
+ ConsoleView view,
+ boolean showFailures,
+ boolean dryRun,
+ int retryAttempts,
+ long retrySleepMs
+ ) throws Exception {
+ List sites = Arrays.asList("skiresort", "wikipedia", "skimap");
+ Map byCountry = new LinkedHashMap<>();
+ List failures = new java.util.ArrayList<>();
+ int total = 0;
+ int success = 0;
+ int filteredOut = 0;
+ int skipped = 0;
+ int failed = 0;
+
+ for (String s : sites) {
+ CrawlStrategy strategy = factory.create(s);
+ try {
+ ScraperService.CrawlReport r = svc.crawl(strategy, null, limit, threads, countryFilter, http, repo, incremental, null, view, showFailures, dryRun, retryAttempts, retrySleepMs);
+ total += r.total;
+ success += r.success;
+ filteredOut += r.filteredOut;
+ skipped += r.skipped;
+ failed += r.failed;
+ mergeByCountry(byCountry, r.byCountry);
+ if (showFailures && r.failures != null) {
+ for (String f : r.failures) {
+ if (failures.size() >= 200) {
+ break;
+ }
+ failures.add(f);
+ }
+ }
+ } catch (Exception e) {
+ failed += 1;
+ if (showFailures && failures.size() < 200) {
+ failures.add("site=" + s + " [" + e.getClass().getSimpleName() + "] " + (e.getMessage() == null ? "" : e.getMessage()));
+ }
+ }
+ }
+
+ ScraperService.CrawlReport out = new ScraperService.CrawlReport();
+ out.site = "all";
+ out.total = total;
+ out.success = success;
+ out.filteredOut = filteredOut;
+ out.skipped = skipped;
+ out.failed = failed;
+ out.byCountry = byCountry;
+ out.failures = failures;
+ return out;
+ }
+
+ private void mergeByCountry(Map acc, Map add) {
+ if (acc == null || add == null || add.isEmpty()) {
+ return;
+ }
+ for (Map.Entry e : add.entrySet()) {
+ if (e.getKey() == null) {
+ continue;
+ }
+ long v = e.getValue() == null ? 0L : e.getValue();
+ acc.put(e.getKey(), acc.getOrDefault(e.getKey(), 0L) + v);
+ }
+ }
+
+ private int parseLimit(String v, int def) {
+ if (v == null || v.trim().isEmpty()) {
+ return def;
+ }
+ String t = v.trim();
+ if (t.equalsIgnoreCase("all")) {
+ return -1;
+ }
+ try {
+ int n = Integer.parseInt(t);
+ return n <= 0 ? def : n;
+ } catch (Exception e) {
+ return def;
+ }
+ }
+
+ private int resolveWidth(Integer widthArg) {
+ if (widthArg != null && widthArg > 20) {
+ return widthArg;
+ }
+ String cols = System.getenv("COLUMNS");
+ if (cols != null) {
+ try {
+ int n = Integer.parseInt(cols.trim());
+ if (n > 20) {
+ return n;
+ }
+ } catch (Exception ignored) {
+ }
+ }
+ return 120;
+ }
+
+ private Map sortByValueDesc(Map m) {
+ if (m == null || m.isEmpty()) {
+ return m;
+ }
+ return m.entrySet().stream()
+ .sorted((a, b) -> Long.compare(b.getValue(), a.getValue()))
+ .collect(LinkedHashMap::new, (acc, e) -> acc.put(e.getKey(), e.getValue()), Map::putAll);
+ }
+}
diff --git a/src/main/java/com/ski/crawler/command/ExportCommand.java b/src/main/java/com/ski/crawler/command/ExportCommand.java
new file mode 100644
index 0000000..a625cf9
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/ExportCommand.java
@@ -0,0 +1,77 @@
+package com.ski.crawler.command;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.ski.crawler.controller.CrawlerContext;
+import com.ski.crawler.model.SkiResort;
+import com.ski.crawler.repository.SkiResortRepository;
+import com.ski.crawler.util.CliArgs;
+import com.ski.crawler.util.ExcelUtil;
+import com.ski.crawler.util.JsonUtil;
+
+import java.io.BufferedWriter;
+import java.util.Locale;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Implements the {@code export} command: writes every resort held by the repository either to an
+ * Excel workbook (when {@code --out} ends in {@code .xlsx}) or to a JSONL file, one JSON object
+ * per line.
+ */
+public class ExportCommand implements Command {
+    @Override
+    public String name() {
+        return "export";
+    }
+
+    /**
+     * Exports the repository contents to the path given by {@code --out}.
+     *
+     * <p>Prints a notice on stderr and returns without writing when {@code --out} is missing or
+     * blank.
+     *
+     * @param args    command-line arguments; options are parsed starting at index 1
+     * @param context provides the repository to export
+     * @throws Exception if writing the output fails
+     */
+    @Override
+    public void execute(String[] args, CrawlerContext context) throws Exception {
+        Map<String, String> opts = CliArgs.parseOptions(args, 1);
+        String out = opts.get("out");
+        if (out == null || out.trim().isEmpty()) {
+            System.err.println("Missing --out ");
+            return;
+        }
+
+        SkiResortRepository repo = context.repository();
+        List<SkiResort> all = repo.getAll();
+        String path = out.trim();
+        if (path.toLowerCase(Locale.ROOT).endsWith(".xlsx")) {
+            ExcelUtil.exportResortsBySiteToXlsx(all, path);
+        } else {
+            ObjectMapper mapper = JsonUtil.mapper();
+            try (BufferedWriter w = JsonUtil.openJsonlWriter(path)) {
+                for (SkiResort r : all) {
+                    w.write(mapper.writeValueAsString(toJson(r)));
+                    w.newLine();
+                }
+            }
+        }
+        System.err.println("Exported: " + all.size() + " -> " + path);
+    }
+
+    /**
+     * Flattens a resort into an insertion-ordered map that fixes the JSON field names and order.
+     * The source URL is emitted as {@code url} and the crawl timestamp as the string
+     * {@code crawlTime} ({@code null} when the resort has none).
+     */
+    private Map<String, Object> toJson(SkiResort r) {
+        Map<String, Object> obj = new LinkedHashMap<>();
+        obj.put("id", r.getId());
+        obj.put("name", r.getName());
+        obj.put("country", r.getCountry());
+        obj.put("region", r.getRegion());
+        obj.put("latitude", r.getLatitude());
+        obj.put("longitude", r.getLongitude());
+        obj.put("altitudeMin", r.getAltitudeMin());
+        obj.put("altitudeMax", r.getAltitudeMax());
+        obj.put("totalKm", r.getTotalKm());
+        obj.put("slopeCount", r.getSlopeCount());
+        obj.put("liftCount", r.getLiftCount());
+        obj.put("ticketPriceMin", r.getTicketPriceMin());
+        obj.put("ticketPriceMax", r.getTicketPriceMax());
+        obj.put("currency", r.getCurrency());
+        obj.put("openTime", r.getOpenTime());
+        obj.put("snowDepthCm", r.getSnowDepthCm());
+        obj.put("temperatureC", r.getTemperatureC());
+        obj.put("nearbyHotels", r.getNearbyHotels());
+        obj.put("rentalShops", r.getRentalShops());
+        obj.put("url", r.getSourceUrl());
+        obj.put("sourceSite", r.getSourceSite());
+        obj.put("crawlTime", r.getCrawledAt() == null ? null : r.getCrawledAt().toString());
+        return obj;
+    }
+}
diff --git a/src/main/java/com/ski/crawler/command/FilterCommand.java b/src/main/java/com/ski/crawler/command/FilterCommand.java
new file mode 100644
index 0000000..3dfe9d5
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/FilterCommand.java
@@ -0,0 +1,18 @@
+package com.ski.crawler.command;
+
+import com.ski.crawler.controller.CrawlerContext;
+
+/**
+ * The {@code filter} command: an alias that hands its arguments, unchanged, to a
+ * {@link ListCommand}.
+ */
+public class FilterCommand implements Command {
+    /** The list command that does the actual work. */
+    private final ListCommand listCommand = new ListCommand();
+
+    @Override
+    public String name() {
+        return "filter";
+    }
+
+    /** Forwards {@code args} and {@code context} to the wrapped {@link ListCommand}. */
+    @Override
+    public void execute(String[] args, CrawlerContext context) {
+        listCommand.execute(args, context);
+    }
+}
+
diff --git a/src/main/java/com/ski/crawler/command/HelpCommand.java b/src/main/java/com/ski/crawler/command/HelpCommand.java
new file mode 100644
index 0000000..ebcae3f
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/HelpCommand.java
@@ -0,0 +1,38 @@
+package com.ski.crawler.command;
+
+import com.ski.crawler.controller.CrawlerContext;
+
+/**
+ * Implements the {@code help} command: prints the list of commands and the {@code crawl}
+ * options to standard output.
+ */
+public class HelpCommand implements Command {
+    /** Help text, one entry per output line; an empty entry prints a blank line. */
+    private static final String[] HELP_LINES = {
+        "命令:",
+        " crawl --site --limit [--country <关键词>] [--out ] [--dry-run] [--no-proxy]",
+        " list [--country <关键词>]",
+        // "filter" is a registered command that delegates to "list", so it takes the same options.
+        " filter [--country <关键词>]",
+        " export --out ",
+        " resume --in ",
+        " stats",
+        " sites",
+        " help",
+        "",
+        "crawl 参数:",
+        " --threads 默认 3",
+        " --start-url 覆盖站点入口",
+        " --timeout 默认 20000",
+        " --ua 覆盖 UA",
+        " --proxy 代理配置",
+        " --proxy-host / --proxy-port ",
+        " --no-proxy 禁用代理",
+        " --width 表格宽度",
+        " --color 表头上色(可选)",
+        " --show-failures 结束时输出失败列表(可选)",
+        " --full 全量抓取(忽略去重,仍然不会往仓库写重复 URL)",
+        " --retry 默认 3",
+        " --retry-sleep 默认 1000",
+        " --dry-run 不写入仓库/不导出文件(仅展示)",
+    };
+
+    @Override
+    public String name() {
+        return "help";
+    }
+
+    /**
+     * Prints the help text; neither {@code args} nor {@code context} is used.
+     *
+     * @param args    ignored
+     * @param context ignored
+     */
+    @Override
+    public void execute(String[] args, CrawlerContext context) {
+        for (String line : HELP_LINES) {
+            System.out.println(line);
+        }
+    }
+}
diff --git a/src/main/java/com/ski/crawler/command/ListCommand.java b/src/main/java/com/ski/crawler/command/ListCommand.java
new file mode 100644
index 0000000..2981502
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/ListCommand.java
@@ -0,0 +1,53 @@
+package com.ski.crawler.command;
+
+import com.ski.crawler.controller.CrawlerContext;
+import com.ski.crawler.model.SkiResort;
+import com.ski.crawler.repository.SkiResortRepository;
+import com.ski.crawler.util.CliArgs;
+import com.ski.crawler.view.ConsoleView;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Implements the {@code list} command: prints the resorts held by the repository through a
+ * {@link ConsoleView}, optionally restricted by {@code --country}.
+ */
+public class ListCommand implements Command {
+    @Override
+    public String name() {
+        return "list";
+    }
+
+    /**
+     * Prints a header followed by one row per resort.
+     *
+     * <p>Options: {@code --country} (when non-blank, the repository's country filter is used
+     * instead of the full list), {@code --color} and {@code --width}.
+     *
+     * @param args    command-line arguments; options are parsed starting at index 1
+     * @param context provides the repository to read from
+     */
+    @Override
+    public void execute(String[] args, CrawlerContext context) {
+        Map<String, String> opts = CliArgs.parseOptions(args, 1);
+        String country = opts.get("country");
+        boolean color = CliArgs.parseBoolean(opts.get("color"));
+        Integer widthArg = CliArgs.parseNullableInt(opts.get("width"));
+        int width = resolveWidth(widthArg);
+
+        SkiResortRepository repo = context.repository();
+        boolean noCountryFilter = country == null || country.trim().isEmpty();
+        List<SkiResort> list = noCountryFilter ? repo.getAll() : repo.filterByCountry(country);
+
+        ConsoleView view = new ConsoleView(width, color);
+        view.printHeader();
+        for (SkiResort r : list) {
+            view.printResort(r);
+        }
+    }
+
+    /**
+     * Picks the output width: the explicit {@code --width} if above 20, otherwise the
+     * {@code COLUMNS} environment variable if it parses to a value above 20, otherwise 120.
+     */
+    private int resolveWidth(Integer widthArg) {
+        if (widthArg != null && widthArg > 20) {
+            return widthArg;
+        }
+        String cols = System.getenv("COLUMNS");
+        if (cols != null) {
+            try {
+                int n = Integer.parseInt(cols.trim());
+                if (n > 20) {
+                    return n;
+                }
+            } catch (NumberFormatException ignored) {
+                // An unparsable COLUMNS value falls through to the default width.
+            }
+        }
+        return 120;
+    }
+}
+
diff --git a/src/main/java/com/ski/crawler/command/ResumeCommand.java b/src/main/java/com/ski/crawler/command/ResumeCommand.java
new file mode 100644
index 0000000..319ce28
--- /dev/null
+++ b/src/main/java/com/ski/crawler/command/ResumeCommand.java
@@ -0,0 +1,140 @@
+package com.ski.crawler.command;
+
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.ski.crawler.controller.CrawlerContext;
+import com.ski.crawler.model.SkiResort;
+import com.ski.crawler.repository.SkiResortRepository;
+import com.ski.crawler.util.CliArgs;
+import com.ski.crawler.util.JsonUtil;
+
+import java.io.BufferedReader;
+import java.time.LocalDateTime;
+import java.util.List;
+import java.util.Map;
+
+public class ResumeCommand implements Command {
+ @Override
+ public String name() {
+ return "resume";
+ }
+
+ @Override
+ public void execute(String[] args, CrawlerContext context) throws Exception {
+ Map opts = CliArgs.parseOptions(args, 1);
+ String in = opts.get("in");
+ if (in == null || in.trim().isEmpty()) {
+ System.err.println("Missing --in ");
+ return;
+ }
+
+ SkiResortRepository repo = context.repository();
+ ObjectMapper mapper = JsonUtil.mapper();
+ int loaded = 0;
+ int skipped = 0;
+ try (BufferedReader br = JsonUtil.openJsonlReader(in.trim())) {
+ String line;
+ while ((line = br.readLine()) != null) {
+ String t = line.trim();
+ if (t.isEmpty()) {
+ continue;
+ }
+ Map obj = mapper.readValue(t, new TypeReference