From f6deacf725e2998d208759db1a2003bb7675dbac Mon Sep 17 00:00:00 2001
From: Songrui <1778280163@qq.com>
Date: Fri, 29 May 2026 17:00:57 +0800
Subject: [PATCH] =?UTF-8?q?=E5=AE=8B=E7=91=9E-202506050301?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
w12/java-cli/.gitignore | 4 +
w12/java-cli/.vscode/settings.json | 3 +
w12/java-cli/pom.xml | 67 ++++++++++
.../java/com/example/datacollect/Main.java | 25 ++++
.../datacollect/command/AnalyzeCommand.java | 75 +++++++++++
.../example/datacollect/command/Command.java | 8 ++
.../datacollect/command/CrawlCommand.java | 88 +++++++++++++
.../datacollect/command/ExitCommand.java | 28 +++++
.../datacollect/command/HelpCommand.java | 27 ++++
.../command/JsonExporterCommand.java | 84 +++++++++++++
.../command/JsonImporterCommand.java | 118 ++++++++++++++++++
.../datacollect/command/ListCommand.java | 27 ++++
.../controller/CrawlerController.java | 61 +++++++++
.../exception/CrawlerException.java | 11 ++
.../exception/NetworkException.java | 11 ++
.../datacollect/exception/ParseException.java | 11 ++
.../example/datacollect/model/Article.java | 65 ++++++++++
.../repository/ArticleRepository.java | 76 +++++++++++
.../datacollect/strategy/BlogStrategy.java | 28 +++++
.../datacollect/strategy/CrawlStrategy.java | 11 ++
.../datacollect/strategy/DefaultStrategy.java | 38 ++++++
.../datacollect/strategy/HnuNewsStrategy.java | 52 ++++++++
.../datacollect/strategy/NewsStrategy.java | 28 +++++
.../strategy/PriorityStrategy.java | 27 ++++
.../datacollect/strategy/StrategyFactory.java | 49 ++++++++
.../example/datacollect/view/ConsoleView.java | 47 +++++++
w12/java-cli/src/main/resources/logback.xml | 26 ++++
w12/java-cli/target/classes/logback.xml | 26 ++++
.../target/maven-archiver/pom.properties | 3 +
.../compile/default-compile/createdFiles.lst | 21 ++++
.../compile/default-compile/inputFiles.lst | 23 ++++
31 files changed, 1168 insertions(+)
create mode 100644 w12/java-cli/.gitignore
create mode 100644 w12/java-cli/.vscode/settings.json
create mode 100644 w12/java-cli/pom.xml
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/Main.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/Command.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/JsonExporterCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/JsonImporterCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/model/Article.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
create mode 100644 w12/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
create mode 100644 w12/java-cli/src/main/resources/logback.xml
create mode 100644 w12/java-cli/target/classes/logback.xml
create mode 100644 w12/java-cli/target/maven-archiver/pom.properties
create mode 100644 w12/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
create mode 100644 w12/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
diff --git a/w12/java-cli/.gitignore b/w12/java-cli/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/w12/java-cli/.gitignore
@@ -0,0 +1,4 @@
+*.jar
+*.jar
+*.class
+*.log
\ No newline at end of file
diff --git a/w12/java-cli/.vscode/settings.json b/w12/java-cli/.vscode/settings.json
new file mode 100644
index 0000000..c5f3f6b
--- /dev/null
+++ b/w12/java-cli/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+ "java.configuration.updateBuildConfiguration": "interactive"
+}
\ No newline at end of file
diff --git a/w12/java-cli/pom.xml b/w12/java-cli/pom.xml
new file mode 100644
index 0000000..2b195c9
--- /dev/null
+++ b/w12/java-cli/pom.xml
@@ -0,0 +1,67 @@
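+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>datacollect-cli</artifactId>
+    <version>0.1.0</version>
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.4.14</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.15.3</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.datatype</groupId>
+            <artifactId>jackson-datatype-jsr310</artifactId>
+            <version>2.15.3</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.datacollect.Main</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>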
\ No newline at end of file
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/Main.java b/w12/java-cli/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..ef65b3e
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,42 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Entry point of the CLI crawler: wires the view, repository, strategy factory and
+ * controller together, then hands each console line to the controller.
+ */
+public class Main {
+    private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Starts the interactive read-dispatch loop.
+     *
+     * @param args command-line arguments; not used
+     */
+    public static void main(String[] args) {
+        logger.info("Starting CLI Crawler application");
+        ConsoleView view = new ConsoleView();
+        ArticleRepository repository = new ArticleRepository();
+        StrategyFactory strategyFactory = new StrategyFactory();
+        CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
+
+        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
+        while (true) {
+            String line = view.readLine();
+            if (line == null) {
+                // NOTE(review): assumes readLine() signals end of input with null (as
+                // BufferedReader does) - confirm against ConsoleView. Without this guard
+                // the loop would keep dispatching null once stdin is closed.
+                logger.info("Input stream closed, shutting down");
+                break;
+            }
+            controller.handle(line);
+        }
+    }
+}
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
new file mode 100644
index 0000000..d73f855
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
@@ -0,0 +1,96 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * CLI command {@code analyze <url>}: downloads a page, parses it with the strategy selected
+ * for the URL and prints the article count plus average title and content lengths.
+ * The parsed articles are only measured; this command does not add them to the repository.
+ */
+public class AnalyzeCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * @param view console used for all user-facing output
+     * @param strategyFactory source of the parsing strategy for a given URL
+     */
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    /** Returns the command keyword, {@code "analyze"}. */
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Fetches and analyzes the page at {@code args[1]}.
+     *
+     * @param args tokenized command line; {@code args[1]} must hold the URL
+     * @param repository not used by this command
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            logger.warn("Analyze command called without URL argument");
+            view.printError("Usage: analyze <url>");
+            return;
+        }
+        String url = args[1];
+        logger.info("Analyzing URL: {}", url);
+
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        logger.debug("Using strategy: {}", strategy.getClass().getSimpleName());
+
+        try {
+            view.printInfo("Analyzing: " + url);
+            Document doc = Jsoup.connect(url).get();
+            var articles = strategy.parse(url, doc);
+
+            int count = articles.size();
+            int totalTitleLength = 0;
+            int totalContentLength = 0;
+
+            for (var article : articles) {
+                if (article.getTitle() != null) {
+                    totalTitleLength += article.getTitle().length();
+                }
+                if (article.getContent() != null) {
+                    totalContentLength += article.getContent().length();
+                }
+            }
+
+            // Guard the division so an empty result reports 0 instead of NaN.
+            double avgTitleLength = count > 0 ? (double) totalTitleLength / count : 0;
+            double avgContentLength = count > 0 ? (double) totalContentLength / count : 0;
+            String avgTitleText = String.format("%.2f", avgTitleLength);
+            String avgContentText = String.format("%.2f", avgContentLength);
+
+            // SLF4J substitutes only bare "{}" anchors, so the averages are formatted up front.
+            logger.info("Analysis complete - Articles: {}, Avg Title Length: {}, Avg Content Length: {}",
+                    count, avgTitleText, avgContentText);
+
+            view.printSuccess("Analysis Results:");
+            view.printInfo(" Total Articles: " + count);
+            view.printInfo(" Average Title Length: " + avgTitleText);
+            view.printInfo(" Average Content Length: " + avgContentText);
+            view.printInfo(" Strategy Used: " + strategy.getClass().getSimpleName());
+        } catch (Exception e) {
+            // Boundary catch: any failure while fetching or parsing is reported to the user.
+            logger.error("Failed to analyze URL {}: {}", url, e.getMessage(), e);
+            view.printError("Failed to analyze: " + e.getMessage());
+        }
+    }
+}
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/Command.java b/w12/java-cli/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..029cadc
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,8 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+
+/** A named CLI action that runs against the article repository passed in by the caller. */ public interface Command {
+ /** Returns the keyword that identifies this command (implementations in this package return e.g. "crawl" or "exit"). */ String getName();
+ /** Runs the command; implementations here treat args[1] as the first user argument (args[0] is presumably the command word - confirm against the controller). */ void execute(String[] args, ArticleRepository repository);
+}
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..13f5b3d
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,119 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
+/**
+ * CLI command {@code crawl <url>}: downloads a page, parses it with the strategy selected
+ * for the URL and stores the resulting articles in the repository. Network failures are
+ * retried up to {@code MAX_RETRIES} times with a fixed pause between attempts.
+ */
+public class CrawlCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+    private static final int MAX_RETRIES = 3;
+    private static final long RETRY_DELAY_MS = 1000;
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * @param view console used for all user-facing output
+     * @param strategyFactory source of the parsing strategy for a given URL
+     */
+    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    /** Returns the command keyword, {@code "crawl"}. */
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    /**
+     * Crawls the page at {@code args[1]} and adds every parsed article to {@code repository}.
+     *
+     * @param args tokenized command line; {@code args[1]} must hold the URL
+     * @param repository destination for the parsed articles
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            logger.warn("Crawl command called without URL argument");
+            view.printError("Usage: crawl <url>");
+            return;
+        }
+        String url = args[1];
+        logger.info("Starting crawl for URL: {}", url);
+
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        logger.debug("Using strategy: {}", strategy.getClass().getSimpleName());
+
+        int retryCount = 0;
+        boolean success = false;
+
+        while (retryCount < MAX_RETRIES && !success) {
+            try {
+                view.printInfo("Crawling: " + url + (retryCount > 0 ? " (attempt " + (retryCount + 1) + ")" : ""));
+                logger.debug("Attempt {} to fetch URL: {}", retryCount + 1, url);
+                Document doc = Jsoup.connect(url).get();
+                var articles = strategy.parse(url, doc);
+                repository.addAll(articles);
+                logger.info("Successfully crawled {} articles from {}", articles.size(), url);
+                view.printSuccess("Crawled " + articles.size() + " articles.");
+                success = true;
+            } catch (IOException e) {
+                retryCount++;
+                logger.error("Network error on attempt {} for URL {}: {}", retryCount, url, e.getMessage());
+                if (retryCount < MAX_RETRIES) {
+                    view.printWarning("Network error: " + e.getMessage() + ", retrying...");
+                    if (!sleep(RETRY_DELAY_MS)) {
+                        // The thread was interrupted while waiting: stop retrying instead of
+                        // burning the remaining attempts with the interrupt flag still set.
+                        view.printError("Crawl interrupted, giving up.");
+                        break;
+                    }
+                } else {
+                    logger.error("Failed to crawl URL {} after {} attempts", url, MAX_RETRIES);
+                    view.printError("Failed to crawl after " + MAX_RETRIES + " attempts: " + e.getMessage());
+                }
+            } catch (ParseException e) {
+                // Not retried: a parse failure is treated as permanent.
+                logger.error("Parse error for URL {}: {}", url, e.getMessage());
+                view.printError("Parse error: " + e.getMessage());
+                break;
+            } catch (Exception e) {
+                logger.error("Unexpected error for URL {}: {}", url, e.getMessage(), e);
+                view.printError("Unexpected error: " + e.getMessage());
+                break;
+            }
+        }
+    }
+
+    /**
+     * Pauses the current thread before the next attempt.
+     *
+     * @param millis pause length in milliseconds
+     * @return {@code true} if the full pause elapsed, {@code false} if the thread was
+     *     interrupted (the interrupt flag is restored in that case)
+     */
+    private boolean sleep(long millis) {
+        try {
+            Thread.sleep(millis);
+            return true;
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt();
+            logger.warn("Sleep interrupted");
+            return false;
+        }
+    }
+}
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..51ee001
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,43 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * CLI command {@code exit}: says goodbye and terminates the JVM with status 0.
+ */
+public class ExitCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+    private static final String NAME = "exit";
+    private static final int EXIT_STATUS_OK = 0;
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console used to print the farewell message
+     */
+    public ExitCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** Returns the command keyword, {@code "exit"}. */
+    @Override
+    public String getName() {
+        return NAME;
+    }
+
+    /**
+     * Logs the shutdown, prints the farewell and ends the process; this method does not return.
+     *
+     * @param args ignored
+     * @param repository ignored
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("Exiting application");
+        view.printSuccess("Bye!");
+        System.exit(EXIT_STATUS_OK);
+    }
+}
+}
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..90793e3
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,41 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * CLI command {@code help}: prints a one-line summary of the available commands.
+ */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console the summary is printed to
+     */
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** Returns the command keyword, {@code "help"}. */
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /**
+     * Prints the command summary.
+     *
+     * @param args ignored
+     * @param repository ignored
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Displaying help information");
+        // Argument placeholders are spelled out so users can see what each command expects.
+        view.printInfo("Commands: crawl <url>, analyze <url>, list, export [file] [--format json], import <file>, help, exit");
+    }
+}
\ No newline at end of file
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/JsonExporterCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/JsonExporterCommand.java
new file mode 100644
index 0000000..5edacba
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/JsonExporterCommand.java
@@ -0,0 +1,102 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.FileWriter;
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * CLI command {@code export [file] [--format json]}: writes every article in the repository,
+ * together with a count and an export timestamp, to a JSON file.
+ */
+public class JsonExporterCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(JsonExporterCommand.class);
+    private static final String DEFAULT_FILENAME = "articles.json";
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console used for all user-facing output
+     */
+    public JsonExporterCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** Returns the command keyword, {@code "export"}. */
+    @Override
+    public String getName() {
+        return "export";
+    }
+
+    /**
+     * Exports the repository contents as JSON.
+     *
+     * @param args tokenized command line; an optional bare token names the output file
+     *     (default {@code articles.json}) and {@code --format} must be {@code json} if given
+     * @param repository source of the articles to export
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        String filename = DEFAULT_FILENAME;
+        String format = null;
+
+        for (int i = 1; i < args.length; i++) {
+            if (args[i].equals("--format") && i + 1 < args.length) {
+                format = args[i + 1];
+                i++; // skip the value that was just consumed
+            } else if (!args[i].startsWith("-")) {
+                filename = args[i];
+            }
+        }
+
+        if (format != null && !format.equals("json")) {
+            logger.warn("Unsupported export format: {}", format);
+            view.printError("Unsupported format: " + format + ". Only 'json' is supported.");
+            return;
+        }
+
+        List<Article> articles = repository.getAll();
+        if (articles.isEmpty()) {
+            logger.warn("Attempted to export empty repository");
+            view.printWarning("No articles to export.");
+            return;
+        }
+
+        logger.info("Exporting {} articles to JSON file: {}", articles.size(), filename);
+
+        ObjectMapper mapper = new ObjectMapper();
+        mapper.registerModule(new JavaTimeModule());
+        mapper.enable(SerializationFeature.INDENT_OUTPUT);
+        // Write date/time values as text rather than numeric timestamps.
+        mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
+
+        Map<String, Object> exportData = new HashMap<>();
+        exportData.put("articles", articles);
+        exportData.put("count", articles.size());
+        exportData.put("exportedAt", java.time.LocalDateTime.now().toString());
+
+        Path path = Paths.get(filename);
+        // JSON must be UTF-8; a bare FileWriter would use the platform default charset on Java 11.
+        try (FileWriter writer = new FileWriter(path.toFile(), StandardCharsets.UTF_8)) {
+            mapper.writeValue(writer, exportData);
+            logger.info("Successfully exported articles to {}", path.toAbsolutePath());
+            view.printSuccess("Exported " + articles.size() + " articles to " + filename);
+        } catch (IOException e) {
+            logger.error("Failed to export articles to {}: {}", filename, e.getMessage());
+            view.printError("Failed to export: " + e.getMessage());
+        }
+    }
+}
\ No newline at end of file
diff --git a/w12/java-cli/src/main/java/com/example/datacollect/command/JsonImporterCommand.java b/w12/java-cli/src/main/java/com/example/datacollect/command/JsonImporterCommand.java
new file mode 100644
index 0000000..514f15a
--- /dev/null
+++ b/w12/java-cli/src/main/java/com/example/datacollect/command/JsonImporterCommand.java
@@ -0,0 +1,118 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.Set;
+
+public class JsonImporterCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(JsonImporterCommand.class);
+
+ private final ConsoleView view;
+
+ public JsonImporterCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "import";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ if (args.length < 2) {
+ logger.warn("Import command called without filename argument");
+ view.printError("Usage: import ");
+ return;
+ }
+
+ String filename = args[1];
+ Path path = Paths.get(filename);
+
+ if (!Files.exists(path)) {
+ logger.error("Import file does not exist: {}", filename);
+ view.printError("File not found: " + filename);
+ return;
+ }
+
+ ObjectMapper mapper = new ObjectMapper();
+ mapper.registerModule(new JavaTimeModule());
+ mapper.disable(SerializationFeature.WRITE_DATES_AS_TIMESTAMPS);
+
+ try {
+ String content = Files.readString(path);
+ Map data = mapper.readValue(content, Map.class);
+
+ List