From 4b8cda09c81642a626cbd5dd7288509cd423f71f Mon Sep 17 00:00:00 2001
From: XuJiexian <3445002374@qq.com>
Date: Sat, 30 May 2026 21:27:35 +0800
Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E4=BA=A4=20w11=20=E4=BD=9C=E4=B8=9A?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
---
w11/java-cli/.gitignore | 4 +
w11/java-cli/README.md | 17 +++
w11/java-cli/pom.xml | 62 ++++++++++
.../java/com/example/datacollect/Main.java | 25 ++++
.../example/datacollect/command/Command.java | 8 ++
.../datacollect/command/CrawlCommand.java | 109 ++++++++++++++++++
.../datacollect/command/ExitCommand.java | 27 +++++
.../datacollect/command/HelpCommand.java | 26 +++++
.../datacollect/command/HistoryCommand.java | 60 ++++++++++
.../datacollect/command/ListCommand.java | 26 +++++
.../controller/CrawlerController.java | 59 ++++++++++
.../exception/CrawlerException.java | 11 ++
.../exception/NetworkException.java | 11 ++
.../datacollect/exception/ParseException.java | 11 ++
.../example/datacollect/model/Article.java | 75 ++++++++++++
.../repository/ArticleRepository.java | 44 +++++++
.../datacollect/strategy/BlogStrategy.java | 40 +++++++
.../datacollect/strategy/CrawlStrategy.java | 11 ++
.../datacollect/strategy/HnuNewsStrategy.java | 50 ++++++++
.../datacollect/strategy/NewsStrategy.java | 26 +++++
.../datacollect/strategy/StrategyFactory.java | 34 ++++++
.../example/datacollect/view/ConsoleView.java | 45 ++++++++
w11/java-cli/src/main/resources/logback.xml | 26 +++++
w11/logs/crawler.log | 3 +
24 files changed, 810 insertions(+)
create mode 100644 w11/java-cli/.gitignore
create mode 100644 w11/java-cli/README.md
create mode 100644 w11/java-cli/pom.xml
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/Main.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/Command.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/model/Article.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
create mode 100644 w11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
create mode 100644 w11/java-cli/src/main/resources/logback.xml
create mode 100644 w11/logs/crawler.log
diff --git a/w11/java-cli/.gitignore b/w11/java-cli/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/w11/java-cli/.gitignore
@@ -0,0 +1,4 @@
+*.jar
+*.jar
+*.class
+*.log
\ No newline at end of file
diff --git a/w11/java-cli/README.md b/w11/java-cli/README.md
new file mode 100644
index 0000000..3ea02ec
--- /dev/null
+++ b/w11/java-cli/README.md
@@ -0,0 +1,17 @@
+# DataCollect 教学项目 — 最小可运行版本
+
+这是一个交互式 Java CLI 爬虫演示工程:启动后进入命令循环,可用命令为 crawl、list、help、history、exit。
+
+构建:
+```bash
+mvn -q package
+```
+
+运行(启动后在提示符 `>` 下输入 help 查看命令):
+```bash
+java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar
+```
+
+项目结构(最小):
+- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,启动交互式命令循环
+- `pom.xml` — Maven 构建配置,生成可执行 jar
diff --git a/w11/java-cli/pom.xml b/w11/java-cli/pom.xml
new file mode 100644
index 0000000..a738b40
--- /dev/null
+++ b/w11/java-cli/pom.xml
@@ -0,0 +1,62 @@
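+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <groupId>com.example</groupId>
+  <artifactId>datacollect-cli</artifactId>
+  <version>0.1.0</version>
+  <properties>
+    <maven.compiler.source>11</maven.compiler.source>
+    <maven.compiler.target>11</maven.compiler.target>
+  </properties>
+  <dependencies>
+    <dependency>
+      <groupId>org.jsoup</groupId>
+      <artifactId>jsoup</artifactId>
+      <version>1.14.3</version>
+    </dependency>
+    <dependency>
+      <groupId>ch.qos.logback</groupId>
+      <artifactId>logback-classic</artifactId>
+      <version>1.2.11</version>
+    </dependency>
+    <dependency>
+      <groupId>org.slf4j</groupId>
+      <artifactId>slf4j-api</artifactId>
+      <version>1.7.36</version>
+    </dependency>
+  </dependencies>
+  <build>
+    <plugins>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-compiler-plugin</artifactId>
+        <version>3.8.1</version>
+      </plugin>
+      <plugin>
+        <groupId>org.apache.maven.plugins</groupId>
+        <artifactId>maven-assembly-plugin</artifactId>
+        <version>3.3.0</version>
+        <configuration>
+          <archive>
+            <manifest>
+              <mainClass>com.example.datacollect.Main</mainClass>
+            </manifest>
+          </archive>
+          <descriptorRefs>
+            <descriptorRef>jar-with-dependencies</descriptorRef>
+          </descriptorRefs>
+        </configuration>
+        <executions>
+          <execution>
+            <id>make-assembly</id>
+            <phase>package</phase>
+            <goals>
+              <goal>single</goal>
+            </goals>
+          </execution>
+        </executions>
+      </plugin>
+    </plugins>
+  </build>
+</project>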
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/Main.java b/w11/java-cli/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..7839ef5
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,46 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.NoSuchElementException;
+
+/**
+ * Entry point of the interactive crawler CLI.
+ *
+ * <p>Wires the view, repository and strategy factory into a controller and then feeds every
+ * line read from the console to it until input ends or a command terminates the JVM.
+ */
+public class Main {
+    private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Starts the read-dispatch loop.
+     *
+     * @param args command-line arguments; currently ignored
+     */
+    public static void main(String[] args) {
+        logger.info("Starting CLI Crawler W11");
+        ConsoleView view = new ConsoleView();
+        ArticleRepository repository = new ArticleRepository();
+        StrategyFactory strategyFactory = new StrategyFactory();
+        CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
+
+        view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands.");
+        while (true) {
+            String line;
+            try {
+                line = view.readLine();
+            } catch (NoSuchElementException | IllegalStateException e) {
+                // Scanner reports end of input (Ctrl-D, exhausted pipe) or a closed stream this
+                // way; leave the loop instead of crashing with a stack trace.
+                logger.info("Input stream closed, shutting down");
+                break;
+            }
+            controller.handle(line);
+        }
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/Command.java b/w11/java-cli/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..029cadc
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,8 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+
+public interface Command { // One console command; the controller looks implementations up by name.
+ String getName(); // Name under which the command is registered.
+ void execute(String[] args, ArticleRepository repository); // args are the whitespace-split input tokens, args[0] being the command name
+}
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..8a541c7
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,137 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.CrawlerException;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+import java.util.List;
+
+/**
+ * {@code crawl <url>}: downloads a page, parses it with the strategy that supports the URL
+ * and stores the resulting articles.
+ *
+ * <p>Network failures are retried up to {@code MAX_RETRIES} times with a linearly growing
+ * pause; any other failure ends the command immediately.
+ */
+public class CrawlCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+    private static final int MAX_RETRIES = 3;
+    private static final int RETRY_DELAY_MS = 1000;
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    /**
+     * Crawls {@code args[1]} and adds every parsed article to {@code repository}. Success and
+     * failure are both reported through the view; nothing is thrown to the caller.
+     *
+     * @param args command tokens: {@code args[0]} is the command name, {@code args[1]} the URL
+     * @param repository destination for the parsed articles
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            view.printError("Usage: crawl <url>");
+            return;
+        }
+
+        String url = args[1];
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+
+        if (strategy == null) {
+            view.printError("No strategy found for URL: " + url);
+            return;
+        }
+
+        int attempts = 0;
+        Exception lastException = null;
+
+        while (attempts < MAX_RETRIES) {
+            attempts++;
+            try {
+                Document doc = fetch(url, attempts);
+                List<Article> articles = strategy.parse(url, doc);
+
+                for (Article article : articles) {
+                    repository.add(article);
+                }
+
+                logger.info("Successfully crawled {} - {} article(s)", url, articles.size());
+                view.printSuccess("Crawled " + articles.size() + " article(s) from " + url);
+                return;
+            } catch (NetworkException e) {
+                // Only I/O problems are worth another attempt.
+                lastException = e;
+                logger.warn("Network error fetching {} (attempt {}/{}): {}",
+                        url, attempts, MAX_RETRIES, e.getMessage());
+                if (attempts < MAX_RETRIES) {
+                    try {
+                        // Linear back-off: wait 1s after the first failure, 2s after the second.
+                        Thread.sleep((long) RETRY_DELAY_MS * attempts);
+                    } catch (InterruptedException ie) {
+                        Thread.currentThread().interrupt();
+                        break;
+                    }
+                }
+            } catch (ParseException e) {
+                lastException = e;
+                logger.error("Parse error for {} (attempt {}/{}): {}",
+                        url, attempts, MAX_RETRIES, e.getMessage(), e);
+                break;
+            } catch (CrawlerException e) {
+                lastException = e;
+                logger.error("Crawler error for {}: {}", url, e.getMessage(), e);
+                break;
+            } catch (Exception e) {
+                // Boundary catch: covers malformed URLs rejected by jsoup and articles rejected
+                // by the repository. Retrying cannot fix either, so stop here.
+                lastException = e;
+                logger.error("Unexpected error fetching {}: {}", url, e.getMessage(), e);
+                break;
+            }
+        }
+
+        logger.error("Failed to crawl {} after {} attempts", url, attempts);
+        view.printError("Failed to crawl: " + (lastException != null ? lastException.getMessage() : "Unknown error"));
+    }
+
+    /**
+     * Performs a single HTTP GET with a 5 second timeout.
+     *
+     * @param url address to download
+     * @param attempt one-based attempt number, used for logging only
+     * @return the parsed document
+     * @throws NetworkException if the request fails with an {@link IOException}; other
+     *     exceptions (for example an invalid URL) propagate unchanged and are not retried
+     */
+    private Document fetch(String url, int attempt) throws NetworkException {
+        try {
+            logger.debug("Fetching {} (attempt {})", url, attempt);
+            return Jsoup.connect(url)
+                    .userAgent("Mozilla/5.0")
+                    .timeout(5000)
+                    .get();
+        } catch (IOException e) {
+            throw new NetworkException("Failed to fetch " + url + ": " + e.getMessage(), e);
+        }
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..fc1ccdb
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,27 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ExitCommand implements Command { // "exit": says goodbye and terminates the JVM.
+ private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+ private final ConsoleView view;
+
+ public ExitCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "exit";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) { // both parameters are unused
+ logger.info("User exiting application");
+ view.printSuccess("Bye!");
+ System.exit(0); // does not return; exit status 0
+ }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..eae0377
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,34 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** {@code help}: prints the list of available commands. */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+    private final ConsoleView view;
+
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /**
+     * Prints the command overview; both parameters are unused.
+     *
+     * @param args command tokens (ignored)
+     * @param repository article store (ignored)
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Showing help");
+        // crawl is the only command that takes an argument: the address of the page to fetch.
+        view.printInfo("Commands: crawl <url>, list, help, history, exit");
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java b/w11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
new file mode 100644
index 0000000..82a4535
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
@@ -0,0 +1,81 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@code history}: keeps the commands entered so far and prints them as a numbered list.
+ *
+ * <p>Entries are recorded by callers through {@link #addCommand(String)}; this class never
+ * records anything on its own.
+ */
+public class HistoryCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class);
+    private final ConsoleView view;
+    private final List<String> commandHistory;
+
+    public HistoryCommand(ConsoleView view) {
+        this.view = view;
+        this.commandHistory = new ArrayList<>();
+    }
+
+    /** Appends {@code command} to the end of the history. */
+    public void addCommand(String command) {
+        commandHistory.add(command);
+    }
+
+    /** Returns a copy of the history, oldest entry first; changes to it are not written back. */
+    public List<String> getAllHistory() {
+        return new ArrayList<>(commandHistory);
+    }
+
+    /**
+     * Returns the entry at the zero-based {@code index}.
+     *
+     * @return the stored command, or {@code null} when {@code index} is out of range
+     */
+    public String getCommand(int index) {
+        if (index < 0 || index >= commandHistory.size()) {
+            return null;
+        }
+        return commandHistory.get(index);
+    }
+
+    /** Removes every recorded entry. */
+    public void clearHistory() {
+        commandHistory.clear();
+    }
+
+    /** Returns the number of recorded entries. */
+    public int getHistorySize() {
+        return commandHistory.size();
+    }
+
+    @Override
+    public String getName() {
+        return "history";
+    }
+
+    /**
+     * Prints the history with one-based numbering, or a notice when it is empty.
+     *
+     * @param args command tokens (ignored)
+     * @param repository article store (ignored)
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (commandHistory.isEmpty()) {
+            view.printInfo("No command history.");
+            return;
+        }
+
+        view.printInfo("Command History:");
+        for (int i = 0; i < commandHistory.size(); i++) {
+            view.printInfo((i + 1) + ". " + commandHistory.get(i));
+        }
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..9d7c650
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,26 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ListCommand implements Command { // "list": shows every stored article.
+ private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+ private final ConsoleView view;
+
+ public ListCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "list";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) { // args is unused
+ logger.debug("Listing articles");
+ view.display(repository.getAll()); // the view prints a hint itself when nothing is stored
+ }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..6a83224
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,79 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.Command;
+import com.example.datacollect.command.CrawlCommand;
+import com.example.datacollect.command.ExitCommand;
+import com.example.datacollect.command.HelpCommand;
+import com.example.datacollect.command.HistoryCommand;
+import com.example.datacollect.command.ListCommand;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.HashMap;
+import java.util.Locale;
+import java.util.Map;
+
+/**
+ * Parses one line of console input and dispatches it to the command registered under its
+ * first token.
+ */
+public class CrawlerController {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);
+    private final Map<String, Command> commands = new HashMap<>();
+    private final ConsoleView view;
+    private final ArticleRepository repository;
+    private final HistoryCommand historyCommand;
+
+    /**
+     * Registers the built-in commands: help, list, crawl, exit and history.
+     *
+     * @param view console used for user-facing output
+     * @param repository article store handed to every command
+     * @param strategyFactory strategy lookup used by the crawl command
+     */
+    public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.repository = repository;
+        this.historyCommand = new HistoryCommand(view);
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view, strategyFactory));
+        register(new ExitCommand(view));
+        register(historyCommand);
+        logger.info("CrawlerController initialized");
+    }
+
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+    }
+
+    /**
+     * Handles one input line. Blank or {@code null} input is ignored; everything else is
+     * recorded in the history (unknown commands included) before it is dispatched.
+     *
+     * @param input raw line typed by the user, may be {@code null}
+     */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+
+        historyCommand.addCommand(text);
+
+        String[] args = text.split("\\s+");
+        // Locale.ROOT keeps the lookup stable under default locales with special casing
+        // rules (for example the Turkish dotless i), where "LIST" would not map to "list".
+        String cmdName = args[0].toLowerCase(Locale.ROOT);
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            logger.warn("Unknown command: {}", cmdName);
+            view.printError("Unknown command: " + cmdName);
+            return;
+        }
+        logger.debug("Executing command: {}", cmdName);
+        command.execute(args, repository);
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
new file mode 100644
index 0000000..e81c3c9
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+public class CrawlerException extends Exception { // Checked base type of the crawler's own exceptions.
+ public CrawlerException(String message) {
+ super(message);
+ }
+
+ public CrawlerException(String message, Throwable cause) { // keeps the underlying cause attached
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..0fb8e5e
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+public class NetworkException extends CrawlerException { // Raised by CrawlCommand when fetching a page fails.
+ public NetworkException(String message) {
+ super(message);
+ }
+
+ public NetworkException(String message, Throwable cause) { // keeps the underlying cause attached
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..205665a
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+public class ParseException extends CrawlerException { // Failure type declared by CrawlStrategy.parse.
+ public ParseException(String message) {
+ super(message);
+ }
+
+ public ParseException(String message, Throwable cause) { // keeps the underlying cause attached
+ super(message, cause);
+ }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/model/Article.java b/w11/java-cli/src/main/java/com/example/datacollect/model/Article.java
new file mode 100644
index 0000000..f3b0ca8
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/model/Article.java
@@ -0,0 +1,77 @@
+package com.example.datacollect.model;
+
+import java.time.LocalDate;
+
+/** Mutable value holder for one crawled article. */
+public class Article {
+    private String title;
+    private String url;
+    private String content;
+    private String author;
+    private LocalDate publishDate;
+
+    /** Creates an article without author and publish date (both stay {@code null}). */
+    public Article(String title, String url, String content) {
+        this(title, url, content, null, null);
+    }
+
+    /** Creates a fully populated article; no argument is validated. */
+    public Article(String title, String url, String content, String author, LocalDate publishDate) {
+        this.title = title;
+        this.url = url;
+        this.content = content;
+        this.author = author;
+        this.publishDate = publishDate;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getAuthor() {
+        return author;
+    }
+
+    public void setAuthor(String author) {
+        this.author = author;
+    }
+
+    public LocalDate getPublishDate() {
+        return publishDate;
+    }
+
+    public void setPublishDate(LocalDate publishDate) {
+        this.publishDate = publishDate;
+    }
+
+    /** Renders title, url, author and publish date; the content is left out. */
+    @Override
+    public String toString() {
+        StringBuilder text = new StringBuilder("Article{");
+        text.append("title='").append(title).append('\'');
+        text.append(", url='").append(url).append('\'');
+        text.append(", author='").append(author).append('\'');
+        text.append(", publishDate=").append(publishDate);
+        return text.append('}').toString();
+    }
+}
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..42be8b2
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,61 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/** In-memory store of crawled articles in insertion order, capped at {@code MAX_CAPACITY}. */
+public class ArticleRepository {
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    private static final int MAX_CAPACITY = 10000;
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Validates and appends {@code article}.
+     *
+     * @param article article to store
+     * @throws IllegalArgumentException if the article is {@code null} or its title or URL is
+     *     {@code null} or blank
+     * @throws IllegalStateException if the repository already holds {@code MAX_CAPACITY} entries
+     */
+    public void add(Article article) {
+        if (article == null) {
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+        if (isBlank(article.getTitle())) {
+            throw new IllegalArgumentException("Article title cannot be null or empty");
+        }
+        if (isBlank(article.getUrl())) {
+            throw new IllegalArgumentException("Article URL cannot be null or empty");
+        }
+        if (articles.size() >= MAX_CAPACITY) {
+            throw new IllegalStateException("Repository capacity exceeded: " + MAX_CAPACITY);
+        }
+        articles.add(article);
+        logger.debug("Added article: {}", article.getTitle());
+    }
+
+    /** Returns a read-only live view of the stored articles; later additions show through. */
+    public List<Article> getAll() {
+        return Collections.unmodifiableList(articles);
+    }
+
+    /** Returns the number of stored articles. */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes every stored article. */
+    public void clear() {
+        articles.clear();
+        logger.debug("Cleared all articles");
+    }
+
+    /** True when {@code value} is {@code null} or contains nothing but whitespace. */
+    private static boolean isBlank(String value) {
+        return value == null || value.trim().isEmpty();
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
new file mode 100644
index 0000000..238d407
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
@@ -0,0 +1,60 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Strategy for pages whose URL contains {@code blog.example.com}. */
+public class BlogStrategy implements CrawlStrategy {
+    @Override
+    public boolean supports(String url) {
+        return url.contains("blog.example.com");
+    }
+
+    /**
+     * Builds one article per {@code .post-item} element that has a non-empty
+     * {@code .post-title}; the excerpt, when present, becomes the content.
+     *
+     * @param url address the page was fetched from, used when a post has no usable link
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements postItems = doc.select(".post-item");
+
+        for (Element item : postItems) {
+            Element titleEl = item.selectFirst(".post-title");
+            if (titleEl == null) {
+                continue;
+            }
+            String title = titleEl.text().trim();
+            if (title.isEmpty()) {
+                continue;
+            }
+
+            Element contentEl = item.selectFirst(".post-excerpt");
+            String content = contentEl != null ? contentEl.text().trim() : "";
+            articles.add(new Article(title, resolveLink(item.selectFirst("a"), url), content));
+        }
+
+        return articles;
+    }
+
+    /**
+     * Resolves the post link against the document's base URI so relative hrefs become
+     * absolute; falls back to {@code pageUrl} when there is no link or it cannot be resolved.
+     */
+    private static String resolveLink(Element linkEl, String pageUrl) {
+        if (linkEl == null) {
+            return pageUrl;
+        }
+        String absolute = linkEl.absUrl("href");
+        return absolute.isEmpty() ? pageUrl : absolute;
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..3758b21
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
@@ -0,0 +1,22 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/** Site-specific rule set that turns a fetched page into {@link Article} objects. */
+public interface CrawlStrategy {
+    /**
+     * Extracts the articles found in {@code doc}.
+     *
+     * @param url address the document was fetched from
+     * @param doc parsed page
+     * @return the extracted articles
+     * @throws ParseException if the page cannot be interpreted
+     */
+    List<Article> parse(String url, Document doc) throws ParseException;
+
+    /** Returns whether this strategy is able to handle {@code url}. */
+    boolean supports(String url);
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/w11/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
new file mode 100644
index 0000000..c32fac9
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
@@ -0,0 +1,63 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Strategy for list pages whose URL contains {@code news.hnu.edu.cn}. */
+public class HnuNewsStrategy implements CrawlStrategy {
+    @Override
+    public boolean supports(String url) {
+        return url.contains("news.hnu.edu.cn");
+    }
+
+    /**
+     * Builds one article per {@code ul.list11 li} entry whose link contains a non-empty
+     * {@code h4.l2.h4s2} title; {@code p.l3.ps3}, when present, becomes the content.
+     *
+     * @param url address the page was fetched from
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements listItems = doc.select("ul.list11 li");
+
+        for (Element li : listItems) {
+            Element link = li.selectFirst("a");
+            if (link == null) {
+                continue;
+            }
+
+            String title = textOf(link.selectFirst("h4.l2.h4s2"));
+            if (title.isEmpty()) {
+                continue;
+            }
+
+            String content = textOf(link.selectFirst("p.l3.ps3"));
+            articles.add(new Article(title, resolveLink(link, url), content));
+        }
+
+        return articles;
+    }
+
+    /** Trimmed text of {@code element}, or the empty string when it is {@code null}. */
+    private static String textOf(Element element) {
+        return element == null ? "" : element.text().trim();
+    }
+
+    /**
+     * Resolves the href against the document's base URI. This handles {@code ../} segments and
+     * hrefs without a leading slash, which plain string concatenation with the host would
+     * turn into invalid addresses. Falls back to {@code pageUrl} when resolution yields nothing.
+     */
+    private static String resolveLink(Element link, String pageUrl) {
+        String absolute = link.absUrl("href");
+        return absolute.isEmpty() ? pageUrl : absolute;
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
new file mode 100644
index 0000000..4abe877
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
@@ -0,0 +1,41 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/** Strategy for pages whose URL contains {@code news.example.com}. */
+public class NewsStrategy implements CrawlStrategy {
+    @Override
+    public boolean supports(String url) {
+        return url.contains("news.example.com");
+    }
+
+    /**
+     * Builds one article per non-blank {@code .article-headline} element. Every article
+     * carries the page URL and an empty content.
+     *
+     * @param url address the page was fetched from
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements items = doc.select(".article-headline");
+        for (Element e : items) {
+            String title = e.text().trim();
+            // The repository rejects blank titles with an exception, which would abort the
+            // whole crawl; skip such headlines instead.
+            if (title.isEmpty()) {
+                continue;
+            }
+            articles.add(new Article(title, url, ""));
+        }
+        return articles;
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
new file mode 100644
index 0000000..22bbc21
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
@@ -0,0 +1,55 @@
+package com.example.datacollect.strategy;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Ordered registry of crawl strategies; lookups return the first strategy that supports a URL.
+ */
+public class StrategyFactory {
+    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);
+    private final List<CrawlStrategy> strategies = new ArrayList<>();
+
+    /** Registers the built-in strategies in lookup order: HNU news, blog, news. */
+    public StrategyFactory() {
+        strategies.add(new HnuNewsStrategy());
+        strategies.add(new BlogStrategy());
+        strategies.add(new NewsStrategy());
+        logger.info("StrategyFactory initialized with {} strategies", strategies.size());
+    }
+
+    /**
+     * Finds the first registered strategy whose {@code supports} accepts {@code url}.
+     *
+     * @param url address to crawl; {@code null} never matches
+     * @return the matching strategy, or {@code null} when none matches
+     */
+    public CrawlStrategy getStrategy(String url) {
+        if (url != null) {
+            for (CrawlStrategy s : strategies) {
+                if (s.supports(url)) {
+                    logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url);
+                    return s;
+                }
+            }
+        }
+        logger.warn("No strategy found for URL: {}", url);
+        return null;
+    }
+
+    /**
+     * Appends {@code strategy} after the ones already registered.
+     *
+     * @throws NullPointerException if {@code strategy} is {@code null}
+     */
+    public void register(CrawlStrategy strategy) {
+        // Reject null up front: a null entry in the list would make every later lookup
+        // fail.
+        Objects.requireNonNull(strategy, "strategy");
+        strategies.add(strategy);
+        logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName());
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
new file mode 100644
index 0000000..22b4f3a
--- /dev/null
+++ b/w11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -0,0 +1,61 @@
+package com.example.datacollect.view;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.List;
+import java.util.Scanner;
+
+/** Console front end: reads lines from standard input and prints ANSI-coloured messages. */
+public class ConsoleView {
+    private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
+    private static final String ANSI_RESET = "\u001B[0m";
+    private static final String ANSI_GREEN = "\u001B[32m";
+    private static final String ANSI_RED = "\u001B[31m";
+    private static final String ANSI_BLUE = "\u001B[34m";
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /**
+     * Prints the {@code "> "} prompt and blocks for the next input line.
+     *
+     * @return the line without its terminator
+     * @throws java.util.NoSuchElementException if standard input has reached its end
+     */
+    public String readLine() {
+        System.out.print("> ");
+        return scanner.nextLine();
+    }
+
+    /** Prints {@code msg} in green. */
+    public void printSuccess(String msg) {
+        System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in red. */
+    public void printError(String msg) {
+        System.out.println(ANSI_RED + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in blue. */
+    public void printInfo(String msg) {
+        System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+    }
+
+    /**
+     * Prints the articles as a one-based numbered list of {@code title | url}, or a hint to
+     * run {@code crawl} first when the list is empty.
+     *
+     * @param articles articles to show, must not be {@code null}
+     */
+    public void display(List<Article> articles) {
+        if (articles.isEmpty()) {
+            printInfo("暂无文章,请先执行 crawl。");
+            return;
+        }
+        for (int i = 0; i < articles.size(); i++) {
+            Article a = articles.get(i);
+            System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+        }
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli/src/main/resources/logback.xml b/w11/java-cli/src/main/resources/logback.xml
new file mode 100644
index 0000000..893d39f
--- /dev/null
+++ b/w11/java-cli/src/main/resources/logback.xml
@@ -0,0 +1,26 @@
+
+
+
+
+ %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ logs/crawler-%d{yyyy-MM-dd}.log
+ 7
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/w11/logs/crawler.log b/w11/logs/crawler.log
new file mode 100644
index 0000000..20c4eca
--- /dev/null
+++ b/w11/logs/crawler.log
@@ -0,0 +1,3 @@
+2026-05-30 21:21:52.964 [main] INFO com.example.datacollect.Main - Starting CLI Crawler W11
+2026-05-30 21:21:53.090 [main] INFO c.e.d.strategy.StrategyFactory - StrategyFactory initialized with 3 strategies
+2026-05-30 21:21:53.245 [main] INFO c.e.d.controller.CrawlerController - CrawlerController initialized