diff --git a/w11/java-cli - 副本/.gitignore b/w11/java-cli - 副本/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/w11/java-cli - 副本/.gitignore
@@ -0,0 +1,3 @@
+*.jar
+*.class
+*.log
\ No newline at end of file
diff --git a/w11/java-cli - 副本/.vscode/settings.json b/w11/java-cli - 副本/.vscode/settings.json
new file mode 100644
index 0000000..c5f3f6b
--- /dev/null
+++ b/w11/java-cli - 副本/.vscode/settings.json
@@ -0,0 +1,3 @@
+{
+ "java.configuration.updateBuildConfiguration": "interactive"
+}
\ No newline at end of file
diff --git a/w11/java-cli - 副本/pom.xml b/w11/java-cli - 副本/pom.xml
new file mode 100644
index 0000000..c5ae7b7
--- /dev/null
+++ b/w11/java-cli - 副本/pom.xml
@@ -0,0 +1,57 @@
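+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>datacollect-cli</artifactId>
+    <version>0.1.0</version>
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.4.14</version>
+        </dependency>
+    </dependencies>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.datacollect.Main</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>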
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/Main.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..ef65b3e
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,45 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Entry point of the CLI crawler: wires the view, repository, strategy factory and
+ * controller together and runs the read-dispatch loop.
+ */
+public class Main {
+    private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Runs the interactive loop: every line read from the console is handed to the
+     * controller. The loop ends when a command stops the JVM or when standard input
+     * is exhausted.
+     *
+     * @param args command-line arguments (unused)
+     */
+    public static void main(String[] args) {
+        logger.info("Starting CLI Crawler application");
+        ConsoleView view = new ConsoleView();
+        ArticleRepository repository = new ArticleRepository();
+        StrategyFactory strategyFactory = new StrategyFactory();
+        CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
+
+        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
+        while (true) {
+            String line;
+            try {
+                line = view.readLine();
+            } catch (java.util.NoSuchElementException endOfInput) {
+                // Scanner.nextLine() throws this once stdin hits EOF (Ctrl-D, piped input);
+                // end the session normally instead of dying with a stack trace.
+                logger.info("Standard input closed, exiting");
+                break;
+            }
+            controller.handle(line);
+        }
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
new file mode 100644
index 0000000..d73f855
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
@@ -0,0 +1,96 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implements the {@code analyze} command: fetches a page, parses it with the strategy
+ * chosen for the URL and prints article statistics. Nothing is written to the repository.
+ */
+public class AnalyzeCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+
+    private final ConsoleView view;
+    private final StrategyFactory strategyFactory;
+
+    /**
+     * @param view            console used for all user-facing output
+     * @param strategyFactory supplies the parsing strategy for a URL
+     */
+    public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.strategyFactory = strategyFactory;
+    }
+
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Downloads {@code args[1]}, parses it and reports the article count plus the
+     * average title and content lengths. Fetch and parse failures are logged and shown
+     * to the user instead of being propagated.
+     *
+     * @param args       tokenized input; {@code args[1]} must be the URL
+     * @param repository not used by this command
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            logger.warn("Analyze command called without URL argument");
+            view.printError("Usage: analyze <url>");
+            return;
+        }
+        String url = args[1];
+        logger.info("Analyzing URL: {}", url);
+
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        String strategyName = strategy.getClass().getSimpleName();
+        logger.debug("Using strategy: {}", strategyName);
+
+        try {
+            view.printInfo("Analyzing: " + url);
+            Document doc = Jsoup.connect(url).get();
+            var articles = strategy.parse(url, doc);
+
+            int count = articles.size();
+            int totalTitleLength = 0;
+            int totalContentLength = 0;
+            for (var article : articles) {
+                if (article.getTitle() != null) {
+                    totalTitleLength += article.getTitle().length();
+                }
+                if (article.getContent() != null) {
+                    totalContentLength += article.getContent().length();
+                }
+            }
+
+            // Guard the division: an empty result reports 0.00 rather than NaN.
+            double avgTitleLength = count > 0 ? (double) totalTitleLength / count : 0;
+            double avgContentLength = count > 0 ? (double) totalContentLength / count : 0;
+            String avgTitleText = String.format("%.2f", avgTitleLength);
+            String avgContentText = String.format("%.2f", avgContentLength);
+
+            // SLF4J only understands plain "{}" placeholders, so the numbers are
+            // formatted before logging.
+            logger.info("Analysis complete - Articles: {}, Avg Title Length: {}, Avg Content Length: {}",
+                    count, avgTitleText, avgContentText);
+
+            view.printSuccess("Analysis Results:");
+            view.printInfo(" Total Articles: " + count);
+            view.printInfo(" Average Title Length: " + avgTitleText);
+            view.printInfo(" Average Content Length: " + avgContentText);
+            view.printInfo(" Strategy Used: " + strategyName);
+        } catch (Exception e) {
+            logger.error("Failed to analyze URL {}: {}", url, e.getMessage(), e);
+            view.printError("Failed to analyze: " + e.getMessage());
+        }
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/Command.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..029cadc
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,24 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+
+/**
+ * A single console command that the controller can look up by name and run.
+ */
+public interface Command {
+    /**
+     * Returns the keyword that selects this command; the controller matches it
+     * against the lower-cased first token of the input line.
+     *
+     * @return the command keyword
+     */
+    String getName();
+
+    /**
+     * Runs the command.
+     *
+     * @param args       whitespace-split input line; {@code args[0]} is the command word
+     * @param repository article store the controller passes to every command
+     */
+    void execute(String[] args, ArticleRepository repository);
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..13f5b3d
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,88 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.io.IOException;
+
+public class CrawlCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+ private static final int MAX_RETRIES = 3;
+ private static final long RETRY_DELAY_MS = 1000;
+
+ private final ConsoleView view;
+ private final StrategyFactory strategyFactory;
+
+ public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) {
+ this.view = view;
+ this.strategyFactory = strategyFactory;
+ }
+
+ @Override
+ public String getName() {
+ return "crawl";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ if (args.length < 2) {
+ logger.warn("Crawl command called without URL argument");
+ view.printError("Usage: crawl ");
+ return;
+ }
+ String url = args[1];
+ logger.info("Starting crawl for URL: {}", url);
+
+ CrawlStrategy strategy = strategyFactory.getStrategy(url);
+ logger.debug("Using strategy: {}", strategy.getClass().getSimpleName());
+
+ int retryCount = 0;
+ boolean success = false;
+
+ while (retryCount < MAX_RETRIES && !success) {
+ try {
+ view.printInfo("Crawling: " + url + (retryCount > 0 ? " (attempt " + (retryCount + 1) + ")" : ""));
+ logger.debug("Attempt {} to fetch URL: {}", retryCount + 1, url);
+ Document doc = Jsoup.connect(url).get();
+ var articles = strategy.parse(url, doc);
+ repository.addAll(articles);
+ logger.info("Successfully crawled {} articles from {}", articles.size(), url);
+ view.printSuccess("Crawled " + articles.size() + " articles.");
+ success = true;
+ } catch (IOException e) {
+ retryCount++;
+ logger.error("Network error on attempt {} for URL {}: {}", retryCount, url, e.getMessage());
+ if (retryCount < MAX_RETRIES) {
+ view.printWarning("Network error: " + e.getMessage() + ", retrying...");
+ sleep(RETRY_DELAY_MS);
+ } else {
+ logger.error("Failed to crawl URL {} after {} attempts", url, MAX_RETRIES);
+ view.printError("Failed to crawl after " + MAX_RETRIES + " attempts: " + e.getMessage());
+ }
+ } catch (ParseException e) {
+ logger.error("Parse error for URL {}: {}", url, e.getMessage());
+ view.printError("Parse error: " + e.getMessage());
+ break;
+ } catch (Exception e) {
+ logger.error("Unexpected error for URL {}: {}", url, e.getMessage(), e);
+ view.printError("Unexpected error: " + e.getMessage());
+ break;
+ }
+ }
+ }
+
+ private void sleep(long millis) {
+ try {
+ Thread.sleep(millis);
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ logger.warn("Sleep interrupted");
+ }
+ }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..51ee001
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,42 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implements the {@code exit} command, which says goodbye and stops the JVM.
+ */
+public class ExitCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console used to print the farewell message
+     */
+    public ExitCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    /**
+     * Logs the shutdown, prints {@code Bye!} and terminates the process with status 0.
+     * This method does not return.
+     *
+     * @param args       ignored
+     * @param repository ignored
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("Exiting application");
+        view.printSuccess("Bye!");
+        final int successStatus = 0;
+        System.exit(successStatus);
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..ee03cfb
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,40 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Implements the {@code help} command, which prints the list of available commands.
+ */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console used to print the command list
+     */
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /**
+     * Prints a one-line summary of all commands.
+     *
+     * @param args       ignored
+     * @param repository ignored
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Displaying help information");
+        // The <url> placeholder tells the user that crawl and analyze need an argument.
+        view.printInfo("Commands: crawl <url>, analyze <url>, list, help, exit");
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..ea383a8
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,27 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class ListCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+
+ private final ConsoleView view;
+
+ public ListCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "list";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ logger.debug("Listing {} articles", repository.size());
+ view.display(repository.getAll());
+ }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..0ea7179
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,77 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.AnalyzeCommand;
+import com.example.datacollect.command.Command;
+import com.example.datacollect.command.CrawlCommand;
+import com.example.datacollect.command.ExitCommand;
+import com.example.datacollect.command.HelpCommand;
+import com.example.datacollect.command.ListCommand;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Parses console input and dispatches it to the command registered under the first word.
+ */
+public class CrawlerController {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);
+
+    private final Map<String, Command> commands = new HashMap<>();
+    private final ConsoleView view;
+    private final ArticleRepository repository;
+
+    /**
+     * Creates the controller and registers the built-in commands (help, list, crawl,
+     * analyze, exit).
+     *
+     * @param view            console for error output, also passed to each command
+     * @param repository      article store handed to every executed command
+     * @param strategyFactory strategy source for the crawl and analyze commands
+     */
+    public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) {
+        this.view = view;
+        this.repository = repository;
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view, strategyFactory));
+        register(new AnalyzeCommand(view, strategyFactory));
+        register(new ExitCommand(view));
+        logger.info("CrawlerController initialized with {} commands", commands.size());
+    }
+
+    /** Stores the command under its name; a later command with the same name replaces it. */
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+        logger.debug("Registered command: {}", command.getName());
+    }
+
+    /**
+     * Executes one line of user input. Blank or {@code null} input is ignored; an
+     * unrecognized command word is reported through the view.
+     *
+     * @param input raw line typed by the user, may be {@code null}
+     */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+
+        String[] args = text.split("\\s+");
+        // Locale.ROOT keeps the lookup key stable; the default locale can change how
+        // letters are lower-cased (for example "I" under a Turkish locale).
+        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            logger.warn("Unknown command: {}", cmdName);
+            view.printError("Unknown command: " + cmdName);
+            return;
+        }
+        logger.info("Executing command: {}", cmdName);
+        command.execute(args, repository);
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/CrawlerException.java
new file mode 100644
index 0000000..e81c3c9
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -0,0 +1,22 @@
+package com.example.datacollect.exception;
+
+/**
+ * Checked base type for failures raised by the crawler; {@code NetworkException}
+ * and {@code ParseException} extend it.
+ */
+public class CrawlerException extends Exception {
+    /**
+     * @param message human-readable description of the failure
+     */
+    public CrawlerException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message human-readable description of the failure
+     * @param cause   underlying exception, kept for diagnostics
+     */
+    public CrawlerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..0fb8e5e
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,22 @@
+package com.example.datacollect.exception;
+
+/**
+ * Crawler failure type named for network problems. NOTE(review): nothing in the
+ * visible code throws it; {@code CrawlCommand} handles {@code IOException} directly.
+ */
+public class NetworkException extends CrawlerException {
+    /**
+     * @param message human-readable description of the failure
+     */
+    public NetworkException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message human-readable description of the failure
+     * @param cause   underlying exception, kept for diagnostics
+     */
+    public NetworkException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..205665a
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,22 @@
+package com.example.datacollect.exception;
+
+/**
+ * Checked exception declared by {@code CrawlStrategy.parse}; {@code CrawlCommand}
+ * reports it to the user without retrying.
+ */
+public class ParseException extends CrawlerException {
+    /**
+     * @param message human-readable description of the failure
+     */
+    public ParseException(String message) {
+        super(message);
+    }
+
+    /**
+     * @param message human-readable description of the failure
+     * @param cause   underlying exception, kept for diagnostics
+     */
+    public ParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+}
\ No newline at end of file
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/model/Article.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/model/Article.java
new file mode 100644
index 0000000..147dbe6
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/model/Article.java
@@ -0,0 +1,45 @@
+package com.example.datacollect.model;
+
+public class Article {
+ private String title;
+ private String url;
+ private String content;
+
+ public Article(String title, String url, String content) {
+ this.title = title;
+ this.url = url;
+ this.content = content;
+ }
+
+ public String getTitle() {
+ return title;
+ }
+
+ public void setTitle(String title) {
+ this.title = title;
+ }
+
+ public String getUrl() {
+ return url;
+ }
+
+ public void setUrl(String url) {
+ this.url = url;
+ }
+
+ public String getContent() {
+ return content;
+ }
+
+ public void setContent(String content) {
+ this.content = content;
+ }
+
+ @Override
+ public String toString() {
+ return "Article{"
+ + "title='" + title + '\''
+ + ", url='" + url + '\''
+ + '}';
+ }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..ad717cc
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,76 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+public class ArticleRepository {
+ private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+
+ private final List articles = new ArrayList<>();
+
+ public void add(Article article) {
+ if (article == null) {
+ logger.error("Attempted to add null article");
+ throw new IllegalArgumentException("Article cannot be null");
+ }
+ if (article.getTitle() == null || article.getTitle().trim().isEmpty()) {
+ logger.warn("Attempted to add article with empty title");
+ throw new IllegalArgumentException("Article title cannot be null or empty");
+ }
+ if (article.getUrl() == null || article.getUrl().trim().isEmpty()) {
+ logger.warn("Attempted to add article with empty URL");
+ throw new IllegalArgumentException("Article URL cannot be null or empty");
+ }
+ articles.add(article);
+ logger.debug("Added article: {}", article.getTitle());
+ }
+
+ public void addAll(List articleList) {
+ if (articleList == null) {
+ logger.error("Attempted to add null article list");
+ throw new IllegalArgumentException("Article list cannot be null");
+ }
+ if (articleList.isEmpty()) {
+ logger.debug("Attempted to add empty article list");
+ return;
+ }
+
+ for (int i = 0; i < articleList.size(); i++) {
+ Article article = articleList.get(i);
+ if (article == null) {
+ logger.warn("Skipping null article at index {}", i);
+ throw new IllegalArgumentException("Article in list cannot be null at index " + i);
+ }
+ if (article.getTitle() == null || article.getTitle().trim().isEmpty()) {
+ logger.warn("Skipping article with empty title at index {}", i);
+ throw new IllegalArgumentException("Article title cannot be null or empty at index " + i);
+ }
+ if (article.getUrl() == null || article.getUrl().trim().isEmpty()) {
+ logger.warn("Skipping article with empty URL at index {}", i);
+ throw new IllegalArgumentException("Article URL cannot be null or empty at index " + i);
+ }
+ }
+
+ articles.addAll(articleList);
+ logger.info("Added {} articles to repository", articleList.size());
+ }
+
+ public List getAll() {
+ logger.debug("Retrieving all articles, count: {}", articles.size());
+ return Collections.unmodifiableList(articles);
+ }
+
+ public int size() {
+ return articles.size();
+ }
+
+ public void clear() {
+ int size = articles.size();
+ articles.clear();
+ logger.info("Cleared repository, removed {} articles", size);
+ }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
new file mode 100644
index 0000000..66d5b16
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
@@ -0,0 +1,44 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Strategy for URLs containing {@code blog.example.com}: each {@code .post-title}
+ * element becomes an article that points at the crawled page and has empty content.
+ */
+public class BlogStrategy extends PriorityStrategy {
+    private static final int PRIORITY = 100;
+    private static final String URL_PATTERN = ".*blog\\.example\\.com.*";
+
+    public BlogStrategy() {
+        super(PRIORITY, URL_PATTERN);
+    }
+
+    /**
+     * Extracts one article per non-blank {@code .post-title} element.
+     *
+     * @param url address of the crawled page, used as every article's URL
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements titles = doc.select(".post-title");
+        for (Element e : titles) {
+            String title = e.text().trim();
+            // ArticleRepository.addAll rejects the whole batch on a blank title,
+            // so untitled elements are dropped here.
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, url, ""));
+            }
+        }
+        return articles;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..ed69e19
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
@@ -0,0 +1,30 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/**
+ * Site-specific parsing logic: decides whether it handles a URL and turns a fetched
+ * page into articles.
+ */
+public interface CrawlStrategy {
+    /**
+     * Extracts articles from a fetched page.
+     *
+     * @param url address the document was fetched from
+     * @param doc parsed page
+     * @return the extracted articles
+     * @throws ParseException if the page cannot be parsed
+     */
+    List<Article> parse(String url, Document doc) throws ParseException;
+
+    /**
+     * Tells whether this strategy applies to the given URL.
+     *
+     * @param url address to test
+     * @return {@code true} if the strategy handles the URL
+     */
+    boolean supports(String url);
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java
new file mode 100644
index 0000000..fb46b0d
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/DefaultStrategy.java
@@ -0,0 +1,53 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Fallback strategy that accepts every URL and treats the page's links as articles.
+ */
+public class DefaultStrategy implements CrawlStrategy {
+    /** Link texts of this length or shorter are ignored. */
+    private static final int MAX_IGNORED_TITLE_LENGTH = 5;
+    /** Upper bound on the number of articles returned per page. */
+    private static final int MAX_ARTICLES = 20;
+
+    @Override
+    public boolean supports(String url) {
+        return true;
+    }
+
+    /**
+     * Collects up to {@value #MAX_ARTICLES} links whose trimmed text is longer than
+     * {@value #MAX_IGNORED_TITLE_LENGTH} characters. The link target is resolved to an
+     * absolute URL; when that yields nothing the page URL is used instead.
+     *
+     * @param url address of the crawled page
+     * @param doc parsed page
+     * @return the extracted articles with empty content, possibly an empty list
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+
+        Elements links = doc.select("a[href]");
+        for (Element link : links) {
+            if (articles.size() >= MAX_ARTICLES) {
+                break;
+            }
+            String title = link.text().trim();
+            if (title.length() <= MAX_IGNORED_TITLE_LENGTH) {
+                continue;
+            }
+            String href = link.attr("abs:href");
+            articles.add(new Article(title, href.isEmpty() ? url : href, ""));
+        }
+
+        return articles;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
new file mode 100644
index 0000000..bbf56b2
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
@@ -0,0 +1,79 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Strategy for {@code news.hnu.edu.cn} list pages: every {@code ul.list11 li} entry with
+ * a link and a title becomes an article.
+ */
+public class HnuNewsStrategy extends PriorityStrategy {
+    private static final int PRIORITY = 200;
+    private static final String URL_PATTERN = ".*news\\.hnu\\.edu\\.cn.*";
+    private static final String SITE_ROOT = "https://news.hnu.edu.cn";
+
+    public HnuNewsStrategy() {
+        super(PRIORITY, URL_PATTERN);
+    }
+
+    /**
+     * Extracts the title, link and {@code p.l3.ps3} text of each list entry. Entries
+     * without a link or without a title are skipped.
+     *
+     * @param url address of the crawled page
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements listItems = doc.select("ul.list11 li");
+
+        for (Element li : listItems) {
+            Element link = li.selectFirst("a");
+            if (link == null) {
+                continue;
+            }
+
+            String title = textOf(link.selectFirst("h4.l2.h4s2"));
+            if (title.isEmpty()) {
+                continue;
+            }
+            String content = textOf(link.selectFirst("p.l3.ps3"));
+            articles.add(new Article(title, resolveUrl(link), content));
+        }
+
+        return articles;
+    }
+
+    /** Returns the trimmed text of the element, or an empty string when it is absent. */
+    private static String textOf(Element element) {
+        return element == null ? "" : element.text().trim();
+    }
+
+    /**
+     * Resolves the link target to an absolute URL. The document's base URI is used when
+     * available, so relative paths such as {@code ../info/1.htm} or {@code info/1.htm}
+     * resolve correctly; otherwise the path is appended to the site root.
+     */
+    private static String resolveUrl(Element link) {
+        String absolute = link.attr("abs:href");
+        if (!absolute.isEmpty()) {
+            return absolute;
+        }
+        String href = link.attr("href");
+        if (href.startsWith("http")) {
+            return href;
+        }
+        String path = href.replace("..", "");
+        if (!path.isEmpty() && !path.startsWith("/")) {
+            path = "/" + path;
+        }
+        return SITE_ROOT + path;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
new file mode 100644
index 0000000..c6c6b98
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
@@ -0,0 +1,44 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Strategy for URLs containing {@code news.example.com}: each {@code .article-headline}
+ * element becomes an article that points at the crawled page and has empty content.
+ */
+public class NewsStrategy extends PriorityStrategy {
+    private static final int PRIORITY = 100;
+    private static final String URL_PATTERN = ".*news\\.example\\.com.*";
+
+    public NewsStrategy() {
+        super(PRIORITY, URL_PATTERN);
+    }
+
+    /**
+     * Extracts one article per non-blank {@code .article-headline} element.
+     *
+     * @param url address of the crawled page, used as every article's URL
+     * @param doc parsed page
+     * @return the extracted articles, possibly empty
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        Elements items = doc.select(".article-headline");
+        for (Element e : items) {
+            String title = e.text().trim();
+            // ArticleRepository.addAll rejects the whole batch on a blank title,
+            // so untitled elements are dropped here.
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, url, ""));
+            }
+        }
+        return articles;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java
new file mode 100644
index 0000000..ded1239
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/PriorityStrategy.java
@@ -0,0 +1,43 @@
+package com.example.datacollect.strategy;
+
+import java.util.regex.Pattern;
+
+/**
+ * Base class for strategies that are selected by a URL regular expression and ranked
+ * by priority. The natural ordering puts higher priorities first; it only looks at the
+ * priority and is therefore not consistent with {@code equals}.
+ */
+public abstract class PriorityStrategy implements CrawlStrategy, Comparable<PriorityStrategy> {
+    private final int priority;
+    private final Pattern urlPattern;
+
+    /**
+     * @param priority     rank of the strategy; larger values sort first
+     * @param regexPattern regular expression that must match the entire URL
+     * @throws java.util.regex.PatternSyntaxException if the expression is invalid
+     */
+    public PriorityStrategy(int priority, String regexPattern) {
+        this.priority = priority;
+        this.urlPattern = Pattern.compile(regexPattern);
+    }
+
+    /**
+     * @param url address to test, may be {@code null}
+     * @return {@code true} if the whole URL matches the pattern; {@code false} for {@code null}
+     */
+    @Override
+    public boolean supports(String url) {
+        return url != null && urlPattern.matcher(url).matches();
+    }
+
+    /** Orders by descending priority. */
+    @Override
+    public int compareTo(PriorityStrategy other) {
+        return Integer.compare(other.priority, this.priority);
+    }
+
+    /** @return the priority given at construction */
+    public int getPriority() {
+        return priority;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
new file mode 100644
index 0000000..26d217c
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
@@ -0,0 +1,72 @@
+package com.example.datacollect.strategy;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * Chooses the parsing strategy for a URL. Registered strategies are tried from highest
+ * to lowest priority; a {@link DefaultStrategy} is used when none matches.
+ */
+public class StrategyFactory {
+    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);
+
+    private final List<PriorityStrategy> strategies = new ArrayList<>();
+    private final CrawlStrategy defaultStrategy;
+
+    /** Registers the built-in strategies (HNU news, blog, news) and the fallback. */
+    public StrategyFactory() {
+        strategies.add(new HnuNewsStrategy());
+        strategies.add(new BlogStrategy());
+        strategies.add(new NewsStrategy());
+        Collections.sort(strategies);
+        this.defaultStrategy = new DefaultStrategy();
+        logger.info("StrategyFactory initialized with {} strategies", strategies.size());
+    }
+
+    /**
+     * Returns the first registered strategy that supports the URL.
+     *
+     * @param url address to crawl, may be {@code null} or blank
+     * @return the matching strategy, or the default strategy when nothing matches
+     */
+    public CrawlStrategy getStrategy(String url) {
+        if (url == null || url.trim().isEmpty()) {
+            logger.debug("Empty URL provided, using default strategy");
+            return defaultStrategy;
+        }
+
+        for (PriorityStrategy s : strategies) {
+            if (s.supports(url)) {
+                logger.debug("URL {} matched strategy: {}", url, s.getClass().getSimpleName());
+                return s;
+            }
+        }
+        logger.debug("URL {} did not match any specific strategy, using default", url);
+        return defaultStrategy;
+    }
+
+    /**
+     * Adds a strategy and re-sorts by priority. The sort is stable, so strategies with
+     * equal priority keep their registration order.
+     *
+     * @param strategy strategy to add, must not be {@code null}
+     * @throws IllegalArgumentException if {@code strategy} is {@code null}
+     */
+    public void register(PriorityStrategy strategy) {
+        if (strategy == null) {
+            // A null entry would make every later sort and lookup fail.
+            throw new IllegalArgumentException("Strategy cannot be null");
+        }
+        strategies.add(strategy);
+        Collections.sort(strategies);
+        logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName());
+    }
+
+    /** @return the fallback strategy */
+    public CrawlStrategy getDefaultStrategy() {
+        return defaultStrategy;
+    }
+}
diff --git a/w11/java-cli - 副本/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/java-cli - 副本/src/main/java/com/example/datacollect/view/ConsoleView.java
new file mode 100644
index 0000000..6c058ba
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -0,0 +1,47 @@
+package com.example.datacollect.view;
+
+import com.example.datacollect.model.Article;
+import java.util.List;
+import java.util.Scanner;
+
+public class ConsoleView {
+ private static final String ANSI_RESET = "\u001B[0m";
+ private static final String ANSI_GREEN = "\u001B[32m";
+ private static final String ANSI_RED = "\u001B[31m";
+ private static final String ANSI_BLUE = "\u001B[34m";
+ private static final String ANSI_YELLOW = "\u001B[33m";
+
+ private final Scanner scanner = new Scanner(System.in);
+
+ public String readLine() {
+ System.out.print("> ");
+ return scanner.nextLine();
+ }
+
+ public void printSuccess(String msg) {
+ System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+ }
+
+ public void printError(String msg) {
+ System.out.println(ANSI_RED + msg + ANSI_RESET);
+ }
+
+ public void printInfo(String msg) {
+ System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+ }
+
+ public void printWarning(String msg) {
+ System.out.println(ANSI_YELLOW + msg + ANSI_RESET);
+ }
+
+ public void display(List articles) {
+ if (articles.isEmpty()) {
+ printInfo("暂无文章,请先执行 crawl。");
+ return;
+ }
+ for (int i = 0; i < articles.size(); i++) {
+ Article a = articles.get(i);
+ System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+ }
+ }
+}
diff --git a/w11/java-cli - 副本/src/main/resources/logback.xml b/w11/java-cli - 副本/src/main/resources/logback.xml
new file mode 100644
index 0000000..8a3b75d
--- /dev/null
+++ b/w11/java-cli - 副本/src/main/resources/logback.xml
@@ -0,0 +1,26 @@
+
+
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+ logs/crawler.%d{yyyy-MM-dd}.log
+ 30
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/w11/java-cli - 副本/target/classes/logback.xml b/w11/java-cli - 副本/target/classes/logback.xml
new file mode 100644
index 0000000..8a3b75d
--- /dev/null
+++ b/w11/java-cli - 副本/target/classes/logback.xml
@@ -0,0 +1,26 @@
+
+
+
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+ logs/crawler.%d{yyyy-MM-dd}.log
+ 30
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..a9870f6
--- /dev/null
+++ b/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
@@ -0,0 +1,21 @@
+com\example\datacollect\strategy\DefaultStrategy.class
+com\example\datacollect\strategy\PriorityStrategy.class
+com\example\datacollect\command\ListCommand.class
+com\example\datacollect\command\CrawlCommand.class
+com\example\datacollect\strategy\BlogStrategy.class
+com\example\datacollect\repository\ArticleRepository.class
+com\example\datacollect\Main.class
+com\example\datacollect\view\ConsoleView.class
+com\example\datacollect\command\ExitCommand.class
+com\example\datacollect\command\HelpCommand.class
+com\example\datacollect\strategy\NewsStrategy.class
+com\example\datacollect\command\Command.class
+com\example\datacollect\controller\CrawlerController.class
+com\example\datacollect\exception\CrawlerException.class
+com\example\datacollect\exception\NetworkException.class
+com\example\datacollect\command\AnalyzeCommand.class
+com\example\datacollect\strategy\StrategyFactory.class
+com\example\datacollect\strategy\HnuNewsStrategy.class
+com\example\datacollect\exception\ParseException.class
+com\example\datacollect\strategy\CrawlStrategy.class
+com\example\datacollect\model\Article.class
diff --git a/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..99bc177
--- /dev/null
+++ b/w11/java-cli - 副本/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,21 @@
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\DefaultStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\Command.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\exception\ParseException.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\ListCommand.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\exception\NetworkException.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\AnalyzeCommand.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\HelpCommand.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\NewsStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\Main.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\ExitCommand.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\exception\CrawlerException.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\command\CrawlCommand.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\BlogStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\controller\CrawlerController.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\model\Article.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\repository\ArticleRepository.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\view\ConsoleView.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\PriorityStrategy.java
+D:\桌面\java-cli - 副本\src\main\java\com\example\datacollect\strategy\StrategyFactory.java