diff --git a/project/202506050230-焦妍-期末实验报告.docx b/project/202506050230-焦妍-期末实验报告.docx
new file mode 100644
index 0000000..eab7b17
Binary files /dev/null and b/project/202506050230-焦妍-期末实验报告.docx differ
diff --git a/project/java-cli/.DS_Store b/project/java-cli/.DS_Store
new file mode 100644
index 0000000..09150ec
Binary files /dev/null and b/project/java-cli/.DS_Store differ
diff --git a/project/java-cli/.gitignore b/project/java-cli/.gitignore
new file mode 100644
index 0000000..0ebcf1a
--- /dev/null
+++ b/project/java-cli/.gitignore
@@ -0,0 +1,4 @@
+*.jar
+.DS_Store
+*.class
+*.log
\ No newline at end of file
diff --git a/project/java-cli/README.md b/project/java-cli/README.md
new file mode 100644
index 0000000..3ea02ec
--- /dev/null
+++ b/project/java-cli/README.md
@@ -0,0 +1,17 @@
+# DataCollect 教学项目 — 最小可运行版本
+
+这是一个最小可用的 Java CLI 演示工程,目标:打印帮助信息以验证运行环境。
+
+构建:
+```bash
+mvn -q package
+```
+
+运行(示例):
+```bash
+java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar --help
+```
+
+项目结构(最小):
+- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,启动交互式命令循环(输入 `help` 查看可用命令)
+- `pom.xml` — Maven 构建配置,生成可执行 jar
diff --git a/project/java-cli/pom.xml b/project/java-cli/pom.xml
new file mode 100644
index 0000000..5c5fa18
--- /dev/null
+++ b/project/java-cli/pom.xml
@@ -0,0 +1,69 @@
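+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <groupId>com.example</groupId>
+    <artifactId>datacollect-cli</artifactId>
+    <version>0.1.0</version>
+    <properties>
+        <maven.compiler.source>11</maven.compiler.source>
+        <maven.compiler.target>11</maven.compiler.target>
+    </properties>
+
+    <dependencies>
+        <dependency>
+            <groupId>org.jsoup</groupId>
+            <artifactId>jsoup</artifactId>
+            <version>1.17.2</version>
+        </dependency>
+        <dependency>
+            <groupId>ch.qos.logback</groupId>
+            <artifactId>logback-classic</artifactId>
+            <version>1.4.14</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.core</groupId>
+            <artifactId>jackson-databind</artifactId>
+            <version>2.15.2</version>
+        </dependency>
+        <dependency>
+            <groupId>com.fasterxml.jackson.datatype</groupId>
+            <artifactId>jackson-datatype-jsr310</artifactId>
+            <version>2.15.2</version>
+        </dependency>
+    </dependencies>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-compiler-plugin</artifactId>
+                <version>3.8.1</version>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>3.3.0</version>
+                <configuration>
+                    <archive>
+                        <manifest>
+                            <mainClass>com.example.datacollect.Main</mainClass>
+                        </manifest>
+                    </archive>
+                    <descriptorRefs>
+                        <descriptorRef>jar-with-dependencies</descriptorRef>
+                    </descriptorRefs>
+                </configuration>
+                <executions>
+                    <execution>
+                        <id>make-assembly</id>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>single</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
+</project>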
\ No newline at end of file
diff --git a/project/java-cli/src/.DS_Store b/project/java-cli/src/.DS_Store
new file mode 100644
index 0000000..daf9c39
Binary files /dev/null and b/project/java-cli/src/.DS_Store differ
diff --git a/project/java-cli/src/main/.DS_Store b/project/java-cli/src/main/.DS_Store
new file mode 100644
index 0000000..5b3c866
Binary files /dev/null and b/project/java-cli/src/main/.DS_Store differ
diff --git a/project/java-cli/src/main/java/.DS_Store b/project/java-cli/src/main/java/.DS_Store
new file mode 100644
index 0000000..f1ec01e
Binary files /dev/null and b/project/java-cli/src/main/java/.DS_Store differ
diff --git a/project/java-cli/src/main/java/com/.DS_Store b/project/java-cli/src/main/java/com/.DS_Store
new file mode 100644
index 0000000..3496c03
Binary files /dev/null and b/project/java-cli/src/main/java/com/.DS_Store differ
diff --git a/project/java-cli/src/main/java/com/example/.DS_Store b/project/java-cli/src/main/java/com/example/.DS_Store
new file mode 100644
index 0000000..4b49caf
Binary files /dev/null and b/project/java-cli/src/main/java/com/example/.DS_Store differ
diff --git a/project/java-cli/src/main/java/com/example/datacollect/Main.java b/project/java-cli/src/main/java/com/example/datacollect/Main.java
new file mode 100644
index 0000000..7c1ae59
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/Main.java
@@ -0,0 +1,31 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+public class Main {
+ private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+ public static void main(String[] args) {
+ logger.info("Starting CLI Crawler application");
+
+ ConsoleView view = new ConsoleView();
+ ArticleRepository repository = new ArticleRepository();
+ CrawlerController controller = new CrawlerController(view, repository);
+
+ view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands.");
+ logger.info("Application started successfully");
+
+ while (true) {
+ try {
+ controller.handle(view.readLine());
+ } catch (Exception e) {
+ logger.error("Error processing command", e);
+ view.printError("Error: " + e.getMessage());
+ }
+ }
+ }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
new file mode 100644
index 0000000..87cbdb0
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
@@ -0,0 +1,130 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+/**
+ * The {@code analyze} command: prints summary statistics for a list of articles.
+ *
+ * <p>With a URL argument the articles are crawled on the fly and are not added to
+ * the repository; without one, the articles already in the repository are analyzed.
+ */
+public class AnalyzeCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
+    private final ConsoleView view;
+
+    /**
+     * @param view console view that receives all output of this command
+     */
+    public AnalyzeCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** @return the command keyword, {@code "analyze"} */
+    @Override
+    public String getName() {
+        return "analyze";
+    }
+
+    /**
+     * Analyzes {@code args[1]} as a URL when present, otherwise the repository contents.
+     *
+     * @param args       tokenized input; {@code args[0]} is the command name
+     * @param repository source of stored articles when no URL is given
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Executing analyze command");
+
+        if (args.length >= 2) {
+            analyzeUrl(args[1]);
+        } else {
+            analyzeRepository(repository);
+        }
+    }
+
+    /** Crawls {@code url} with the strategy chosen by the factory and prints statistics. */
+    private void analyzeUrl(String url) {
+        logger.info("Analyzing URL: {}", url);
+        CrawlStrategy strategy = StrategyFactory.getStrategy(url);
+
+        if (strategy == null) {
+            logger.error("No strategy found for URL: {}", url);
+            view.printError("No strategy found for URL: " + url);
+            return;
+        }
+
+        view.printInfo("Analyzing URL: " + url);
+        view.printInfo("Using strategy: " + strategy.getClass().getSimpleName());
+
+        List<Article> articles = strategy.crawl(url);
+
+        printAnalysis(articles);
+        logger.info("Analysis completed for URL: {}", url);
+        view.printInfo("Note: Analysis results are NOT stored.");
+    }
+
+    /** Prints statistics for everything currently held by {@code repository}. */
+    private void analyzeRepository(ArticleRepository repository) {
+        List<Article> articles = repository.getAll();
+
+        if (articles.isEmpty()) {
+            logger.info("No articles to analyze");
+            view.printInfo("No articles to analyze. Use 'analyze ' to analyze a URL without storing.");
+            return;
+        }
+
+        logger.info("Analyzing {} articles from repository", articles.size());
+        view.printInfo("Analyzing " + articles.size() + " articles from repository:");
+        printAnalysis(articles);
+    }
+
+    /**
+     * Prints article count, total and average content length, and how many articles
+     * carry an author and a publish date. A {@code null} or empty list is reported
+     * as "No articles found." instead of failing.
+     */
+    private void printAnalysis(List<Article> articles) {
+        if (articles == null || articles.isEmpty()) {
+            logger.info("No articles found for analysis");
+            view.printInfo("No articles found.");
+            return;
+        }
+
+        int totalArticles = articles.size();
+        // long: the sum of many content lengths must not wrap around.
+        long totalContentLength = 0;
+        int articlesWithAuthor = 0;
+        int articlesWithDate = 0;
+
+        for (Article article : articles) {
+            if (article.getContent() != null) {
+                totalContentLength += article.getContent().length();
+            }
+            if (article.getAuthor() != null && !article.getAuthor().isEmpty()) {
+                articlesWithAuthor++;
+            }
+            if (article.getPublishDate() != null) {
+                articlesWithDate++;
+            }
+        }
+
+        // The list is known to be non-empty here, so the division is safe.
+        double avgContentLength = (double) totalContentLength / totalArticles;
+
+        logger.info("Analysis results: {} articles, {} avg length", totalArticles, avgContentLength);
+        view.printInfo("=== Analysis Results ===");
+        view.printInfo("Total articles: " + totalArticles);
+        view.printInfo("Total content length: " + totalContentLength);
+        view.printInfo("Average content length: " + String.format("%.2f", avgContentLength));
+        view.printInfo("Articles with author: " + articlesWithAuthor);
+        view.printInfo("Articles with publish date: " + articlesWithDate);
+    }
+
+    // The overrides below are placeholders required by the Command interface; none is
+    // implemented. The List parameters stay raw so the signatures keep matching the
+    // interface declaration.
+
+    @Override
+    public String getName1() {
+        throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+    }
+
+    @Override
+    public void execute(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute'");
+    }
+
+    @Override
+    public void execute1(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+
+    @Override
+    public void execute1(String[] args, ArticleRepository repository) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/Command.java b/project/java-cli/src/main/java/com/example/datacollect/command/Command.java
new file mode 100644
index 0000000..a730991
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/Command.java
@@ -0,0 +1,15 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+
+import java.util.List;
+
+public interface Command {
+ String getName1();
+ void execute(String[] args, List articles);
+ void execute(String[] args, ArticleRepository repository);
+ String getName();
+ void execute1(String[] args, List articles);
+ void execute1(String[] args, ArticleRepository repository);
+}
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
new file mode 100644
index 0000000..12d91b3
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java
@@ -0,0 +1,85 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.service.ScraperService;
+import com.example.datacollect.strategy.CrawlStrategy;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+/**
+ * The {@code crawl} command: crawls a URL through {@link ScraperService} and adds
+ * the resulting articles to the repository.
+ */
+public class CrawlCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
+    private final ConsoleView view;
+    private final ScraperService scraperService;
+
+    /**
+     * @param view console view that receives all output of this command
+     */
+    public CrawlCommand(ConsoleView view) {
+        this.view = view;
+        this.scraperService = new ScraperService();
+    }
+
+    /** @return the command keyword, {@code "crawl"} */
+    @Override
+    public String getName() {
+        return "crawl";
+    }
+
+    /**
+     * Crawls {@code args[1]} and hands the articles to {@code repository}.
+     * A missing URL, an unsupported URL and any crawl failure are reported through
+     * the view; no exception escapes from the crawl itself.
+     *
+     * @param args       tokenized input; {@code args[1]} must be the URL
+     * @param repository store that receives the crawled articles
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        if (args.length < 2) {
+            logger.warn("Missing URL argument");
+            view.printError("Usage: crawl ");
+            return;
+        }
+
+        String url = args[1];
+        logger.info("Crawl started for: {}", url);
+
+        CrawlStrategy strategy = StrategyFactory.getStrategy(url);
+
+        if (strategy == null) {
+            logger.error("No strategy found for URL: {}", url);
+            view.printError("No strategy found for URL: " + url);
+            return;
+        }
+
+        String strategyName = strategy.getClass().getSimpleName();
+        logger.info("Using strategy: {}", strategyName);
+        view.printInfo("Crawling " + url + " with strategy: " + strategyName);
+
+        try {
+            List<Article> articles = scraperService.scrapeWithRetry(strategy, url);
+            repository.addAll(articles);
+            logger.info("Crawled {} articles successfully", articles.size());
+            view.printSuccess("Crawled " + articles.size() + " articles");
+        } catch (Exception e) {
+            // Command boundary: report the failure instead of propagating it.
+            logger.error("Error crawling URL: {}", url, e);
+            view.printError("Error: " + e.getMessage());
+        }
+    }
+
+    // The overrides below are placeholders required by the Command interface; none is
+    // implemented. The List parameters stay raw so the signatures keep matching the
+    // interface declaration.
+
+    @Override
+    public String getName1() {
+        throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+    }
+
+    @Override
+    public void execute(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute'");
+    }
+
+    @Override
+    public void execute1(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+
+    @Override
+    public void execute1(String[] args, ArticleRepository repository) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..7b743f9
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,55 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The {@code exit} command: prints a farewell through the view and terminates
+ * the JVM with status 0.
+ */
+public class ExitCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+    private final ConsoleView view;
+
+    /**
+     * @param view console view used for the farewell message
+     */
+    public ExitCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** @return the command keyword, {@code "exit"} */
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    /**
+     * Logs the request, prints "Goodbye!" and calls {@code System.exit(0)};
+     * neither argument is read.
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.info("User requested exit");
+        view.printSuccess("Goodbye!");
+        System.exit(0);
+    }
+
+    // Placeholder overrides required by the Command interface; none is implemented.
+
+    @Override
+    public String getName1() {
+        throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+    }
+
+    @Override
+    public void execute(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute'");
+    }
+
+    @Override
+    public void execute1(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+
+    @Override
+    public void execute1(String[] args, ArticleRepository repository) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
new file mode 100644
index 0000000..ce1d2c8
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java
@@ -0,0 +1,61 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+
+import java.util.List;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * The {@code help} command: prints the list of available commands through the view.
+ */
+public class HelpCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);
+
+    /** Help text, one entry per output line, in display order. */
+    private static final String[] HELP_LINES = {
+        "Commands:",
+        " crawl - 爬取指定 URL 的文章",
+        " list - 列出已爬取的文章",
+        " analyze - 分析文章统计信息",
+        " history - 显示命令历史记录",
+        " save [file] - 保存文章到 JSON 文件(默认 articles.json)",
+        " help - 显示此帮助信息",
+        " exit - 退出程序",
+    };
+
+    private final ConsoleView view;
+
+    /**
+     * @param view console view that receives the help text
+     */
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** @return the command keyword, {@code "help"} */
+    @Override
+    public String getName() {
+        return "help";
+    }
+
+    /** Prints every help line as an info message; neither argument is read. */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Executing help command");
+        for (String helpLine : HELP_LINES) {
+            view.printInfo(helpLine);
+        }
+    }
+
+    // Placeholder overrides required by the Command interface; none is implemented.
+
+    @Override
+    public String getName1() {
+        throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+    }
+
+    @Override
+    public void execute(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute'");
+    }
+
+    @Override
+    public void execute1(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+
+    @Override
+    public void execute1(String[] args, ArticleRepository repository) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
new file mode 100644
index 0000000..f227ddd
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java
@@ -0,0 +1,80 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.List;
+
+public class HistoryCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class);
+ private static final List commandHistory = new ArrayList<>();
+ private final ConsoleView view;
+
+ public HistoryCommand(ConsoleView view) {
+ this.view = view;
+ }
+
+ @Override
+ public String getName() {
+ return "history";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ logger.debug("Executing history command");
+
+ if (commandHistory.isEmpty()) {
+ logger.info("Command history is empty");
+ view.printInfo("No command history.");
+ return;
+ }
+
+ logger.info("Showing {} command history items", commandHistory.size());
+ view.printInfo("Command History:");
+ for (int i = 0; i < commandHistory.size(); i++) {
+ view.printInfo((i + 1) + ". " + commandHistory.get(i));
+ }
+ }
+
+ public static void addCommand(String command) {
+ commandHistory.add(command);
+ logger.debug("Command added to history: {}", command);
+ }
+
+ public static List getCommandHistory() {
+ return new ArrayList<>(commandHistory);
+ }
+
+ public static void clearHistory() {
+ commandHistory.clear();
+ logger.info("Command history cleared");
+ }
+
+ @Override
+ public String getName1() {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+ }
+
+ @Override
+ public void execute(String[] args, List articles) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute'");
+ }
+
+ @Override
+ public void execute1(String[] args, List articles) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+ }
+
+ @Override
+ public void execute1(String[] args, ArticleRepository repository) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+ }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
new file mode 100644
index 0000000..048f001
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java
@@ -0,0 +1,65 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+/**
+ * The {@code list} command: prints every article held by the repository.
+ */
+public class ListCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);
+    private final ConsoleView view;
+
+    /**
+     * @param view console view that receives the listing
+     */
+    public ListCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    /** @return the command keyword, {@code "list"} */
+    @Override
+    public String getName() {
+        return "list";
+    }
+
+    /**
+     * Prints a header with the article count followed by each article's
+     * {@code toString()}, or a hint when the repository is empty.
+     *
+     * @param args       tokenized input; not read
+     * @param repository source of the articles to list
+     */
+    @Override
+    public void execute(String[] args, ArticleRepository repository) {
+        logger.debug("Executing list command");
+        // Typed list: iterating a raw List as Article does not compile.
+        List<Article> articles = repository.getAll();
+
+        if (articles.isEmpty()) {
+            logger.info("No articles found");
+            view.printInfo("No articles yet. Use 'crawl ' to get started.");
+            return;
+        }
+
+        logger.info("Listing {} articles", articles.size());
+        view.printInfo("=== Articles (" + articles.size() + ") ===");
+        for (Article article : articles) {
+            view.printInfo(article.toString());
+        }
+    }
+
+    // The overrides below are placeholders required by the Command interface; none is
+    // implemented. The List parameters stay raw so the signatures keep matching the
+    // interface declaration.
+
+    @Override
+    public String getName1() {
+        throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+    }
+
+    @Override
+    public void execute(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute'");
+    }
+
+    @Override
+    public void execute1(String[] args, List articles) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+
+    @Override
+    public void execute1(String[] args, ArticleRepository repository) {
+        throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/command/SaveCommand.java b/project/java-cli/src/main/java/com/example/datacollect/command/SaveCommand.java
new file mode 100644
index 0000000..0ced928
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/command/SaveCommand.java
@@ -0,0 +1,85 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.SerializationFeature;
+import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.List;
+
+public class SaveCommand implements Command {
+ private static final Logger logger = LoggerFactory.getLogger(SaveCommand.class);
+ private final ConsoleView view;
+ private final ObjectMapper objectMapper;
+
+ public SaveCommand(ConsoleView view) {
+ this.view = view;
+ this.objectMapper = new ObjectMapper();
+ this.objectMapper.registerModule(new JavaTimeModule());
+ this.objectMapper.enable(SerializationFeature.INDENT_OUTPUT);
+ }
+
+ @Override
+ public String getName() {
+ return "save";
+ }
+
+ @Override
+ public void execute(String[] args, ArticleRepository repository) {
+ logger.debug("Executing save command");
+
+ String fileName = args.length >= 2 ? args[1] : "articles.json";
+
+ if (!fileName.endsWith(".json")) {
+ fileName = fileName + ".json";
+ }
+
+ List articles = repository.getAll();
+
+ if (articles.isEmpty()) {
+ logger.warn("No articles to save");
+ view.printError("No articles to save. Use 'crawl ' first.");
+ return;
+ }
+
+ try {
+ File file = new File(fileName);
+ objectMapper.writeValue(file, articles);
+ logger.info("Successfully saved {} articles to {}", articles.size(), fileName);
+ view.printSuccess("Saved " + articles.size() + " articles to " + fileName);
+ } catch (IOException e) {
+ logger.error("Failed to save articles to {}", fileName, e);
+ view.printError("Error saving articles: " + e.getMessage());
+ }
+ }
+
+ @Override
+ public String getName1() {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'getName1'");
+ }
+
+ @Override
+ public void execute(String[] args, List articles) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute'");
+ }
+
+ @Override
+ public void execute1(String[] args, List articles) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+ }
+
+ @Override
+ public void execute1(String[] args, ArticleRepository repository) {
+ // TODO Auto-generated method stub
+ throw new UnsupportedOperationException("Unimplemented method 'execute1'");
+ }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java b/project/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
new file mode 100644
index 0000000..97e67b5
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java
@@ -0,0 +1,60 @@
+package com.example.datacollect.controller;
+
+import com.example.datacollect.command.*;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Maps command names to {@link Command} instances and dispatches console input
+ * lines to them.
+ */
+public class CrawlerController {
+    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);
+    // Typed map: with a raw Map, get() returns Object and cannot be assigned to Command.
+    private final Map<String, Command> commands = new HashMap<>();
+    private final ConsoleView view;
+    private final ArticleRepository repository;
+
+    /**
+     * Creates the controller and registers the built-in commands.
+     *
+     * @param view       console view shared with every command
+     * @param repository article store passed to each executed command
+     */
+    public CrawlerController(ConsoleView view, ArticleRepository repository) {
+        this.view = view;
+        this.repository = repository;
+
+        Command[] builtIns = {
+            new HelpCommand(view),
+            new ListCommand(view),
+            new CrawlCommand(view),
+            new ExitCommand(view),
+            new HistoryCommand(view),
+            new AnalyzeCommand(view),
+            new SaveCommand(view),
+        };
+        // The logged count is taken from the array so it cannot drift from the real list.
+        logger.info("Initializing CrawlerController with {} commands", builtIns.length);
+        for (Command builtIn : builtIns) {
+            register(builtIn);
+        }
+        logger.info("CrawlerController initialized successfully");
+    }
+
+    /** Stores {@code command} under its own name, replacing any earlier entry. */
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+        logger.debug("Registered command: {}", command.getName());
+    }
+
+    /**
+     * Handles one input line: records it in the command history, splits it on
+     * whitespace, and executes the command named by the first token
+     * (matched case-insensitively). Null or blank input is ignored; an unknown
+     * command name is reported through the view.
+     *
+     * @param input raw line from the console, may be {@code null}
+     */
+    public void handle(String input) {
+        String text = input == null ? "" : input.trim();
+        if (text.isEmpty()) {
+            return;
+        }
+
+        logger.debug("Handling input: {}", text);
+
+        // Record the command in the history.
+        HistoryCommand.addCommand(text);
+
+        String[] args = text.split("\\s+");
+        // Locale.ROOT keeps the lookup key stable regardless of the default locale
+        // (for example, Turkish casing rules would turn "EXIT" into "exıt").
+        String cmdName = args[0].toLowerCase(java.util.Locale.ROOT);
+        Command command = commands.get(cmdName);
+        if (command == null) {
+            logger.warn("Unknown command: {}", cmdName);
+            view.printError("Unknown command: " + cmdName);
+            return;
+        }
+
+        logger.info("Executing command: {}", cmdName);
+        command.execute(args, repository);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java b/project/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
new file mode 100644
index 0000000..d9c9c2e
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+/**
+ * Base unchecked exception of the crawler; {@code NetworkException} and
+ * {@code ParseException} extend it.
+ */
+public class CrawlerException extends RuntimeException {
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public CrawlerException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message description of the failure
+     */
+    public CrawlerException(String message) {
+        super(message);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java b/project/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
new file mode 100644
index 0000000..0fb8e5e
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+/**
+ * {@link CrawlerException} subtype for network failures; it is the only exception
+ * type that {@code ScraperService.scrapeWithRetry} retries on.
+ */
+public class NetworkException extends CrawlerException {
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public NetworkException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message description of the failure
+     */
+    public NetworkException(String message) {
+        super(message);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java b/project/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
new file mode 100644
index 0000000..205665a
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java
@@ -0,0 +1,11 @@
+package com.example.datacollect.exception;
+
+/**
+ * {@link CrawlerException} subtype for parsing failures.
+ */
+public class ParseException extends CrawlerException {
+    /**
+     * @param message description of the failure
+     * @param cause   underlying exception that triggered this one
+     */
+    public ParseException(String message, Throwable cause) {
+        super(message, cause);
+    }
+
+    /**
+     * @param message description of the failure
+     */
+    public ParseException(String message) {
+        super(message);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/model/Article.java b/project/java-cli/src/main/java/com/example/datacollect/model/Article.java
new file mode 100644
index 0000000..746abf6
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/model/Article.java
@@ -0,0 +1,75 @@
+package com.example.datacollect.model;
+
+import java.time.LocalDate;
+
+/**
+ * Mutable data holder for one crawled article: title, URL, content and the
+ * optional author and publish date.
+ */
+public class Article {
+    private String title;
+    private String url;
+    private String content;
+    private String author;
+    private LocalDate publishDate;
+
+    /**
+     * Creates an article whose author and publish date are {@code null}.
+     */
+    public Article(String title, String url, String content) {
+        this(title, url, content, null, null);
+    }
+
+    /**
+     * Creates an article with every field set.
+     */
+    public Article(String title, String url, String content, String author, LocalDate publishDate) {
+        this.title = title;
+        this.url = url;
+        this.content = content;
+        this.author = author;
+        this.publishDate = publishDate;
+    }
+
+    /** @return the title */
+    public String getTitle() {
+        return this.title;
+    }
+
+    /** @param title the new title */
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    /** @return the URL */
+    public String getUrl() {
+        return this.url;
+    }
+
+    /** @param url the new URL */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    /** @return the content */
+    public String getContent() {
+        return this.content;
+    }
+
+    /** @param content the new content */
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    /** @return the author, or {@code null} when none was set */
+    public String getAuthor() {
+        return this.author;
+    }
+
+    /** @param author the new author */
+    public void setAuthor(String author) {
+        this.author = author;
+    }
+
+    /** @return the publish date, or {@code null} when none was set */
+    public LocalDate getPublishDate() {
+        return this.publishDate;
+    }
+
+    /** @param publishDate the new publish date */
+    public void setPublishDate(LocalDate publishDate) {
+        this.publishDate = publishDate;
+    }
+
+    /**
+     * @return a one-line summary with title, URL, author and publish date;
+     *         the content is left out
+     */
+    @Override
+    public String toString() {
+        return String.format(
+                "Article{title='%s', url='%s', author='%s', publishDate=%s}",
+                title, url, author, publishDate);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/project/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
new file mode 100644
index 0000000..761ec36
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java
@@ -0,0 +1,52 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * In-memory store of {@link Article} objects kept in insertion order.
+ * Null articles and articles without a title are rejected with a warning.
+ */
+public class ArticleRepository {
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    // Typed list: iterating a raw List as Article does not compile.
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Adds one article. A {@code null} article, or one whose title is {@code null}
+     * or empty, is skipped and only logged.
+     *
+     * @param article the article to store
+     */
+    public void add(Article article) {
+        if (article == null) {
+            logger.warn("Attempted to add null article");
+            return;
+        }
+        if (article.getTitle() == null || article.getTitle().isEmpty()) {
+            logger.warn("Attempted to add article with empty title");
+            return;
+        }
+        articles.add(article);
+        logger.debug("Added article: {}", article.getTitle());
+    }
+
+    /**
+     * Adds every acceptable article of {@code articleList} through {@link #add(Article)}.
+     * A {@code null} list is ignored with a warning.
+     *
+     * @param articleList articles to store, may be {@code null}
+     */
+    public void addAll(List<Article> articleList) {
+        if (articleList == null) {
+            logger.warn("Attempted to add null article list");
+            return;
+        }
+        int sizeBefore = articles.size();
+        for (Article article : articleList) {
+            add(article);
+        }
+        // Log what was actually stored; rejected entries must not be counted.
+        logger.info("Added {} articles", articles.size() - sizeBefore);
+    }
+
+    /**
+     * @return an unmodifiable view of the stored articles
+     */
+    public List<Article> getAll() {
+        return Collections.unmodifiableList(articles);
+    }
+
+    /** Removes all stored articles and logs how many were dropped. */
+    public void clear() {
+        int size = articles.size();
+        articles.clear();
+        logger.info("Cleared {} articles from repository", size);
+    }
+
+    /** @return the number of stored articles */
+    public int size() {
+        return articles.size();
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java b/project/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java
new file mode 100644
index 0000000..c55bd9b
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java
@@ -0,0 +1,56 @@
+package com.example.datacollect.service;
+
+import com.example.datacollect.exception.CrawlerException;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.model.Article;
+import com.example.datacollect.strategy.CrawlStrategy;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+
+/**
+ * Runs a {@link CrawlStrategy} against a URL, retrying on {@link NetworkException} with an
+ * exponentially growing pause between attempts.
+ */
+public class ScraperService {
+    private static final Logger logger = LoggerFactory.getLogger(ScraperService.class);
+
+    /** Total number of attempts, including the first one. */
+    private static final int MAX_RETRY = 3;
+    /** Pause before the second attempt, in milliseconds. */
+    private static final long INITIAL_DELAY_MS = 1000;
+    /** Factor applied to the pause after every failed attempt. */
+    private static final double BACKOFF_MULTIPLIER = 2.0;
+
+    /**
+     * Crawls {@code url} with {@code strategy}, making up to {@value #MAX_RETRY} attempts.
+     *
+     * <p>Only {@link NetworkException} triggers a retry; any other exception raised by the
+     * strategy propagates immediately.
+     *
+     * @param strategy the strategy that performs the crawl
+     * @param url the address to crawl
+     * @return whatever the strategy returned on the first successful attempt
+     * @throws CrawlerException if every attempt failed with a network error (the last
+     *     {@link NetworkException} is the cause), or if the thread was interrupted while
+     *     waiting to retry (the interrupt flag is restored first)
+     */
+    public List<Article> scrapeWithRetry(CrawlStrategy strategy, String url) {
+        long delay = INITIAL_DELAY_MS;
+
+        for (int attempt = 1; attempt <= MAX_RETRY; attempt++) {
+            try {
+                logger.info("Attempt {}/{} to crawl {}", attempt, MAX_RETRY, url);
+
+                List<Article> articles = strategy.crawl(url);
+
+                if (attempt > 1) {
+                    logger.info("Successfully crawled {} on attempt {}", url, attempt);
+                }
+                return articles;
+            } catch (NetworkException e) {
+                logger.warn("Network error on attempt {} for {}: {}", attempt, url, e.getMessage());
+
+                if (attempt >= MAX_RETRY) {
+                    logger.error("Failed to crawl {} after {} attempts due to network errors", url, MAX_RETRY);
+                    throw new CrawlerException("Failed to crawl " + url + " after " + MAX_RETRY + " attempts", e);
+                }
+                pauseBeforeRetry(delay);
+                delay = (long) (delay * BACKOFF_MULTIPLIER);
+            }
+        }
+
+        // Only reachable if MAX_RETRY is configured as zero or negative.
+        throw new CrawlerException("Unexpected error: max retry attempts exhausted");
+    }
+
+    /** Sleeps for {@code delayMs}; an interrupt is re-flagged and reported as a CrawlerException. */
+    private static void pauseBeforeRetry(long delayMs) {
+        try {
+            logger.info("Retrying after {}ms...", delayMs);
+            Thread.sleep(delayMs);
+        } catch (InterruptedException ie) {
+            Thread.currentThread().interrupt();
+            throw new CrawlerException("Interrupted during retry wait", ie);
+        }
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
new file mode 100644
index 0000000..ded867d
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
@@ -0,0 +1,93 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link CrawlStrategy} for blog-style sites: Lofter, WordPress, and a generic fallback layout.
+ */
+public class BlogStrategy implements CrawlStrategy {
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
+    private static final int TIMEOUT_MS = 10000;
+    /** Maximum number of UTF-16 code units kept from a summary. */
+    private static final int SUMMARY_MAX_LENGTH = 300;
+
+    /**
+     * Accepts URLs containing "blog", "wordpress", "lofter" or "hexo".
+     *
+     * <p>This is a plain substring match, so it also accepts e.g. "cnblogs" URLs.
+     */
+    @Override
+    public boolean supports(String url) {
+        return url != null && (url.contains("blog") || url.contains("wordpress") || url.contains("lofter") || url.contains("hexo"));
+    }
+
+    /**
+     * Downloads {@code url} with jsoup and parses it.
+     *
+     * @param url the page to fetch
+     * @return the parsed articles; for a non-I/O, non-parse failure (for example jsoup rejecting
+     *     the URL) a single placeholder article describing the error
+     * @throws NetworkException if the HTTP request fails with an {@link IOException}
+     * @throws ParseException if {@link #parse(Document, String)} fails
+     */
+    @Override
+    public List<Article> crawl(String url) {
+        try {
+            Document doc = Jsoup.connect(url)
+                    .userAgent(USER_AGENT)
+                    .timeout(TIMEOUT_MS)
+                    .get();
+            return parse(doc, url);
+        } catch (IOException e) {
+            throw new NetworkException("网络请求失败:" + e.getMessage(), e);
+        } catch (ParseException e) {
+            throw e;
+        } catch (Exception e) {
+            // Deliberate best-effort: surface the failure as a placeholder entry.
+            List<Article> failure = new ArrayList<>();
+            failure.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null));
+            return failure;
+        }
+    }
+
+    /**
+     * Extracts articles from an already-downloaded page, picking the layout by URL substring
+     * ("lofter", then "wordpress", otherwise the generic layout).
+     *
+     * @param doc the downloaded page
+     * @param url the address the page came from
+     * @return the extracted articles, possibly empty
+     * @throws ParseException wrapping any exception raised during extraction
+     */
+    @Override
+    public List<Article> parse(Document doc, String url) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        try {
+            if (url.contains("lofter")) {
+                crawlLofter(doc, articles);
+            } else if (url.contains("wordpress")) {
+                crawlWordpress(doc, articles);
+            } else {
+                crawlGenericBlog(doc, url, articles);
+            }
+        } catch (Exception e) {
+            throw new ParseException("解析博客网站失败:" + e.getMessage(), e);
+        }
+        return articles;
+    }
+
+    /** Collects one article per {@code .m-post} element that has a non-empty title. */
+    private void crawlLofter(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".m-post")) {
+            Elements titleLink = item.select(".m-post-title a");
+            String title = titleLink.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = titleLink.attr("href");
+            String author = item.select(".m-user-name").text();
+            String summary = item.select(".m-post-content").text();
+            articles.add(new Article(title, link, truncate(summary, SUMMARY_MAX_LENGTH), author, null));
+        }
+    }
+
+    /** Collects one article per {@code .post} element that has a non-empty title. */
+    private void crawlWordpress(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".post")) {
+            Elements titleLink = item.select(".entry-title a");
+            String title = titleLink.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = titleLink.attr("href");
+            String author = item.select(".author").text();
+            String summary = item.select(".entry-summary").text();
+            articles.add(new Article(title, link, truncate(summary, SUMMARY_MAX_LENGTH), author, null));
+        }
+    }
+
+    /**
+     * Fallback layout: one article per {@code .article}, {@code .post} or {@code .blog-post}
+     * element, all attributed to the page URL itself.
+     */
+    private void crawlGenericBlog(Document doc, String url, List<Article> articles) {
+        for (Element item : doc.select(".article, .post, .blog-post")) {
+            String title = item.select("h1, h2, .title").text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String content = item.select(".content, .post-content").text();
+            articles.add(new Article(title, url, truncate(content, SUMMARY_MAX_LENGTH), "未知作者", null));
+        }
+    }
+
+    /**
+     * Cuts {@code text} to at most {@code maxLength} UTF-16 code units without splitting a
+     * surrogate pair, so the result never ends in half of a supplementary character.
+     */
+    private static String truncate(String text, int maxLength) {
+        if (text.length() <= maxLength) {
+            return text;
+        }
+        int end = maxLength;
+        if (Character.isHighSurrogate(text.charAt(end - 1))) {
+            end--;
+        }
+        return text.substring(0, end);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
new file mode 100644
index 0000000..b3cc570
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
@@ -0,0 +1,12 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.exception.ParseException;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/**
+ * A site-specific way of turning a web page into {@link Article} objects.
+ */
+public interface CrawlStrategy {
+    /**
+     * Tells whether this strategy is willing to handle {@code url}.
+     *
+     * @param url the candidate address
+     * @return {@code true} if this strategy should be used for {@code url}
+     */
+    boolean supports(String url);
+
+    /**
+     * Fetches {@code url} and extracts its articles.
+     *
+     * @param url the address to fetch
+     * @return the extracted articles
+     */
+    List<Article> crawl(String url);
+
+    /**
+     * Extracts articles from an already-downloaded page.
+     *
+     * @param doc the downloaded page
+     * @param url the address the page came from
+     * @return the extracted articles
+     * @throws ParseException if the page cannot be interpreted
+     */
+    List<Article> parse(Document doc, String url) throws ParseException;
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
new file mode 100644
index 0000000..c111a3e
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
@@ -0,0 +1,118 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link CrawlStrategy} for news portals: Sina, NetEase (163), Sohu, Tencent (QQ), and a
+ * generic fallback layout.
+ */
+public class NewsStrategy implements CrawlStrategy {
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
+    private static final int TIMEOUT_MS = 10000;
+    /** Maximum number of UTF-16 code units kept from a summary. */
+    private static final int SUMMARY_MAX_LENGTH = 300;
+    /** Prefix used for NetEase links when jsoup cannot resolve them itself. */
+    private static final String NETEASE_BASE = "https://news.163.com";
+
+    /**
+     * Accepts URLs containing "news", "sina", "163", "sohu" or "qq.com" (plain substring match).
+     */
+    @Override
+    public boolean supports(String url) {
+        return url != null && (url.contains("news") || url.contains("sina") || url.contains("163") || url.contains("sohu") || url.contains("qq.com"));
+    }
+
+    /**
+     * Downloads {@code url} with jsoup and parses it.
+     *
+     * @param url the page to fetch
+     * @return the parsed articles; for a non-I/O, non-parse failure (for example jsoup rejecting
+     *     the URL) a single placeholder article describing the error
+     * @throws NetworkException if the HTTP request fails with an {@link IOException}
+     * @throws ParseException if {@link #parse(Document, String)} fails
+     */
+    @Override
+    public List<Article> crawl(String url) {
+        try {
+            Document doc = Jsoup.connect(url)
+                    .userAgent(USER_AGENT)
+                    .timeout(TIMEOUT_MS)
+                    .get();
+            return parse(doc, url);
+        } catch (IOException e) {
+            throw new NetworkException("网络请求失败:" + e.getMessage(), e);
+        } catch (ParseException e) {
+            throw e;
+        } catch (Exception e) {
+            // Deliberate best-effort: surface the failure as a placeholder entry.
+            List<Article> failure = new ArrayList<>();
+            failure.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null));
+            return failure;
+        }
+    }
+
+    /**
+     * Extracts articles from an already-downloaded page, picking the layout by URL substring.
+     *
+     * @param doc the downloaded page
+     * @param url the address the page came from
+     * @return the extracted articles, possibly empty
+     * @throws ParseException wrapping any exception raised during extraction
+     */
+    @Override
+    public List<Article> parse(Document doc, String url) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        try {
+            if (url.contains("sina")) {
+                crawlSina(doc, articles);
+            } else if (url.contains("163") || url.contains("netease")) {
+                crawlNetease(doc, articles);
+            } else if (url.contains("sohu")) {
+                crawlSohu(doc, articles);
+            } else if (url.contains("qq")) {
+                crawlQQ(doc, articles);
+            } else {
+                crawlGenericNews(doc, url, articles);
+            }
+        } catch (Exception e) {
+            throw new ParseException("解析新闻网站失败:" + e.getMessage(), e);
+        }
+        return articles;
+    }
+
+    /** Collects one article per {@code .news-item} element that has non-empty link text. */
+    private void crawlSina(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".news-item")) {
+            Elements anchors = item.select("a");
+            String title = anchors.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = anchors.attr("href");
+            String summary = item.select(".news-summary").text();
+            articles.add(new Article(title, link, truncate(summary, SUMMARY_MAX_LENGTH), "新浪新闻", null));
+        }
+    }
+
+    /** Collects one article per {@code .news-list li} element, with an absolute link. */
+    private void crawlNetease(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".news-list li")) {
+            Elements anchors = item.select("a");
+            String title = anchors.text();
+            String link = resolveLink(anchors, NETEASE_BASE);
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, link, "", "网易新闻", null));
+            }
+        }
+    }
+
+    /** Collects one article per {@code .news-item h3 a} anchor with non-empty text. */
+    private void crawlSohu(Document doc, List<Article> articles) {
+        for (Element anchor : doc.select(".news-item h3 a")) {
+            String title = anchor.text();
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, anchor.attr("href"), "", "搜狐新闻", null));
+            }
+        }
+    }
+
+    /** Collects one article per {@code .list li a} anchor with non-empty text. */
+    private void crawlQQ(Document doc, List<Article> articles) {
+        for (Element anchor : doc.select(".list li a")) {
+            String title = anchor.text();
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, anchor.attr("href"), "", "腾讯新闻", null));
+            }
+        }
+    }
+
+    /**
+     * Fallback layout: one article per {@code .news} or {@code .article-item} element, with
+     * the link made absolute relative to the page.
+     */
+    private void crawlGenericNews(Document doc, String url, List<Article> articles) {
+        for (Element item : doc.select(".news, .article-item")) {
+            String title = item.select("h2, h3, .title").text();
+            String link = resolveLink(item.select("a"), url);
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, link, "", "新闻网站", null));
+            }
+        }
+    }
+
+    /**
+     * Returns an absolute form of the first {@code href} among {@code anchors}.
+     *
+     * <p>jsoup's {@code abs:} attribute prefix is tried first: it resolves the href against the
+     * document's base URI, which handles protocol-relative ({@code //host/x}) and
+     * path-relative ({@code a/b.html}) links that plain string concatenation mangles. When
+     * jsoup yields nothing (no base URI, or no href at all), the previous behaviour is kept:
+     * hrefs starting with "http" are returned as-is and anything else is appended to
+     * {@code fallbackBase}.
+     */
+    private static String resolveLink(Elements anchors, String fallbackBase) {
+        String absolute = anchors.attr("abs:href");
+        if (!absolute.isEmpty()) {
+            return absolute;
+        }
+        String raw = anchors.attr("href");
+        return raw.startsWith("http") ? raw : fallbackBase + raw;
+    }
+
+    /**
+     * Cuts {@code text} to at most {@code maxLength} UTF-16 code units without splitting a
+     * surrogate pair, so the result never ends in half of a supplementary character.
+     */
+    private static String truncate(String text, int maxLength) {
+        if (text.length() <= maxLength) {
+            return text;
+        }
+        int end = maxLength;
+        if (Character.isHighSurrogate(text.charAt(end - 1))) {
+            end--;
+        }
+        return text.substring(0, end);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
new file mode 100644
index 0000000..4bd0ce2
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
@@ -0,0 +1,27 @@
+package com.example.datacollect.strategy;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Registry of the available {@link CrawlStrategy} implementations.
+ */
+public class StrategyFactory {
+    /**
+     * Strategies in lookup order; the first one whose {@code supports} accepts a URL wins.
+     *
+     * <p>{@link TechStrategy} must come before {@link BlogStrategy}: the blog strategy accepts
+     * any URL containing "blog", which includes "cnblogs" and "blog.csdn.net", so with the
+     * blog strategy first the tech-site parsers could never be selected for those sites.
+     */
+    private static final List<CrawlStrategy> STRATEGIES =
+            List.of(new TechStrategy(), new BlogStrategy(), new NewsStrategy());
+
+    /**
+     * Finds the first registered strategy that supports {@code url}.
+     *
+     * @param url the address to be crawled
+     * @return the matching strategy, or {@code null} when none supports the URL
+     */
+    public static CrawlStrategy getStrategy(String url) {
+        for (CrawlStrategy strategy : STRATEGIES) {
+            if (strategy.supports(url)) {
+                return strategy;
+            }
+        }
+        return null;
+    }
+
+    /**
+     * Returns the registered strategies in lookup order.
+     *
+     * @return a fresh mutable copy; changing it does not affect the registry
+     */
+    public static List<CrawlStrategy> getAllStrategies() {
+        return new ArrayList<>(STRATEGIES);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java b/project/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java
new file mode 100644
index 0000000..9255405
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java
@@ -0,0 +1,105 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.exception.NetworkException;
+import com.example.datacollect.exception.ParseException;
+import org.jsoup.Jsoup;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link CrawlStrategy} for developer sites: CSDN, cnblogs, OSChina, and a generic
+ * single-article fallback.
+ */
+public class TechStrategy implements CrawlStrategy {
+    private static final String USER_AGENT =
+            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36";
+    private static final int TIMEOUT_MS = 10000;
+    /** Maximum number of UTF-16 code units kept from the generic fallback's content. */
+    private static final int CONTENT_MAX_LENGTH = 500;
+    /** Prefix used for OSChina links when jsoup cannot resolve them itself. */
+    private static final String OSCHINA_BASE = "https://www.oschina.net";
+
+    /** Accepts URLs containing "csdn", "oschina", "iteye" or "cnblogs" (plain substring match). */
+    @Override
+    public boolean supports(String url) {
+        return url != null && (url.contains("csdn") || url.contains("oschina") || url.contains("iteye") || url.contains("cnblogs"));
+    }
+
+    /**
+     * Downloads {@code url} with jsoup and parses it.
+     *
+     * @param url the page to fetch
+     * @return the parsed articles; for a non-I/O, non-parse failure (for example jsoup rejecting
+     *     the URL) a single placeholder article describing the error
+     * @throws NetworkException if the HTTP request fails with an {@link IOException}
+     * @throws ParseException if {@link #parse(Document, String)} fails
+     */
+    @Override
+    public List<Article> crawl(String url) {
+        try {
+            Document doc = Jsoup.connect(url)
+                    .userAgent(USER_AGENT)
+                    .timeout(TIMEOUT_MS)
+                    .get();
+            return parse(doc, url);
+        } catch (IOException e) {
+            throw new NetworkException("网络请求失败:" + e.getMessage(), e);
+        } catch (ParseException e) {
+            throw e;
+        } catch (Exception e) {
+            // Deliberate best-effort: surface the failure as a placeholder entry.
+            List<Article> failure = new ArrayList<>();
+            failure.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null));
+            return failure;
+        }
+    }
+
+    /**
+     * Extracts articles from an already-downloaded page, picking the layout by URL substring
+     * ("csdn", then "cnblogs", then "oschina", otherwise the generic layout).
+     *
+     * @param doc the downloaded page
+     * @param url the address the page came from
+     * @return the extracted articles, possibly empty
+     * @throws ParseException wrapping any exception raised during extraction
+     */
+    @Override
+    public List<Article> parse(Document doc, String url) throws ParseException {
+        List<Article> articles = new ArrayList<>();
+        try {
+            if (url.contains("csdn")) {
+                crawlCsdn(doc, articles);
+            } else if (url.contains("cnblogs")) {
+                crawlCnblogs(doc, articles);
+            } else if (url.contains("oschina")) {
+                crawlOschina(doc, articles);
+            } else {
+                crawlGeneric(doc, url, articles);
+            }
+        } catch (Exception e) {
+            throw new ParseException("解析技术网站失败:" + e.getMessage(), e);
+        }
+        return articles;
+    }
+
+    /** Collects one article per {@code .article-item-box} element with a non-empty title. */
+    private void crawlCsdn(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".article-item-box")) {
+            Elements titleLink = item.select("h4 a");
+            String title = titleLink.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = titleLink.attr("href");
+            String author = item.select(".name").text();
+            String summary = item.select(".content").text();
+            articles.add(new Article(title, link, summary, author, null));
+        }
+    }
+
+    /** Collects one article per {@code .post-item} element with a non-empty title. */
+    private void crawlCnblogs(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".post-item")) {
+            Elements titleLink = item.select(".post-item-title a");
+            String title = titleLink.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = titleLink.attr("href");
+            String author = item.select(".post-item-author a").text();
+            String summary = item.select(".post-item-summary").text();
+            articles.add(new Article(title, link, summary, author, null));
+        }
+    }
+
+    /**
+     * Collects one article per {@code .news-list .news-item} element with a non-empty title.
+     *
+     * <p>The link is made absolute without blindly prefixing the site root, which would
+     * produce a doubled host for hrefs that are already absolute.
+     */
+    private void crawlOschina(Document doc, List<Article> articles) {
+        for (Element item : doc.select(".news-list .news-item")) {
+            Elements titleLink = item.select(".title a");
+            String title = titleLink.text();
+            if (title.isEmpty()) {
+                continue;
+            }
+            String link = resolveLink(titleLink, OSCHINA_BASE);
+            String author = item.select(".author").text();
+            String summary = item.select(".description").text();
+            articles.add(new Article(title, link, summary, author, null));
+        }
+    }
+
+    /**
+     * Fallback layout: treats the whole page as one article, titled with the document title.
+     */
+    private void crawlGeneric(Document doc, String url, List<Article> articles) {
+        String title = doc.title();
+        if (title.isEmpty()) {
+            return;
+        }
+        String content = doc.select("article, .article-content, .post-content").text();
+        articles.add(new Article(title, url, truncate(content, CONTENT_MAX_LENGTH), "未知", null));
+    }
+
+    /**
+     * Returns an absolute form of the first {@code href} among {@code anchors}.
+     *
+     * <p>jsoup's {@code abs:} attribute prefix is tried first; it resolves the href against the
+     * document's base URI. When that yields nothing (no base URI, or no href at all), hrefs
+     * starting with "http" are returned as-is and anything else is appended to
+     * {@code fallbackBase}.
+     */
+    private static String resolveLink(Elements anchors, String fallbackBase) {
+        String absolute = anchors.attr("abs:href");
+        if (!absolute.isEmpty()) {
+            return absolute;
+        }
+        String raw = anchors.attr("href");
+        return raw.startsWith("http") ? raw : fallbackBase + raw;
+    }
+
+    /**
+     * Cuts {@code text} to at most {@code maxLength} UTF-16 code units without splitting a
+     * surrogate pair, so the result never ends in half of a supplementary character.
+     */
+    private static String truncate(String text, int maxLength) {
+        if (text.length() <= maxLength) {
+            return text;
+        }
+        int end = maxLength;
+        if (Character.isHighSurrogate(text.charAt(end - 1))) {
+            end--;
+        }
+        return text.substring(0, end);
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java b/project/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
new file mode 100644
index 0000000..f501e0d
--- /dev/null
+++ b/project/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java
@@ -0,0 +1,53 @@
+package com.example.datacollect.view;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.List;
+import java.util.Scanner;
+
+/**
+ * Console front end: reads commands from standard input and prints coloured messages and
+ * article listings to standard output. Every message is also sent to the logger.
+ */
+public class ConsoleView {
+    private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class);
+    private static final String ANSI_RESET = "\u001B[0m";
+    private static final String ANSI_GREEN = "\u001B[32m";
+    private static final String ANSI_RED = "\u001B[31m";
+    private static final String ANSI_BLUE = "\u001B[34m";
+
+    private final Scanner scanner = new Scanner(System.in);
+
+    /**
+     * Prints the {@code "> "} prompt and reads one line from standard input.
+     *
+     * @return the line entered, without its line terminator
+     * @throws java.util.NoSuchElementException if standard input has no further line
+     */
+    public String readLine() {
+        System.out.print("> ");
+        String line = scanner.nextLine();
+        logger.debug("User input: {}", line);
+        return line;
+    }
+
+    /** Prints {@code msg} in green and logs it at INFO level. */
+    public void printSuccess(String msg) {
+        logger.info("Success: {}", msg);
+        System.out.println(ANSI_GREEN + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in red (to standard output) and logs it at ERROR level. */
+    public void printError(String msg) {
+        logger.error("Error: {}", msg);
+        System.out.println(ANSI_RED + msg + ANSI_RESET);
+    }
+
+    /** Prints {@code msg} in blue and logs it at DEBUG level. */
+    public void printInfo(String msg) {
+        logger.debug("Info: {}", msg);
+        System.out.println(ANSI_BLUE + msg + ANSI_RESET);
+    }
+
+    /**
+     * Prints a numbered {@code title | url} line for each article.
+     *
+     * <p>A {@code null} or empty list prints a hint to run the crawl command instead.
+     *
+     * @param articles the articles to list; may be {@code null}
+     */
+    public void display(List<Article> articles) {
+        if (articles == null || articles.isEmpty()) {
+            logger.info("No articles to display");
+            printInfo("暂无文章,请先执行 crawl。");
+            return;
+        }
+        logger.info("Displaying {} articles", articles.size());
+        for (int i = 0; i < articles.size(); i++) {
+            Article a = articles.get(i);
+            System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl());
+        }
+    }
+}
\ No newline at end of file
diff --git a/project/java-cli/src/main/resources/logback.xml b/project/java-cli/src/main/resources/logback.xml
new file mode 100644
index 0000000..e374143
--- /dev/null
+++ b/project/java-cli/src/main/resources/logback.xml
@@ -0,0 +1,22 @@
+
+
+
+ %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/project/java-cli/target/classes/.DS_Store b/project/java-cli/target/classes/.DS_Store
new file mode 100644
index 0000000..f1ec01e
Binary files /dev/null and b/project/java-cli/target/classes/.DS_Store differ
diff --git a/project/java-cli/target/classes/com/.DS_Store b/project/java-cli/target/classes/com/.DS_Store
new file mode 100644
index 0000000..3496c03
Binary files /dev/null and b/project/java-cli/target/classes/com/.DS_Store differ
diff --git a/project/java-cli/target/classes/com/example/.DS_Store b/project/java-cli/target/classes/com/example/.DS_Store
new file mode 100644
index 0000000..4b49caf
Binary files /dev/null and b/project/java-cli/target/classes/com/example/.DS_Store differ
diff --git a/project/java-cli/target/classes/logback.xml b/project/java-cli/target/classes/logback.xml
new file mode 100644
index 0000000..e374143
--- /dev/null
+++ b/project/java-cli/target/classes/logback.xml
@@ -0,0 +1,22 @@
+
+
+
+ %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+ logs/crawler.log
+
+ %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/project/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
new file mode 100644
index 0000000..e69de29
diff --git a/project/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/project/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
new file mode 100644
index 0000000..270e15d
--- /dev/null
+++ b/project/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst
@@ -0,0 +1,22 @@
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\model\Article.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\exception\CrawlerException.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\strategy\TechStrategy.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\repository\ArticleRepository.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\Main.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\strategy\BlogStrategy.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\Command.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\ExitCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\HelpCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\CrawlCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\HistoryCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\strategy\NewsStrategy.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\exception\NetworkException.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\SaveCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\controller\CrawlerController.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\ListCommand.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\service\ScraperService.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\exception\ParseException.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\view\ConsoleView.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\strategy\StrategyFactory.java
+D:\ҵ\ZY\W11\java-cli\src\main\java\com\example\datacollect\command\AnalyzeCommand.java