From ff0d876dd0569475b165dbec26b607d603bc5fd6 Mon Sep 17 00:00:00 2001 From: 86150 <86150@hunnu.edu.cn> Date: Thu, 28 May 2026 19:48:45 +0800 Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0W11?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- W11/java-cli/.gitignore | 4 + W11/java-cli/README.md | 17 + W11/java-cli/pom.xml | 59 ++ .../java/com/example/datacollect/Main.java | 31 + .../datacollect/command/AnalyzeCommand.java | 106 +++ .../example/datacollect/command/Command.java | 9 + .../datacollect/command/CrawlCommand.java | 61 ++ .../datacollect/command/ExitCommand.java | 27 + .../datacollect/command/HelpCommand.java | 32 + .../datacollect/command/HistoryCommand.java | 55 ++ .../datacollect/command/ListCommand.java | 41 + .../controller/CrawlerController.java | 59 ++ .../exception/CrawlerException.java | 11 + .../exception/NetworkException.java | 11 + .../datacollect/exception/ParseException.java | 11 + .../example/datacollect/model/Article.java | 75 ++ .../repository/ArticleRepository.java | 52 ++ .../datacollect/service/ScraperService.java | 56 ++ .../datacollect/strategy/BlogStrategy.java | 93 +++ .../datacollect/strategy/CrawlStrategy.java | 12 + .../datacollect/strategy/NewsStrategy.java | 118 +++ .../datacollect/strategy/StrategyFactory.java | 27 + .../datacollect/strategy/TechStrategy.java | 105 +++ .../example/datacollect/view/ConsoleView.java | 53 ++ W11/java-cli/src/main/resources/logback.xml | 22 + .../target/W9工程架构 - 教案v3.md | 758 ++++++++++++++++++ .../target/maven-archiver/pom.properties | 5 + .../compile/default-compile/createdFiles.lst | 0 W11/java-cli/target/w9-ppt.md | 530 ++++++++++++ 29 files changed, 2440 insertions(+) create mode 100644 W11/java-cli/.gitignore create mode 100644 W11/java-cli/README.md create mode 100644 W11/java-cli/pom.xml create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/Main.java create mode 100644 
W11/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/Command.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/model/Article.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java create mode 100644 W11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java create mode 100644 W11/java-cli/src/main/resources/logback.xml create 
mode 100644 W11/java-cli/target/W9工程架构 - 教案v3.md create mode 100644 W11/java-cli/target/maven-archiver/pom.properties create mode 100644 W11/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst create mode 100644 W11/java-cli/target/w9-ppt.md diff --git a/W11/java-cli/.gitignore b/W11/java-cli/.gitignore new file mode 100644 index 0000000..0ebcf1a --- /dev/null +++ b/W11/java-cli/.gitignore @@ -0,0 +1,4 @@ +*.jar +*.jar +*.class +*.log \ No newline at end of file diff --git a/W11/java-cli/README.md b/W11/java-cli/README.md new file mode 100644 index 0000000..3ea02ec --- /dev/null +++ b/W11/java-cli/README.md @@ -0,0 +1,17 @@ +# DataCollect 教学项目 — 最小可运行版本 + +这是一个最小可用的 Java CLI 演示工程,目标:打印帮助信息以验证运行环境。 + +构建: +```bash +mvn -q package +``` + +运行(示例): +```bash +java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar --help +``` + +项目结构(最小): +- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,打印帮助 +- `pom.xml` — Maven 构建配置,生成可执行 jar diff --git a/W11/java-cli/pom.xml b/W11/java-cli/pom.xml new file mode 100644 index 0000000..53f5449 --- /dev/null +++ b/W11/java-cli/pom.xml @@ -0,0 +1,59 @@ + + 4.0.0 + com.example + datacollect-cli + 0.1.0 + + 11 + 11 + + + + + org.jsoup + jsoup + 1.17.2 + + + ch.qos.logback + logback-classic + 1.4.14 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.datacollect.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/Main.java b/W11/java-cli/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..7c1ae59 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/Main.java @@ -0,0 +1,31 @@ +package com.example.datacollect; + +import com.example.datacollect.controller.CrawlerController; +import 
com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class Main { + private static final Logger logger = LoggerFactory.getLogger(Main.class); + + public static void main(String[] args) { + logger.info("Starting CLI Crawler application"); + + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + CrawlerController controller = new CrawlerController(view, repository); + + view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands."); + logger.info("Application started successfully"); + + while (true) { + try { + controller.handle(view.readLine()); + } catch (Exception e) { + logger.error("Error processing command", e); + view.printError("Error: " + e.getMessage()); + } + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java new file mode 100644 index 0000000..b27fb87 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/AnalyzeCommand.java @@ -0,0 +1,106 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class AnalyzeCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); + private final ConsoleView view; + + public AnalyzeCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "analyze"; + } + + @Override + public void execute(String[] args, 
ArticleRepository repository) { + logger.debug("Executing analyze command"); + + if (args.length >= 2) { + String url = args[1]; + analyzeUrl(url); + } else { + analyzeRepository(repository); + } + } + + private void analyzeUrl(String url) { + logger.info("Analyzing URL: {}", url); + CrawlStrategy strategy = StrategyFactory.getStrategy(url); + + if (strategy == null) { + logger.error("No strategy found for URL: {}", url); + view.printError("No strategy found for URL: " + url); + return; + } + + view.printInfo("Analyzing URL: " + url); + view.printInfo("Using strategy: " + strategy.getClass().getSimpleName()); + + List
articles = strategy.crawl(url); + + printAnalysis(articles); + logger.info("Analysis completed for URL: {}", url); + view.printInfo("Note: Analysis results are NOT stored."); + } + + private void analyzeRepository(ArticleRepository repository) { + List
articles = repository.getAll(); + + if (articles.isEmpty()) { + logger.info("No articles to analyze"); + view.printInfo("No articles to analyze. Use 'analyze ' to analyze a URL without storing."); + return; + } + + logger.info("Analyzing {} articles from repository", articles.size()); + view.printInfo("Analyzing " + articles.size() + " articles from repository:"); + printAnalysis(articles); + } + + private void printAnalysis(List
articles) { + if (articles.isEmpty()) { + logger.info("No articles found for analysis"); + view.printInfo("No articles found."); + return; + } + + int totalArticles = articles.size(); + int totalContentLength = 0; + int articlesWithAuthor = 0; + int articlesWithDate = 0; + + for (Article article : articles) { + if (article.getContent() != null) { + totalContentLength += article.getContent().length(); + } + if (article.getAuthor() != null && !article.getAuthor().isEmpty()) { + articlesWithAuthor++; + } + if (article.getPublishDate() != null) { + articlesWithDate++; + } + } + + double avgContentLength = totalArticles > 0 ? (double) totalContentLength / totalArticles : 0; + + logger.info("Analysis results: {} articles, {} avg length", totalArticles, avgContentLength); + view.printInfo("=== Analysis Results ==="); + view.printInfo("Total articles: " + totalArticles); + view.printInfo("Total content length: " + totalContentLength); + view.printInfo("Average content length: " + String.format("%.2f", avgContentLength)); + view.printInfo("Articles with author: " + articlesWithAuthor); + view.printInfo("Articles with publish date: " + articlesWithDate); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/Command.java b/W11/java-cli/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..24e59a6 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,9 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import java.util.List; + +public interface Command { + String getName(); + void execute(String[] args, List
articles); +} diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..76b1e6b --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,61 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.service.ScraperService; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final ConsoleView view; + private final ScraperService scraperService; + + public CrawlCommand(ConsoleView view) { + this.view = view; + this.scraperService = new ScraperService(); + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + if (args.length < 2) { + logger.warn("Missing URL argument"); + view.printError("Usage: crawl "); + return; + } + + String url = args[1]; + logger.info("Crawl started for: {}", url); + + CrawlStrategy strategy = StrategyFactory.getStrategy(url); + + if (strategy == null) { + logger.error("No strategy found for URL: {}", url); + view.printError("No strategy found for URL: " + url); + return; + } + + logger.info("Using strategy: {}", strategy.getClass().getSimpleName()); + view.printInfo("Crawling " + url + " with strategy: " + strategy.getClass().getSimpleName()); + + try { + List
articles = scraperService.scrapeWithRetry(strategy, url); + repository.addAll(articles); + logger.info("Crawled {} articles successfully", articles.size()); + view.printSuccess("Crawled " + articles.size() + " articles"); + } catch (Exception e) { + logger.error("Error crawling URL: {}", url, e); + view.printError("Error: " + e.getMessage()); + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java new file mode 100644 index 0000000..8670081 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -0,0 +1,27 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ExitCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); + private final ConsoleView view; + + public ExitCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.info("User requested exit"); + view.printSuccess("Goodbye!"); + System.exit(0); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..4d41c43 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,32 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class HelpCommand 
implements Command { + private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.debug("Executing help command"); + view.printInfo("Commands:"); + view.printInfo(" crawl - 爬取指定 URL 的文章"); + view.printInfo(" list - 列出已爬取的文章"); + view.printInfo(" analyze - 分析文章统计信息"); + view.printInfo(" history - 显示命令历史记录"); + view.printInfo(" help - 显示此帮助信息"); + view.printInfo(" exit - 退出程序"); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java new file mode 100644 index 0000000..ba5a182 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/HistoryCommand.java @@ -0,0 +1,55 @@ +package com.example.datacollect.command; + +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.List; + +public class HistoryCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class); + private static final List commandHistory = new ArrayList<>(); + private final ConsoleView view; + + public HistoryCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "history"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.debug("Executing history command"); + + if (commandHistory.isEmpty()) { + logger.info("Command history is empty"); + view.printInfo("No command history."); + return; + } + + logger.info("Showing {} command history items", 
commandHistory.size()); + view.printInfo("Command History:"); + for (int i = 0; i < commandHistory.size(); i++) { + view.printInfo((i + 1) + ". " + commandHistory.get(i)); + } + } + + public static void addCommand(String command) { + commandHistory.add(command); + logger.debug("Command added to history: {}", command); + } + + public static List getCommandHistory() { + return new ArrayList<>(commandHistory); + } + + public static void clearHistory() { + commandHistory.clear(); + logger.info("Command history cleared"); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java b/W11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java new file mode 100644 index 0000000..551f634 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/command/ListCommand.java @@ -0,0 +1,41 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class ListCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); + private final ConsoleView view; + + public ListCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args, ArticleRepository repository) { + logger.debug("Executing list command"); + List
articles = repository.getAll(); + + if (articles.isEmpty()) { + logger.info("No articles found"); + view.printInfo("No articles yet. Use 'crawl ' to get started."); + return; + } + + logger.info("Listing {} articles", articles.size()); + view.printInfo("=== Articles (" + articles.size() + ") ==="); + for (Article article : articles) { + view.printInfo(article.toString()); + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java b/W11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..17e7cf8 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -0,0 +1,59 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.*; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.HashMap; +import java.util.Map; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>(); + private final ConsoleView view; + private final ArticleRepository repository; + + public CrawlerController(ConsoleView view, ArticleRepository repository) { + this.view = view; + this.repository = repository; + logger.info("Initializing CrawlerController with {} commands", 6); + register(new HelpCommand(view)); + register(new ListCommand(view)); + register(new CrawlCommand(view)); + register(new ExitCommand(view)); + register(new HistoryCommand(view)); + register(new AnalyzeCommand(view)); + logger.info("CrawlerController initialized successfully"); + } + + private void register(Command command) { + commands.put(command.getName(), command); + logger.debug("Registered command: {}", command.getName()); + } + + public void handle(String 
input) { + String text = input == null ? "" : input.trim(); + if (text.isEmpty()) { + return; + } + + logger.debug("Handling input: {}", text); + + // 记录命令历史 + HistoryCommand.addCommand(text); + + String[] args = text.split("\\s+"); + String cmdName = args[0].toLowerCase(); + Command command = commands.get(cmdName); + if (command == null) { + logger.warn("Unknown command: {}", cmdName); + view.printError("Unknown command: " + cmdName); + return; + } + + logger.info("Executing command: {}", cmdName); + command.execute(args, repository); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java b/W11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..d9c9c2e --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends RuntimeException { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java b/W11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..0fb8e5e --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message) { + super(message); + } + + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java 
b/W11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..205665a --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/model/Article.java b/W11/java-cli/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..746abf6 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,75 @@ +package com.example.datacollect.model; + +import java.time.LocalDate; + +public class Article { + private String title; + private String url; + private String content; + private String author; + private LocalDate publishDate; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public Article(String title, String url, String content, String author, LocalDate publishDate) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishDate = publishDate; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public String getAuthor() { + return author; + } + + public void setAuthor(String author) { + this.author = author; + } + + public LocalDate getPublishDate() { + return publishDate; + } + + public void 
setPublishDate(LocalDate publishDate) { + this.publishDate = publishDate; + } + + @Override + public String toString() { + return "Article{" + + "title='" + title + '\'' + + ", url='" + url + '\'' + + ", author='" + author + '\'' + + ", publishDate=" + publishDate + + '}'; + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/W11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..761ec36 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -0,0 +1,52 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class ArticleRepository { + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private final List
articles = new ArrayList<>(); + + public void add(Article article) { + if (article == null) { + logger.warn("Attempted to add null article"); + return; + } + if (article.getTitle() == null || article.getTitle().isEmpty()) { + logger.warn("Attempted to add article with empty title"); + return; + } + articles.add(article); + logger.debug("Added article: {}", article.getTitle()); + } + + public void addAll(List
articleList) { + if (articleList == null) { + logger.warn("Attempted to add null article list"); + return; + } + for (Article article : articleList) { + add(article); + } + logger.info("Added {} articles", articleList.size()); + } + + public List
getAll() { + return Collections.unmodifiableList(articles); + } + + public void clear() { + int size = articles.size(); + articles.clear(); + logger.info("Cleared {} articles from repository", size); + } + + public int size() { + return articles.size(); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java b/W11/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java new file mode 100644 index 0000000..c55bd9b --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/service/ScraperService.java @@ -0,0 +1,56 @@ +package com.example.datacollect.service; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.model.Article; +import com.example.datacollect.strategy.CrawlStrategy; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; + +public class ScraperService { + private static final Logger logger = LoggerFactory.getLogger(ScraperService.class); + + private static final int MAX_RETRY = 3; + private static final long INITIAL_DELAY_MS = 1000; + private static final double BACKOFF_MULTIPLIER = 2.0; + + public List
scrapeWithRetry(CrawlStrategy strategy, String url) { + int attempt = 0; + long delay = INITIAL_DELAY_MS; + + while (attempt < MAX_RETRY) { + try { + attempt++; + logger.info("Attempt {}/{} to crawl {}", attempt, MAX_RETRY, url); + + List
articles = strategy.crawl(url); + + if (attempt > 1) { + logger.info("Successfully crawled {} on attempt {}", url, attempt); + } + return articles; + + } catch (NetworkException e) { + logger.warn("Network error on attempt {} for {}: {}", attempt, url, e.getMessage()); + + if (attempt < MAX_RETRY) { + try { + logger.info("Retrying after {}ms...", delay); + Thread.sleep(delay); + delay = (long) (delay * BACKOFF_MULTIPLIER); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new CrawlerException("Interrupted during retry wait", ie); + } + } else { + logger.error("Failed to crawl {} after {} attempts due to network errors", url, MAX_RETRY); + throw new CrawlerException("Failed to crawl " + url + " after " + MAX_RETRY + " attempts", e); + } + } + } + + throw new CrawlerException("Unexpected error: max retry attempts exhausted"); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/W11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..ded867d --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/strategy/BlogStrategy.java @@ -0,0 +1,93 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class BlogStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url != null && (url.contains("blog") || url.contains("wordpress") || url.contains("lofter") || url.contains("hexo")); + } + + @Override + public List
crawl(String url) { + List
articles = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .timeout(10000) + .get(); + articles = parse(doc, url); + } catch (IOException e) { + throw new NetworkException("网络请求失败:" + e.getMessage(), e); + } catch (ParseException e) { + throw e; + } catch (Exception e) { + articles.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null)); + } + return articles; + } + + @Override + public List
parse(Document doc, String url) throws ParseException { + List
articles = new ArrayList<>(); + try { + if (url.contains("lofter")) { + crawlLofter(doc, url, articles); + } else if (url.contains("wordpress")) { + crawlWordpress(doc, url, articles); + } else { + crawlGenericBlog(doc, url, articles); + } + } catch (Exception e) { + throw new ParseException("解析博客网站失败:" + e.getMessage(), e); + } + return articles; + } + + private void crawlLofter(Document doc, String url, List
articles) { + Elements items = doc.select(".m-post"); + for (Element item : items) { + String title = item.select(".m-post-title a").text(); + String link = item.select(".m-post-title a").attr("href"); + String author = item.select(".m-user-name").text(); + String summary = item.select(".m-post-content").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary.length() > 300 ? summary.substring(0, 300) : summary, author, null)); + } + } + } + + private void crawlWordpress(Document doc, String url, List
articles) { + Elements items = doc.select(".post"); + for (Element item : items) { + String title = item.select(".entry-title a").text(); + String link = item.select(".entry-title a").attr("href"); + String author = item.select(".author").text(); + String summary = item.select(".entry-summary").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary.length() > 300 ? summary.substring(0, 300) : summary, author, null)); + } + } + } + + private void crawlGenericBlog(Document doc, String url, List
articles) { + Elements items = doc.select(".article, .post, .blog-post"); + for (Element item : items) { + String title = item.select("h1, h2, .title").text(); + String content = item.select(".content, .post-content").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, url, content.length() > 300 ? content.substring(0, 300) : content, "未知作者", null)); + } + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/W11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..b3cc570 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,12 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import com.example.datacollect.exception.ParseException; +import org.jsoup.nodes.Document; +import java.util.List; + +public interface CrawlStrategy { + boolean supports(String url); + List
crawl(String url); + List
parse(Document doc, String url) throws ParseException; +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/W11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..c111a3e --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/strategy/NewsStrategy.java @@ -0,0 +1,118 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class NewsStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url != null && (url.contains("news") || url.contains("sina") || url.contains("163") || url.contains("sohu") || url.contains("qq.com")); + } + + @Override + public List
crawl(String url) { + List
articles = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .timeout(10000) + .get(); + articles = parse(doc, url); + } catch (IOException e) { + throw new NetworkException("网络请求失败:" + e.getMessage(), e); + } catch (ParseException e) { + throw e; + } catch (Exception e) { + articles.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null)); + } + return articles; + } + + @Override + public List
parse(Document doc, String url) throws ParseException { + List
articles = new ArrayList<>(); + try { + if (url.contains("sina")) { + crawlSina(doc, url, articles); + } else if (url.contains("163") || url.contains("netease")) { + crawlNetease(doc, url, articles); + } else if (url.contains("sohu")) { + crawlSohu(doc, url, articles); + } else if (url.contains("qq")) { + crawlQQ(doc, url, articles); + } else { + crawlGenericNews(doc, url, articles); + } + } catch (Exception e) { + throw new ParseException("解析新闻网站失败:" + e.getMessage(), e); + } + return articles; + } + + private void crawlSina(Document doc, String url, List
articles) { + Elements items = doc.select(".news-item"); + for (Element item : items) { + String title = item.select("a").text(); + String link = item.select("a").attr("href"); + String summary = item.select(".news-summary").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary.length() > 300 ? summary.substring(0, 300) : summary, "新浪新闻", null)); + } + } + } + + private void crawlNetease(Document doc, String url, List
articles) { + Elements items = doc.select(".news-list li"); + for (Element item : items) { + String title = item.select("a").text(); + String link = item.select("a").attr("href"); + if (!link.startsWith("http")) link = "https://news.163.com" + link; + if (!title.isEmpty()) { + articles.add(new Article(title, link, "", "网易新闻", null)); + } + } + } + + private void crawlSohu(Document doc, String url, List
articles) { + Elements items = doc.select(".news-item h3 a"); + for (Element item : items) { + String title = item.text(); + String link = item.attr("href"); + if (!title.isEmpty()) { + articles.add(new Article(title, link, "", "搜狐新闻", null)); + } + } + } + + private void crawlQQ(Document doc, String url, List
articles) { + Elements items = doc.select(".list li a"); + for (Element item : items) { + String title = item.text(); + String link = item.attr("href"); + if (!title.isEmpty()) { + articles.add(new Article(title, link, "", "腾讯新闻", null)); + } + } + } + + private void crawlGenericNews(Document doc, String url, List
articles) { + Elements items = doc.select(".news, .article-item"); + for (Element item : items) { + String title = item.select("h2, h3, .title").text(); + String link = item.select("a").attr("href"); + if (!link.startsWith("http")) link = url + link; + if (!title.isEmpty()) { + articles.add(new Article(title, link, "", "新闻网站", null)); + } + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/W11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..4bd0ce2 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,27 @@ +package com.example.datacollect.strategy; + +import java.util.ArrayList; +import java.util.List; + +public class StrategyFactory { + private static final List strategies = new ArrayList<>(); + + static { + strategies.add(new BlogStrategy()); + strategies.add(new NewsStrategy()); + strategies.add(new TechStrategy()); + } + + public static CrawlStrategy getStrategy(String url) { + for (CrawlStrategy strategy : strategies) { + if (strategy.supports(url)) { + return strategy; + } + } + return null; + } + + public static List getAllStrategies() { + return new ArrayList<>(strategies); + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java b/W11/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java new file mode 100644 index 0000000..9255405 --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/strategy/TechStrategy.java @@ -0,0 +1,105 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.model.Article; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import 
org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TechStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url != null && (url.contains("csdn") || url.contains("oschina") || url.contains("iteye") || url.contains("cnblogs")); + } + + @Override + public List
crawl(String url) { + List
articles = new ArrayList<>(); + try { + Document doc = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .timeout(10000) + .get(); + articles = parse(doc, url); + } catch (IOException e) { + throw new NetworkException("网络请求失败:" + e.getMessage(), e); + } catch (ParseException e) { + throw e; + } catch (Exception e) { + articles.add(new Article("爬取失败", url, "错误:" + e.getMessage(), "系统", null)); + } + return articles; + } + + @Override + public List
parse(Document doc, String url) throws ParseException { + List
articles = new ArrayList<>(); + try { + if (url.contains("csdn")) { + crawlCsdn(doc, url, articles); + } else if (url.contains("cnblogs")) { + crawlCnblogs(doc, url, articles); + } else if (url.contains("oschina")) { + crawlOschina(doc, url, articles); + } else { + crawlGeneric(doc, url, articles); + } + } catch (Exception e) { + throw new ParseException("解析技术网站失败:" + e.getMessage(), e); + } + return articles; + } + + private void crawlCsdn(Document doc, String url, List
articles) { + Elements items = doc.select(".article-item-box"); + for (Element item : items) { + String title = item.select("h4 a").text(); + String link = item.select("h4 a").attr("href"); + String author = item.select(".name").text(); + String summary = item.select(".content").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary, author, null)); + } + } + } + + private void crawlCnblogs(Document doc, String url, List
articles) { + Elements items = doc.select(".post-item"); + for (Element item : items) { + String title = item.select(".post-item-title a").text(); + String link = item.select(".post-item-title a").attr("href"); + String author = item.select(".post-item-author a").text(); + String summary = item.select(".post-item-summary").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary, author, null)); + } + } + } + + private void crawlOschina(Document doc, String url, List
articles) { + Elements items = doc.select(".news-list .news-item"); + for (Element item : items) { + String title = item.select(".title a").text(); + String link = "https://www.oschina.net" + item.select(".title a").attr("href"); + String author = item.select(".author").text(); + String summary = item.select(".description").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, link, summary, author, null)); + } + } + } + + private void crawlGeneric(Document doc, String url, List
articles) { + String title = doc.title(); + String content = doc.select("article, .article-content, .post-content").text(); + if (!title.isEmpty()) { + articles.add(new Article(title, url, content.length() > 500 ? content.substring(0, 500) : content, "未知", null)); + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java b/W11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..f501e0d --- /dev/null +++ b/W11/java-cli/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,53 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + String line = scanner.nextLine(); + logger.debug("User input: {}", line); + return line; + } + + public void printSuccess(String msg) { + logger.info("Success: {}", msg); + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + logger.error("Error: {}", msg); + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + logger.debug("Info: {}", msg); + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + logger.info("No articles to display"); + printInfo("暂无文章,请先执行 crawl。"); + return; + } + logger.info("Displaying {} articles", articles.size()); + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +} \ No newline at end of file diff --git a/W11/java-cli/src/main/resources/logback.xml b/W11/java-cli/src/main/resources/logback.xml new file mode 100644 index 0000000..e374143 --- /dev/null +++ b/W11/java-cli/src/main/resources/logback.xml @@ -0,0 +1,22 @@ + + + + %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + logs/crawler.log + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + + \ No newline at end of file diff --git a/W11/java-cli/target/W9工程架构 - 教案v3.md b/W11/java-cli/target/W9工程架构 - 教案v3.md new file mode 100644 index 0000000..09de868 --- /dev/null +++ b/W11/java-cli/target/W9工程架构 - 教案v3.md @@ -0,0 +1,758 @@ +--- + +# 教案:《高级程序设计》第9周——工程架构:从"写代码"到"造系统" + +| 项目 | 内容 | +|------|------| +| **课程名称** | 高级程序设计 | +| **周次** | 第9周 | +| **主题** | 工程架构——从"写代码"到"造系统" | +| **学时** | 2学时(90分钟) | +| **授课对象** | 具备Python基础、已完成Java面向对象特性学习的学生 | +| **教学环境** | JDK 17+、IntelliJ IDEA、Maven(模板) | +| **前情提要** | 本课程原计划使用JavaFX GUI,后根据教学反馈转向CLI + MVC + 爬虫工程化 | + +--- + +## 教学调整说明:为什么选择CLI而不是GUI? + +> **原计划**:JavaFX桌面应用 → **新计划**:CLI命令行应用 + +| 维度 | GUI (JavaFX) | CLI (命令行) | +|------|--------------|-------------| +| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | +| **学生痛点** | "窗口点击"与后端能力无关 | 真正锻炼工程思维 | +| **AI辅助** | AI生成FXML,学生看不懂 | AI辅助重构架构 | +| **工程化** | 脱离真实后端开发场景 | 模拟真实服务器/大数据开发 | +| **核心转型** | "视觉装饰"优先 | "逻辑架构"优先 | + +**决策理由**: +1. **985学生需要的是工程思维**,不是拖控件 +2. **接口抽象**是弱项,CLI + MVC更能暴露这个问题 +3. 
**彩色终端**足够酷炫,且代码量可控 + +**更深层的教育价值**: +> 在GUI框架中,架构已被框架强制划定,学生只是"遵守规矩";而CLI世界里没有任何框架告诉你模型在哪、视图在哪——**当外部约束消失,内部的工程纪律才真正建立**。这正是本节课要传递的核心精神。 + +--- + +## 一、教学目标 + +| 目标维度 | 具体描述 | +|----------|----------| +| **知识掌握** | 理解MVC架构的职责划分及其演化脉络;掌握Maven项目结构与pom.xml基础;理解Command模式的路由原理。 | +| **工程实践** | 能搭建规范的Maven项目包结构;能实现基于Scanner的控制台交互;能用Command接口实现可扩展的命令路由;能识别架构中的"越权行为"。 | +| **思维转型** | 从"一个类写全部"转向"分层解耦";从"修改现有代码"转向"新增类实现功能";从"满足功能"转向"代码的工程洁癖"。 | +| **工具应用** | 利用AI辅助审查MVC职责越权;让AI扮演"架构审计师"检查分层是否清晰;理解AI生成代码中的架构缺陷。 | + +--- + +## 二、教学重点与难点 + +| 项目 | 内容 | 突破方法 | +|------|------|----------| +| **重点** | MVC三层职责划分、CLI交互实现、Command接口解耦、代码中的工程细节(常量、输出归属) | 以"新增命令需要改什么"为切入点,展示Command模式的优势;通过现场"代码找茬"强化细节意识 | +| **难点** | Controller不写业务逻辑、Command接口的多态实现、共享数据模型的设计缺陷识别 | 现场演示:增加一个命令只需新建类,无需修改Controller;暴露`List
`共享引用的问题并预告解决方案 | + +--- + +## 三、教学过程设计(90分钟) + +| 环节 | 时间 | 教学内容 | 师生活动 | AI协同点 | +|------|------|----------|----------|----------| +| **1. 痛点引入:从脚本到工程的鸿沟** | 10' | 展示"意大利面"式爬虫代码,演示改一处需要动全身 | **教师演示**:现场展示一段混乱代码,让学生找问题 | 用AI分析代码耦合度 | +| **2. CLI vs GUI:架构选择的思考** | 10' | 对比两种方案的优缺点,解释为什么CLI更适合培养工程思维 | **教师讲解**:用对比表格说明选择CLI的理由 | — | +| **3. MVC分层设计** | 20' | 讲解Model/View/Controller三层职责,用"餐厅类比"强化理解,随后批判类比局限性 | **教师讲解**:配合架构图讲解三层交互,引导学生寻找类比破绽 | 用AI生成MVC职责对照表 | +| **4. Command模式:可扩展的命令路由** | 15' | 引入Command接口,解释"一个命令就是一个类" | **类比**:Command像酒店的服务部门,Controller是前台 | 让AI解释Command模式的多态原理 | +| **5. Maven模板与环境** | 5' | 直接使用提供的Maven模板,讲解目录结构 | **教师演示**:解压模板 → IDEA打开 → 运行 | — | +| **6. 三层代码落地** | 20' | **Model**:Article实体
**View**:ConsoleView(ANSI常量)
**Command接口**+实现
**Controller**:Map路由 | **教师演示**:分步写出代码,刻意埋入1~2个"越权细节"让学生找茬 | 学生用AI做"架构审计" | +| **7. 架构反思与展望** | 5' | 指出当前`List
`共享引用的问题,预告W10策略模式与仓库层 | **师生互动**:你发现这个设计有什么风险? | 让AI分析共享可变状态的危害 | +| **8. 实践任务:空壳程序** | 5' | 搭建完整包结构,实现CLI循环 | 学生现场编码,教师巡视 | 完成后用AI检查包结构 | +| **9. 总结与过渡** | 5' | 本周实现了"骨架+命令可扩展",下周填入"灵魂"——解析器,并解决数据安全问题 | 总结Command模式优势,预告策略模式 | — | + +--- + +## 四、核心教学内容脚本 + +### 4.1 痛点引入:从脚本到工程的鸿沟(10分钟) + +**教师口播**: +> "同学们,前8周我们学的是Java语法,从变量到类,从继承到接口。但有一个问题:代码写完之后,怎么组织?" +> +> "来看这段代码——这是某个同学写的'爬虫',他一个人完成了一个'完整'的项目。" + +**展示"脚本式"代码**: +```java +public class Crawler { + public static void main(String[] args) { + System.out.print("请输入URL: "); + Scanner scanner = new Scanner(System.in); + String url = scanner.nextLine(); + + List titles = new ArrayList(); + try { + Document doc = Jsoup.connect(url).get(); + Elements elements = doc.select(".post-title"); + for (Element e : elements) { + String title = e.text(); + System.out.println("标题: " + title); + titles.add(title); + } + } catch (Exception ex) { + System.out.println("出错啦: " + ex.getMessage()); + } + } +} +``` + +**提问引导**: +1. "如果我想把标题保存到文件,要改哪里?" +2. "如果我想支持另一个网站,它的HTML结构不一样,要怎么办?" +3. "如果我想让输出变成彩色,要改哪里?" + +**痛点提炼**: +> "看到了吗?才20来行代码,已经'牵一发而动全身'了。这就是一个'脚本'的宿命——功能全混在一起,改一个小需求,整个文件都要翻。" +> +> "这周我们要解决:**怎么让代码'改起来不疼'?**" + +--- + +### 4.2 CLI vs GUI:架构选择的思考(10分钟) + +**教师口播**: +> "既然要写一个'完整'的爬虫应用,我们有两个选择:图形界面(GUI)或命令行界面(CLI)。为什么我推荐CLI而不是GUI?"
+ +**对比表格** + +| 维度 | GUI (JavaFX) | CLI (命令行) | +|------|--------------|-------------| +| **代码量** | FXML + Controller + CSS,大量模板代码 | 纯Java,代码量可控 | +| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | +| **后端能力** | 几乎无关 | 模拟真实服务器开发 | +| **可测试性** | 难(需要UI测试框架) | 易(直接测试Command类) | +| **工程思维** | 弱(关注视觉) | 强(关注逻辑) | + +**核心观点**: +> **CLI更需要MVC!** GUI有现成的事件系统(点击按钮→触发事件),而CLI只有字符流。**没有架构,分分钟写成脚本**。MVC在CLI里是"刚需",不是"装饰"。 +> +> **更深一层**:在GUI里,框架已经硬塞给你一套架构,你只是在填空;但在CLI里,所有结构都必须由你亲手搭建。**当外部约束消失,内部的工程纪律才真正开始建立**——这才是本节课的真正目的。 + +**CLI也能很酷**: +- ANSI彩色输出(红/绿/黄/蓝) +- 表格展示数据 +- 进度条动画 +- 模拟真实大数据开发场景 + +--- + +### 4.3 MVC分层设计(20分钟) + +#### 4.3.1 MVC的起源与演进 + +**教师口播**: +> "MVC不是新东西,它是1970年代为桌面应用设计的架构思想。但它的核心——'职责分离'——在任何软件里都适用。" + +| 年代 | 场景 | MVC的角色 | +|------|------|----------| +| 1970s | Smalltalk-72 GUI | 最早的用户界面架构 | +| 1990s | Web开发 (Struts) | 后端模板引擎 | +| 2000s | ASP.NET MVC | 现代Web框架 | +| 2020s | CLI + API | 解耦业务逻辑与表现层 | + +#### 4.3.2 从GUI到CLI的映射 + +| GUI组件 | CLI对应 | 说明 | +|--------|--------|------| +| 窗口/按钮 | 命令行输入 | **View = 用户交互** | +| 数据模型 | Article实体类 | **Model = 数据结构** | +| 事件监听 | Command路由 | **Controller = 调度** | + +#### 4.3.3 MVC三层职责 + +**架构图示**: + +``` +┌─────────────────────────────────────────┐ +│ 入口 │ +│ (main方法) │ +└─────────────────┬───────────────────────┘ + │ + ▼ +┌─────────────────────────────────────────┐ +│ Controller │ +│ - 接收命令(crawl, help, exit) │ +│ - 分发给对应的Command │ +│ 【口诀】:Controller不管"怎么做", │ +│ 只管"派给谁" │ +└─────────┬───────────────┬───────────────┘ + │ │ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Model │ │ View │ +│ - 数据实体 │ │ - 输入解析 │ +│ - 业务逻辑 │ │ - 输出格式化 │ +│ 【口诀】: │ │ 【口诀】: │ +│ Model管"数据" │ │ View管"呈现" │ +└─────────────────┘ └─────────────────┘ +``` + +**三层职责详解** + +| 层级 | 职责 | 典型代码 | 禁止做什么 | +|------|------|----------|------------| +| **Model** | 数据结构 + 业务逻辑 | `class Article { String title; String content; }` | 不能有`System.out.println`,不能有`Scanner` | +| **View** | 接收用户输入 + 格式化输出 | `class ConsoleView { String readInput(); void print(String); }` | 
不能写爬虫逻辑,只做"传声筒" | +| **Controller** | 协调调度 | `class CrawlerController { void handle(String cmd) { ... } }` | 不能直接写业务细节,委托给Command | + +#### 4.3.4 类比强化:"餐厅类比" + +> "把MVC想象成一家餐厅: +> - **Model是后厨**:只管做菜(数据加工),不管谁来吃、怎么端 +> - **View是服务员**:只管端菜和收钱(输入输出),不管菜怎么做 +> - **Controller是前台**:只管把顾客的点单传给后厨,把做好的菜端给顾客 +> +> 如果后厨开始管'谁来吃饭',这餐厅就乱了。" + +#### 4.3.5 对"餐厅类比"的批判性思考(关键!) + +**教师导引**: +> "刚才的类比好理解吗?很好。但任何一个类比都有它的边界,如果把它当成真理,就会出问题。现在我们来给这个类比'找茬'。" + +**提问学生**: +1. "后厨真的完全不知道客人是谁吗?如果客人有忌口(比如不吃香菜),这个信息需不需要传到后厨?" +2. "服务员只是端菜吗?在真实餐厅里,服务员经常向后厨反馈'客人觉得今天的菜咸了',这属于View→Model的反向影响吗?" +3. "在这个类比里,我们把前台(Controller)和后厨(Model)的关系说成单向的。但实际上,后厨做完了菜,需要通知前台'菜好了',这不就是**观察者模式**吗?" + +**点明本质**: +> "实际MVC的数据流向常常是**双向**的:Controller调用Model的方法改变数据,Model变化后又通知View更新显示。只不过在本次CLI项目中,我们暂时使用'请求-响应'的单向简化模型——用户输入命令,系统处理,然后立即输出结果。这个简化版够用,但你要知道完整的MVC是更动态的。随着系统复杂,Model层需要一个专门的'仓库类'来管理数据,并通知视图刷新——这正是W10我们将要深入的内容。" + +#### 4.3.6 MVC的数据流向(本课程简化版) + +``` +CLI用户输入 + ↓ +View(解析命令字符串) + ↓ +Controller(找到对应Command) + ↓ +Command.execute()(执行业务逻辑) + ↓ +Model(Article数据,目前暂存于List) + ↓ +View(display()展示数据) + ↓ +CLI终端显示 +``` + +--- + +### 4.4 Command模式:可扩展的命令路由(15分钟) + +**教师口播**: +> "现在引入一个设计模式——Command(命令)模式。它的核心思想是:**一个命令就是一个类**。" + +#### 4.4.1 为什么需要Command模式? + +**演示:增加一个命令的代价(switch-case版)** +```java +// 现状代码 +switch (cmd) { + case "crawl": handleCrawl(); break; + case "help": showHelp(); break; + // 如果要增加 list 命令? + // 1. 加 case "list" + // 2. 加 handleList() 方法 + // 3. 可能还要改其他地方... +} +``` + +**提问**: +- "如果我想增加10个命令,这个类要改多少次?" +- "如果我不小心删了一个case,整个程序还能跑吗?" 
+ +**痛点提炼**: +> "每加一个功能,就要在这个类里戳一个洞。**这就是'肥控制器'陷阱**——所有的逻辑都堆在Controller里,它变成了新的'意大利面'。" + +#### 4.4.2 Command模式的四个要素 + +| 要素 | 角色 | 示例 | +|------|------|------| +| **Command接口** | 抽象的"订单" | `Command` 接口 | +| **ConcreteCommand** | 具体的订单 | `HelpCommand`、`CrawlCommand` | +| **Invoker** | 接单的前台 | `CrawlerController` | +| **Receiver** | 执行者 | `ConsoleView`、`ArticleRepository` | + +#### 4.4.3 Command接口定义 + +```java +// src/main/java/com/crawler/command/Command.java +package com.crawler.command; + +import com.crawler.model.Article; +import java.util.List; + +public interface Command { + String getName(); // 命令名,如 "crawl" + void execute(String[] args, List
articles); // 执行逻辑 +} +``` + +#### 4.4.4 Controller的变革(从switch到Map) + +```java +// 修改后的Controller +public class CrawlerController { + private Map commands; // 用Map存命令 + private ConsoleView view; // 持有View以输出错误 + + public CrawlerController(ConsoleView view, List
articles) { + this.view = view; + this.commands = new HashMap<>(); + // 增加命令无需改Controller代码,只需在这里注册 + commands.put("crawl", new CrawlCommand(view)); + commands.put("help", new HelpCommand(view)); + commands.put("list", new ListCommand(view)); + commands.put("exit", new ExitCommand(view)); + } + + public void handle(String input) { + if (input.isEmpty()) return; + String[] parts = input.split("\\s+"); + String cmd = parts[0].toLowerCase(); + + Command command = commands.get(cmd); + if (command == null) { + view.printError("Unknown command: " + cmd); // 通过View输出,而非直接System.out + return; + } + + // 执行命令,传入参数和文章列表 + command.execute(parts, articles); + } +} +``` + +**对比表格** + +| 维度 | switch-case | Command模式 | +|------|-------------|-------------| +| 增加命令 | 要改Controller | 新建一个类 | +| 多态体验 | 无 | execute()的多态调用 | +| 可测试性 | 难 | 每个Command可单独测试 | +| 代码量 | 少 | 多,但更清晰 | + +**类比强化**: +> "Command模式就像**酒店的客房服务**:每个服务(清理、送餐、按摩)都是一个独立的部门。前台(Controller)只负责接电话,然后把请求'派发'给对应的部门。部门自己知道怎么干活,不需要前台教。" +> +> "如果想新增一个服务,前台只需要'登记'一下,不需要把现有部门重新装修。" + +--- + +### 4.5 Maven模板与环境(5分钟) + +**教师口播**: +> "这周我们不发愁pom.xml配置。我已经把 Maven 模板准备好了,你们只需要解压、打开、运行。" + +**模板使用流程**: +``` +1. 解压 [my-crawler-template.zip] +2. 用 IDEA 打开文件夹 +3. 右键 pom.xml → Maven → Reload Project +4. 
运行 App.java +``` + +**标准目录结构**: +``` +src/main/java/com/crawler/ +├── model/ +│ └── Article.java +├── view/ +│ └── ConsoleView.java +├── command/ +│ ├── Command.java (接口) +│ ├── CrawlCommand.java +│ ├── HelpCommand.java +│ ├── ListCommand.java +│ └── ExitCommand.java +└── controller/ + └── CrawlerController.java +``` + +--- + +### 4.6 代码落地(20分钟) + +#### 4.6.1 Model层:Article实体 + +```java +// src/main/java/com/crawler/model/Article.java +package com.crawler.model; + +public class Article { + private String title; + private String url; + private String content; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public String getTitle() { return title; } + public void setTitle(String title) { this.title = title; } + public String getUrl() { return url; } + public void setUrl(String url) { this.url = url; } + public String getContent() { return content; } + public void setContent(String content) { this.content = content; } + + @Override + public String toString() { + return "Article{title='" + title + "', url='" + url + "'}"; + } +} +``` + +#### 4.6.2 View层:ANSI常量集中管理(工程细节!) 
+ +```java +// src/main/java/com/crawler/view/ConsoleView.java +package com.crawler.view; + +import com.crawler.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + // ANSI颜色常量——集中管理,避免散落各处 + private static final String ANSI_GREEN = "\033[32m"; + private static final String ANSI_RED = "\033[31m"; + private static final String ANSI_CYAN = "\033[36m"; + private static final String ANSI_RESET = "\033[0m"; + + private Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("crawler> "); + return scanner.nextLine().trim(); + } + + public void print(String msg) { + System.out.println(msg); + } + + public void printSuccess(String msg) { + print(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + print(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + print(ANSI_CYAN + msg + ANSI_RESET); + } + + // 展示文章列表 + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("No articles yet. Use 'crawl ' first."); + return; + } + print("+----------+--------------------------------+"); + print("| Title | URL |"); + print("+----------+--------------------------------+"); + for (Article a : articles) { + String title = a.getTitle(); + if (title.length() > 10) title = title.substring(0, 10) + ".."; + String url = a.getUrl(); + if (url.length() > 30) url = url.substring(0, 27) + "..."; + print("| " + String.format("%-10s", title) + " | " + url + " |"); + } + print("+----------+--------------------------------+"); + printInfo("Total: " + articles.size() + " articles"); + } +} +``` + +**教师提示**: +> "注意:所有ANSI转义码都被定义为`private static final`常量。如果把`\033[32m`散落在项目各处,一旦想调整颜色,就得满世界去改——这正是我们之前痛批的'意大利面'。**这就是工程细节**。" + +#### 4.6.3 Command接口与四个实现(全部通过View输出) + +```java +// Command.java +public interface Command { + String getName(); + void execute(String[] args, List
articles); +} + +// HelpCommand.java +public class HelpCommand implements Command { + private ConsoleView view; + public HelpCommand(ConsoleView v) { this.view = v; } + public String getName() { return "help"; } + public void execute(String[] args, List
articles) { + view.printInfo("Commands: crawl , list, help, exit"); + } +} + +// ListCommand.java +public class ListCommand implements Command { + private ConsoleView view; + public ListCommand(ConsoleView v) { this.view = v; } + public String getName() { return "list"; } + public void execute(String[] args, List
articles) { + view.display(articles); + } +} + +// CrawlCommand.java (存根) +public class CrawlCommand implements Command { + private ConsoleView view; + public CrawlCommand(ConsoleView v) { this.view = v; } + public String getName() { return "crawl"; } + public void execute(String[] args, List
articles) { + if (args.length < 2) { + view.printError("Usage: crawl "); + return; + } + view.printInfo("Stub: Would crawl " + args[1]); + } +} + +// ExitCommand.java +public class ExitCommand implements Command { + private ConsoleView view; + public ExitCommand(ConsoleView v) { this.view = v; } + public String getName() { return "exit"; } + public void execute(String[] args, List
articles) { + view.printSuccess("Bye!"); // 全部输出都通过View,绝不让System.out直接出现在这里 + System.exit(0); + } +} +``` + +**故意埋设的"找茬点"**: +> "我在刚才的代码里有没有隐藏违反MVC原则的地方?`CrawlCommand`的存根里,`view.printInfo("Stub: Would crawl " + args[1]);` —— 这个字符串拼接算是"业务逻辑"吗?留给大家用AI架构审计时讨论。 + +#### 4.6.4 Controller:Map路由(全部通过View输出) + +```java +// src/main/java/com/crawler/controller/CrawlerController.java +package com.crawler.controller; + +import com.crawler.command.*; +import com.crawler.model.Article; +import com.crawler.view.ConsoleView; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CrawlerController { + private Map commands = new HashMap<>(); + private ConsoleView view; // 持有View + private List
articles; + + public CrawlerController(ConsoleView view, List
articles) { + this.view = view; + this.articles = articles; + commands.put("help", new HelpCommand(view)); + commands.put("list", new ListCommand(view)); + commands.put("crawl", new CrawlCommand(view)); + commands.put("exit", new ExitCommand(view)); + } + + public void handle(String input) { + if (input.isEmpty()) return; + String[] parts = input.split("\\s+"); + String cmdName = parts[0].toLowerCase(); + + Command cmd = commands.get(cmdName); + if (cmd == null) { + view.printError("Unknown command: " + cmdName); // 错误信息也走View! + return; + } + cmd.execute(parts, articles); + } +} +``` + +#### 4.6.5 main方法:组装 + +```java +// src/main/java/com/crawler/App.java +package com.crawler; + +import com.crawler.controller.CrawlerController; +import com.crawler.model.Article; +import com.crawler.view.ConsoleView; +import java.util.ArrayList; +import java.util.List; + +public class App { + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + List
articles = new ArrayList<>(); + CrawlerController controller = new CrawlerController(view, articles); + + view.printSuccess("Welcome to CLI Crawler!"); + view.printInfo("Type 'help' for commands."); + + while (true) { + controller.handle(view.readLine()); + } + } +} +``` + +#### 4.6.6 架构反思与展望:共享List
的隐患(关键!) + +**教师口播**: +> "现在这个架构已经可用了。但请大家审视一下:我们所有的Command都直接拿到了`List
`的引用。换句话说,任何一个命令都可以随意增、删、改这个列表。" +> +> "这就好像一家酒店,所有服务员、厨师、清洁工都能随意进出保险箱——**数据结构完全裸奔了**。" + +**提问**: +- "如果CrawlCommand不小心写错了代码,把一个null塞进articles,ListCommand会不会受影响?" +- "如果未来我们要在添加文章时也写入日志文件,现在的设计能优雅实现吗?还是得在所有Command里分别加日志代码?" + +**预告解决方案**: +> "下周,我们将引入**策略模式**和一个真正的**Model仓库层(ArticleRepository)**。这个仓库会把`List<Article>`封装起来,对外只提供`add()`、`getAll()`等安全接口。任何命令想修改数据,都必须通过仓库。这就是从'数据结构'到'模型层'的进化——我们W9先搭骨架,W10给它装上盔甲。" + +--- + +### 4.7 实践任务(5分钟) + +**任务要求**: +1. 使用Maven模板创建项目 +2. 实现完整包结构(model/view/command/controller) +3. 实现4个Command:help/list/crawl/exit +4. `list`命令能展示已抓取的文章 +5. 运行并测试循环 +6. **代码找茬(额外加分)**:找出你自己代码中是否存在`System.out`直接调用、硬编码ANSI字符串等"越权行为" + +**验收标准**: +- [x] Maven编译通过 +- [x] Command接口和4个实现分离在不同文件 +- [x] Controller里没有switch-case +- [x] 新增命令只需新建类,不改Controller +- [x] list命令能正确显示空列表 +- [x] 所有输出均通过ConsoleView完成,无直接System.out.println(main除外) +- [x] ANSI颜色码集中定义为View常量 + +--- + +## 五、课后作业 + +### 5.1 必做任务 + +1. **完善Article**:增加`author`、`publishDate`字段 +2. **★ HistoryCommand(强制作业)**: + - 实现`history`命令,记录用户输入过的所有命令 + - 使用`List<String>`存储历史(复习W8集合) + - 示例输出: + ``` + crawler> history + 1. help + 2. list + 3. crawl https://example.com + ``` +3. **AI架构审计**:将类名和方法名发给AI,指令: + > "作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?Model层是否包含输入输出代码?View层是否越权写了业务逻辑?有没有地方直接使用了System.out或硬编码ANSI码?" + +### 5.2 选做任务 + +1. **命令别名**:给`crawl`增加别名`c`,`help`增加别名`h` +2. **URL验证**:检查URL格式是否以http://或https://开头 +3. **暗色主题**:实现不同的配色方案(利用View中的ANSI常量,只需修改一处即可) +4. **思考并回答**:分析`List
`共享引用的潜在风险,写一段200字的小结 + +### 5.3 思考题 + +1. **Command vs switch-case**:增加10个命令,哪种方式代码改动量更小? +2. **如果不用Command接口,直接用Map存命令类行不行?** 接口的意义是什么? +3. **Controller里的`commands.put()`能否减少?** 提示:思考"注册机制" +4. **为什么ExitCommand里的`view.printSuccess("Bye!")`比直接`System.out.println`更"MVC"?** 提示:回忆View的职责 + +--- + +## 六、AI协同升级 + +### 架构审计师任务(必做) + +**学生执行步骤**: +1. 列出项目中所有类名,并附上各类的源码(至少包含方法体,否则AI无法定位具体的越权调用) +2. 将类名列表和源码一并发给AI +3. 输入指令: + > "作为Java架构审计师,请检查我的MVC三层划分是否清晰。Model层是否包含了不应该有的代码(Scanner/System.out)?View层是否越权写了业务逻辑?请指出任何一处直接使用System.out.println的地方,并建议如何改正。" + +**预期AI输出**: +- 指出哪一层有越权行为 +- 建议如何整改 +- 评价整体架构健康度 + +### 进阶AI探究(选做) + +> "假设我的Command接口中execute方法接收了一个`List
`参数,请分析这种设计在工程上有什么隐患,并给出重构建议。" + +--- + +## 七、教学反思与调整记录 + +| 日期 | 事项 | 调整内容 | +|------|------|----------| +| 2026-04-28 | 首次编写 | 基于CLI+MVC重构 | +| 2026-04-30 | 教授反馈 | 引入Command模式、提供Maven模板、升级AI协同比 | +| 2026-04-30 | 逻辑重排 | 按"问题→选择→架构→模式"顺序重写 | +| 2026-05-01 | v2 vs V3合并 | 融合深度改进:增加教育哲学、批判性思考、ANSI常量、共享List隐患、故意埋坑 | + +--- + +## 附录1:Maven模板说明 + +> 老师提供`my-crawler-template.zip`压缩包,包含: +> - pom.xml(含Jsoup依赖) +> - 空的src/main/java结构 +> - .gitignore + +## 附录2:常见问题速查 + +| 问题 | 解答 | +|------|------| +| IDEA不识别pom.xml | 右键 pom.xml → Maven → Reload Project | +| 中文乱码 | Settings → Editor → File Encodings → UTF-8 | +| 包名大小写 | 包名全小写,类名首字母大写 | +| Command找不到 | 检查是否 implements Command,是否 @Override getName() | +| 命令不生效 | 检查 commands.put() 是否注册了该命令 | +| 输出颜色乱码 | IDEA控制台需支持ANSI,Windows下建议使用Windows Terminal或调整设置 | +| 我的System.out为什么被老师说越权 | View层才是与用户交互的唯一出口,所有输出都应通过View,这样将来改成GUI或日志时只需改View | + +## 附录3:教学逻辑说明 + +| 顺序 | 内容 | 设计理由 | +|------|------|----------| +| 1 | 痛点引入 | 从问题出发,让学生感受"为什么需要架构" | +| 2 | CLI vs GUI | 解释技术选型,建立"工程思维 > 视觉装饰"的认知 | +| 3 | MVC分层 | 核心架构概念,理解职责分离,通过类比及批判加深理解 | +| 4 | Command模式 | 具体实现方式,解决"肥控制器"问题 | +| 5 | Maven | 工具链支持 | +| 6 | 代码落地 | 实践验证,刻意植入细节规范,训练工程洁癖 | +| 7 | 架构反思 | 暴露共享可变状态隐患,为W10策略模式+仓库层做铺垫 | +| 8 | 实践任务 | 现场编码验证 | +| 9 | 总结 | 强化认知,预告下周 | + +--- + +## 版本说明 + +- **v1**:首次编写,CLI+MVC基础框架 +- **v2**:按"问题→选择→架构→模式"逻辑重排 +- **v3 (本版)**:融合v2结构 + V3深度改进,包含: + - 更深的CLI教育哲学 + - 餐厅类比批判性思考 + - ANSI常量集中管理工程细节 + - 全部输出走View + - 共享List架构隐患反思 + - 故意埋坑让学生找茬 + - W10铺垫(策略模式+仓库层) \ No newline at end of file diff --git a/W11/java-cli/target/maven-archiver/pom.properties b/W11/java-cli/target/maven-archiver/pom.properties new file mode 100644 index 0000000..08a8f9f --- /dev/null +++ b/W11/java-cli/target/maven-archiver/pom.properties @@ -0,0 +1,5 @@ +#Generated by Maven +#Thu Apr 30 11:50:54 CST 2026 +artifactId=datacollect-cli +groupId=com.example +version=0.1.0 diff --git a/W11/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst 
b/W11/java-cli/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..e69de29 diff --git a/W11/java-cli/target/w9-ppt.md b/W11/java-cli/target/w9-ppt.md new file mode 100644 index 0000000..5ddd5ad --- /dev/null +++ b/W11/java-cli/target/w9-ppt.md @@ -0,0 +1,530 @@ +## 高级程序设计 · 第9周 + +#### 工程架构:从"写代码"到"造系统" + +##### CLI + MVC + Command模式实战 + +--- + +### 📌 本周导航 + +- 痛点引入:脚本的宿命 +- CLI vs GUI:为什么选命令行? +- MVC分层:职责分离的艺术 +- Command模式:可扩展的路由 +- Maven模板:工程化第一步 +- 代码落地:从接口到实现 +- 架构反思:共享数据的隐患 +- 实践任务 + 课后作业 + +--- + +### 1️⃣ 痛点引入:从脚本到工程的鸿沟 + +#### 这是一段“意大利面”爬虫 + +```java +public class Crawler { + public static void main(String[] args) { + System.out.print("请输入URL: "); + Scanner scanner = new Scanner(System.in); + String url = scanner.nextLine(); + List titles = new ArrayList(); + try { + Document doc = Jsoup.connect(url).get(); + Elements elements = doc.select(".post-title"); + for (Element e : elements) { + String title = e.text(); + System.out.println("标题: " + title); + titles.add(title); + } + } catch (Exception ex) { + System.out.println("出错啦: " + ex.getMessage()); + } + } +} +``` + +--- + +### 脚本的三大痛点 + +| 需求 | 需要改哪里? 
| +|------|--------------| +| 保存标题到文件 | 改 main 内部逻辑 | +| 支持不同网站结构 | 全部重写解析代码 | +| 彩色输出 | 一个一个改 print | + +> 😫 **牵一发而动全身 → 改起来疼** + +### 本周目标:**让代码“改起来不疼”** + +--- + +## 2️⃣ CLI vs GUI:架构选择的思考 + +### 图形界面 vs 命令行 + +| 维度 | GUI (JavaFX) | CLI (命令行) | +|------|--------------|-------------| +| 学习重心 | 布局、控件、事件 | **架构、分层、路由** | +| 后端能力 | 弱 | 模拟真实服务器 | +| 工程思维 | 弱(关注视觉) | **强(关注逻辑)** | +| 可测试性 | 难 | 易 | + +--- + +## 核心观点 + +> **CLI 更需要 MVC!** + +- GUI 有现成事件系统,框架强塞给你一套架构 +- CLI 只有字符流 → **没有架构,分分钟写成脚本** + +> 🎯 **当外部约束消失,内部的工程纪律才真正开始建立** + +### CLI 也能很酷 + +- ANSI 彩色输出 +- 表格展示数据 +- 模拟大数据/后端开发 + +--- + +## 3️⃣ MVC 分层设计 + +### MVC 的起源与演进 + +| 年代 | 场景 | MVC的角色 | +|------|------|----------| +| 1970s | Smalltalk-72 GUI | 最早的用户界面架构 | +| 1990s | Web开发 (Struts) | 后端模板引擎 | +| 2000s | ASP.NET MVC | 现代Web框架 | +| 2020s | CLI + API | 解耦业务逻辑与表现层 | + +**核心不变:职责分离** + +--- + +## MVC 三层职责 + +![[mvc.png]] +``` +┌─────────────────────────────────────────┐ +│ 入口 │ +│ (main方法) │ +└─────────────────┬───────────────────────┘ + ▼ +┌─────────────────────────────────────────┐ +│ Controller │ +│ 只管"派给谁",不管"怎么做" │ +└─────────┬───────────────┬───────────────┘ + ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ +│ Model │ │ View │ +│ 管"数据" │ │ 管"呈现" │ +│ + 业务逻辑 │ │ + 输入输出 │ +└─────────────────┘ └─────────────────┘ +``` + +--- + +## 三层“禁止做什么” + +| 层级 | 禁止行为 | +| -------------- | -------------------------------------- | +| **Model** | 不能有 `System.out.println`,不能有 `Scanner` | +| **View** | 不能写爬虫逻辑,只做“传声筒” | +| **Controller** | 不能直接写业务细节,委托给 Command | + +> 🔴 **越权就是架构腐败的开始** + +--- + +## 🍽️ 餐厅类比(帮助理解) + +- **Model = 后厨**:只管做菜,不管谁来吃、怎么端 +- **View = 服务员**:只管端菜和收钱,不管菜怎么做 +- **Controller = 前台**:接单 → 派给后厨 → 叫服务员上菜 + +--- + +## 🤔 对类比的批判性思考(关键!) 
+ +> 任何类比都有边界,不要当成真理 + +| 场景 | 暴露的问题 | +|------|------------| +| 客人有忌口(不吃香菜) | 信息需要传到后厨 → Model 可能需要知道 meta 信息 | +| 服务员反馈“今天的菜咸了” | View → Model 反向影响 | +| 后厨做完菜通知前台 | **观察者模式**,数据流可能是双向的 | + +**本课程简化模型**:请求-响应,单向流 + +--- + +## MVC 数据流向(本课程简化版) + +``` +CLI用户输入 + ↓ +View(解析命令字符串) + ↓ +Controller(找到对应Command) + ↓ +Command.execute()(执行业务逻辑) + ↓ +Model(Article数据,暂存于List) + ↓ +View(display()展示数据) + ↓ +CLI终端显示 +``` + +--- + +## 4️⃣ Command 模式:可扩展的命令路由 + +### 为什么需要 Command 模式? + +```java +switch (cmd) { + case "crawl": handleCrawl(); break; + case "help": showHelp(); break; + // 如果要增加 list 命令? + // 1. 加 case "list" + // 2. 加 handleList() 方法 + // 3. 可能还要改其他地方... +} +``` + +> 每加一个功能,就要在这个类里戳一个洞 → **肥控制器陷阱** + +--- + +## Command 模式的四个要素 + +| 要素 | 角色 | 示例 | +|------|------|------| +| Command接口 | 抽象的“订单” | `Command` | +| ConcreteCommand | 具体的订单 | `HelpCommand` | +| Invoker | 接单的前台 | `CrawlerController` | +| Receiver | 执行者 | `ConsoleView`、`ArticleRepository` | + +--- + +## Command 接口定义 + +```java +package com.crawler.command; + +import com.crawler.model.Article; +import java.util.List; + +public interface Command { + String getName(); + void execute(String[] args, List
<Article> articles); +} +``` + +--- + +## Controller 的变革:从 switch 到 Map + +```java +public class CrawlerController { + private Map<String, Command> commands = new HashMap<>(); + + public CrawlerController(ConsoleView view, List<Article>
articles) { + commands.put("help", new HelpCommand(view)); + commands.put("list", new ListCommand(view)); + commands.put("crawl", new CrawlCommand(view)); + commands.put("exit", new ExitCommand(view)); + } + + public void handle(String input) { + // 解析命令 → 从 Map 取 Command → 调用 execute + } +} +``` + +> **增加新命令:只需新建类,Controller 零改动!** + +--- + +## 对比:switch-case vs Command + +| 维度 | switch-case | Command模式 | +|------|-------------|-------------| +| 增加命令 | 要改 Controller | 新建一个类 | +| 多态体验 | 无 | `execute()` 多态 | +| 可测试性 | 难 | 每个 Command 单独测试 | +| 代码量 | 少 | 多,但更清晰 | + +> 🏨 **类比:酒店客房服务,前台只负责派单** + +--- + +## 5️⃣ Maven 模板与环境(5分钟) + +### 直接使用模板,不折腾配置 + +``` +my-crawler-template.zip + ↓ 解压 + IDEA打开 + ↓ 右键 pom.xml → Maven → Reload Project + ↓ 运行 App.java +``` + +### 标准目录结构 + +``` +src/main/java/com/crawler/ +├── model/Article.java +├── view/ConsoleView.java +├── command/ +│ ├── Command.java +│ ├── CrawlCommand.java +│ ├── HelpCommand.java +│ ├── ListCommand.java +│ └── ExitCommand.java +└── controller/CrawlerController.java +``` + +--- + +## 6️⃣ 代码落地(分步实现) + +### Model:Article 实体 + +```java +public class Article { + private String title; + private String url; + private String content; + // 构造器、getter/setter、toString +} +``` + +> 📦 只存放数据,没有任何输入输出代码 + +--- + +## View:ConsoleView(ANSI常量集中管理) + +```java +public class ConsoleView { + private static final String ANSI_GREEN = "\033[32m"; + private static final String ANSI_RED = "\033[31m"; + // ... 其他常量 + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + public void printError(String msg) { ... } + public void display(List
<Article> articles) { ... } +} +``` + +> ✨ **所有颜色码集中定义 → 改主题只需改一处** + +--- + +## Command 实现示例(HelpCommand) + +```java +public class HelpCommand implements Command { + private ConsoleView view; + public HelpCommand(ConsoleView v) { this.view = v; } + public String getName() { return "help"; } + public void execute(String[] args, List<Article> articles) { + view.printInfo("Commands: crawl <url>, list, help, exit"); + } +} +``` + +> ⚠️ 全部输出通过 `view`,绝不让 `System.out` 直接出现在这里 + +--- + +## CrawlCommand(存根,下周填坑) + +```java +public class CrawlCommand implements Command { + private ConsoleView view; + public CrawlCommand(ConsoleView v) { this.view = v; } + public String getName() { return "crawl"; } + public void execute(String[] args, List
<Article> articles) { + if (args.length < 2) { + view.printError("Usage: crawl <url>"); + return; + } + view.printInfo("Stub: Would crawl " + args[1]); + } +} +``` + +> 🔍 **找茬点**:这里拼接字符串算是“业务逻辑”吗?留给大家用 AI 审计。 + +--- + +## ExitCommand + +```java +public class ExitCommand implements Command { + private ConsoleView view; + public ExitCommand(ConsoleView v) { this.view = v; } + public String getName() { return "exit"; } + public void execute(String[] args, List<Article> articles) { + view.printSuccess("Bye!"); + System.exit(0); + } +} +``` + +> ✅ 所有输出都通过 View → 将来改 GUI 只需换 View 实现 + +--- + +## Controller + main 组装 + +```java +// Controller 中持有 Map<String, Command> +// App.java 中: +ConsoleView view = new ConsoleView(); +List
<Article> articles = new ArrayList<>(); +CrawlerController controller = new CrawlerController(view, articles); +view.printSuccess("Welcome to CLI Crawler!"); +while (true) { + controller.handle(view.readLine()); +} +``` + +> 🔁 完成交互循环 + +--- + +## 7️⃣ 架构反思:共享 List<Article> 的隐患 + +### 当前问题 + +- 所有 Command 都直接拿到 `List<Article>` 引用 +- 任何一个命令都可以随意增、删、改列表 +- 数据完全“裸奔” + +> 🚨 就像酒店所有员工都能进保险箱 + +--- + +## 提问 + +- 如果 `CrawlCommand` 不小心把 `null` 塞进列表,`ListCommand` 会怎样? +- 如果我们要在添加文章时写日志,现在的设计能优雅实现吗? + +### 预告解决方案(W10) + +- **策略模式** + **仓库层(ArticleRepository)** +- 封装 `List`,对外只暴露 `add()`、`getAll()` 等安全接口 + +> W9 搭骨架,W10 装上盔甲 + +--- + +## 8️⃣ 实践任务(现场5分钟) + +### 必做项 + +1. 使用 Maven 模板创建项目 +2. 实现完整包结构(model/view/command/controller) +3. 实现 4 个 Command:help / list / crawl / exit +4. `list` 能展示已抓取的文章(目前存根即可) +5. 运行并测试循环 + +### 额外加分:代码找茬 + +- 检查是否仍有 `System.out` 直接调用 +- 检查 ANSI 码是否硬编码在多个地方 + +--- + +## 验收标准 + +- [x] Maven 编译通过 +- [x] Command 接口和 4 个实现在不同文件 +- [x] Controller 里没有 switch-case +- [x] 新增命令只需新建类,不改 Controller +- [x] list 能正确显示空列表 +- [x] 所有输出均通过 `ConsoleView` +- [x] ANSI 颜色码集中定义为常量 + +--- + +## 9️⃣ 课后作业 + +### 必做 + +1. **完善 Article**:增加 `author`、`publishDate` 字段 +2. **★ HistoryCommand**:记录用户输入过的所有命令(用 `List<String>`) +3. **AI 架构审计**:将类名发给 AI,指令: + > “作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?” + +### 选做 + +- 命令别名(c 代替 crawl) +- URL 格式验证 +- 暗色主题(修改一处常量) +- 思考题:分析 `List
<Article>` 共享引用的风险(200字小结) + +--- + +## 🤖 AI 协同升级 + +### 架构审计师任务(必做) + +**步骤**: +1. 列出所有类名(不含方法实现) +2. 发给 AI +3. 指令:“检查 MVC 分层是否清晰,是否有越权行为” + +### 进阶探究(选做) + +> “假设我的 Command 接口中 execute 方法接收了一个 `List<Article>` 参数,请分析这种设计在工程上有什么隐患,并给出重构建议。” + +--- + +## 📚 总结与过渡 + +### 本周成果 + +- ✅ 工程化包结构 +- ✅ MVC 分层清晰 +- ✅ Command 模式实现可扩展路由 +- ✅ 所有输出走 View,常量集中管理 + +### 下周预告 + +- **策略模式**:封装爬取算法 +- **仓库层(Repository)**:武装 `List<Article>`,解决共享隐患 + +> 🚀 从“写代码”到“造系统”,踏出坚实第一步! + +--- + +## Q&A + +### 常见问题 + +| 问题 | 解答 | +|------|------| +| IDEA 不识别 pom.xml | 右键 → Maven → Reload Project | +| 中文乱码 | Settings → File Encodings → UTF-8 | +| 输出颜色乱码 | Windows 建议使用 Windows Terminal | +| 我的 System.out 被批评 | View 才是唯一输出出口 | + +--- + +## 谢谢! + +### 课件已上传,模板在课程群 + +**保持工程洁癖,下周见!** \ No newline at end of file