107 changed files with 10464 additions and 0 deletions
@ -0,0 +1,118 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
import java.util.stream.Collectors; |
||||
|
|
||||
|
public class AnalyzeCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "analyze"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "analyze <url> - 分析URL页面内容,输出统计信息(不保存)"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: analyze <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
view.printInfo("Analyzing: " + url); |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
List<Article> articles = strategy.parse(url, doc); |
||||
|
|
||||
|
printStatistics(articles, url); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Failed to analyze: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void printStatistics(List<Article> articles, String url) { |
||||
|
view.printSuccess("=== 分析报告 ==="); |
||||
|
view.printInfo("解析策略: " + strategyFactory.getStrategy(url).getClass().getSimpleName()); |
||||
|
view.printInfo("URL: " + url); |
||||
|
view.printInfo("文章数量: " + articles.size()); |
||||
|
|
||||
|
if (articles.isEmpty()) { |
||||
|
view.printInfo("未解析到任何文章"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
int emptyTitles = 0; |
||||
|
int minTitleLength = Integer.MAX_VALUE; |
||||
|
int maxTitleLength = 0; |
||||
|
int totalTitleLength = 0; |
||||
|
|
||||
|
for (Article article : articles) { |
||||
|
String title = article.getTitle(); |
||||
|
if (title == null || title.trim().isEmpty()) { |
||||
|
emptyTitles++; |
||||
|
} else { |
||||
|
int len = title.length(); |
||||
|
minTitleLength = Math.min(minTitleLength, len); |
||||
|
maxTitleLength = Math.max(maxTitleLength, len); |
||||
|
totalTitleLength += len; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
view.printInfo("--- 标题统计 ---"); |
||||
|
if (emptyTitles > 0) { |
||||
|
view.printInfo("空标题数量: " + emptyTitles); |
||||
|
} |
||||
|
view.printInfo("最短标题长度: " + (minTitleLength == Integer.MAX_VALUE ? 0 : minTitleLength)); |
||||
|
view.printInfo("最长标题长度: " + maxTitleLength); |
||||
|
view.printInfo("平均标题长度: " + String.format("%.1f", (double) totalTitleLength / (articles.size() - emptyTitles))); |
||||
|
|
||||
|
String domain = extractDomain(url); |
||||
|
Map<String, Long> domainDistribution = articles.stream() |
||||
|
.map(a -> extractDomain(a.getUrl())) |
||||
|
.collect(Collectors.groupingBy(d -> d, Collectors.counting())); |
||||
|
|
||||
|
view.printInfo("--- 来源域名分布 ---"); |
||||
|
for (Map.Entry<String, Long> entry : domainDistribution.entrySet()) { |
||||
|
view.printInfo(" " + entry.getKey() + ": " + entry.getValue() + " 篇"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private String extractDomain(String url) { |
||||
|
try { |
||||
|
int start = url.indexOf("://"); |
||||
|
if (start == -1) return "unknown"; |
||||
|
int end = url.indexOf("/", start + 3); |
||||
|
if (end == -1) return url.substring(start + 3); |
||||
|
return url.substring(start + 3, end); |
||||
|
} catch (Exception e) { |
||||
|
return "unknown"; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,40 @@ |
|||||
|
package com.example.datacollect.repository; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article> articles) { |
||||
|
if (articles == null) { |
||||
|
throw new IllegalArgumentException("Article list cannot be null"); |
||||
|
} |
||||
|
for (Article article : articles) { |
||||
|
if (article != null) { |
||||
|
this.articles.add(article); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
articles.clear(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,13 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/**
 * Contract for a page parser that can be selected by URL.
 */
public interface CrawlStrategy {
    /**
     * Extracts articles from an already-fetched document.
     *
     * @param url the URL the document was fetched from
     * @param doc the parsed page
     * @return the articles found on the page
     */
    List<Article> parse(String url, Document doc);

    /** Returns whether this strategy can handle the given URL. */
    boolean supports(String url);

    /**
     * Priority of this strategy; defaults to 0.
     * StrategyFactory sorts strategies by this value in descending order.
     */
    default int getPriority() {
        return 0;
    }
}
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.example.datacollect.controller; |
||||
|
|
||||
|
import com.example.datacollect.command.AnalyzeCommand; |
||||
|
import com.example.datacollect.command.Command; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ExitCommand; |
||||
|
import com.example.datacollect.command.HelpCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
private final ConsoleView view; |
||||
|
private final ArticleRepository repository; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.repository = repository; |
||||
|
register(new HelpCommand(view)); |
||||
|
register(new ListCommand(view)); |
||||
|
register(new CrawlCommand(view, strategyFactory)); |
||||
|
register(new AnalyzeCommand(view, strategyFactory)); |
||||
|
register(new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
private void register(Command command) { |
||||
|
commands.put(command.getName(), command); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
String text = input == null ? "" : input.trim(); |
||||
|
if (text.isEmpty()) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String[] args = text.split("\\s+"); |
||||
|
String cmdName = args[0].toLowerCase(); |
||||
|
Command command = commands.get(cmdName); |
||||
|
if (command == null) { |
||||
|
view.printError("Unknown command: " + cmdName); |
||||
|
return; |
||||
|
} |
||||
|
command.execute(args, repository); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,40 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class DefaultStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return true; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public int getPriority() { |
||||
|
return Integer.MIN_VALUE; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements headings = doc.select("h1, h2, h3, h4, h5, h6"); |
||||
|
for (Element h : headings) { |
||||
|
String text = h.text().trim(); |
||||
|
if (!text.isEmpty()) { |
||||
|
articles.add(new Article(text, url, "")); |
||||
|
} |
||||
|
} |
||||
|
if (articles.isEmpty()) { |
||||
|
String title = doc.title(); |
||||
|
if (!title.isEmpty()) { |
||||
|
articles.add(new Article(title, url, "")); |
||||
|
} |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,45 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public class RegexStrategy implements CrawlStrategy { |
||||
|
private final Pattern pattern; |
||||
|
private final int priority; |
||||
|
|
||||
|
public RegexStrategy(String regex, int priority) { |
||||
|
this.pattern = Pattern.compile(regex); |
||||
|
this.priority = priority; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return pattern.matcher(url).matches(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public int getPriority() { |
||||
|
return priority; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select("a[href]"); |
||||
|
for (Element item : items) { |
||||
|
String text = item.text(); |
||||
|
if (!text.isEmpty()) { |
||||
|
String href = item.attr("href"); |
||||
|
String fullUrl = href.startsWith("http") ? href : url + href; |
||||
|
articles.add(new Article(text, fullUrl, "")); |
||||
|
} |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,85 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
/**
 * ===============================================
 * Strategy Pattern Architecture Audit Report
 * ===============================================
 *
 * 1. Audit overview
 * ----------
 * The project uses the Strategy pattern to make the page-parsing logic replaceable.
 * The main classes involved are:
 * - CrawlStrategy: strategy interface, defines the parser contract
 * - StrategyFactory: strategy factory, responsible for strategy selection
 * - BlogStrategy: blog parsing strategy
 * - NewsStrategy: news parsing strategy
 * - HnuNewsStrategy: parsing strategy for the HNU news site
 *
 * ===============================================
 *
 * 2. Audit findings
 * ----------
 *
 * [Check] Decoupling of the strategy interface from its implementations
 * ------------------------------------------
 * Status: ✓ compliant
 *
 * Analysis:
 * - The CrawlStrategy interface defines two core methods: parse() and supports()
 * - All strategy implementations (BlogStrategy, NewsStrategy, HnuNewsStrategy) implement the interface
 * - Strategy implementations do not depend on each other (single-responsibility principle)
 * - Adding a strategy only requires implementing the interface, without modifying existing code (open/closed principle)
 *
 * ===============================================
 *
 * [Check] Encapsulation of the strategy-selection logic
 * ------------------------------------------
 * Status: ⚠ partially compliant, room for improvement
 *
 * Analysis:
 * - Strategy selection is encapsulated in StrategyFactory.getStrategy(url)
 * - Selection is a simple linear scan that returns the first matching strategy
 * - Registration order is the matching order (insertion order)
 *
 * Issues:
 * 1. No priority mechanism; registration order decides the match result
 * 2. No default strategy; null is returned when nothing matches, so callers need extra handling
 * 3. Complex matching such as regular expressions is not supported
 *
 * ===============================================
 *
 * [Check] Strategies overstepping their responsibility
 * ------------------------------------------
 * Status: ✓ no overstepping found
 *
 * Analysis:
 * - All strategy implementations depend only on data models such as Article and Document
 * - Strategies do not depend directly on the Repository layer (data persistence)
 * - Strategies do not depend directly on the View layer (output)
 * - Strategies focus on parsing logic (separation of concerns)
 *
 * ===============================================
 *
 * 3. Recommendations
 * ----------
 *
 * 1. [High priority] Introduce a strategy priority mechanism
 *    Suggestion: add a getPriority() method to the CrawlStrategy interface,
 *    or support priority configuration in StrategyFactory.
 *
 * 2. [High priority] Implement a default strategy
 *    Suggestion: fall back to a default strategy when nothing matches,
 *    so that a null return cannot cause an NPE in the calling layer.
 *
 * 3. [Medium priority] Support regex-based matching
 *    Suggestion: add a RegexStrategy class that matches URLs by regular expression.
 *
 * 4. [Medium priority] Strategy conflict detection
 *    Suggestion: in getStrategy, detect whether several strategies support the same URL,
 *    and log a warning or throw an exception if so.
 *
 * ===============================================
 */
public class StrategyArchitectureAuditReport {
    // This class only carries the audit-report doc comment above; it has no business logic.
}
||||
@ -0,0 +1,50 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Comparator; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
private final CrawlStrategy defaultStrategy; |
||||
|
|
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new HnuNewsStrategy()); |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
strategies.add(new RegexStrategy(".*\\.edu\\.cn$", 80)); |
||||
|
strategies.add(new RegexStrategy(".*\\.com$", 60)); |
||||
|
strategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority).reversed()); |
||||
|
this.defaultStrategy = new DefaultStrategy(); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
List<CrawlStrategy> matched = new ArrayList<>(); |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) { |
||||
|
matched.add(s); |
||||
|
} |
||||
|
} |
||||
|
if (matched.isEmpty()) { |
||||
|
return defaultStrategy; |
||||
|
} |
||||
|
if (matched.size() > 1) { |
||||
|
System.out.println("WARNING: Multiple strategies matched for URL: " + url |
||||
|
+ ", using highest priority: " + matched.get(0).getClass().getSimpleName()); |
||||
|
} |
||||
|
return matched.get(0); |
||||
|
} |
||||
|
|
||||
|
public void register(CrawlStrategy strategy) { |
||||
|
strategies.add(strategy); |
||||
|
strategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority).reversed()); |
||||
|
} |
||||
|
|
||||
|
public List<CrawlStrategy> getAllStrategies() { |
||||
|
return new ArrayList<>(strategies); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getDefaultStrategy() { |
||||
|
return defaultStrategy; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,492 @@ |
|||||
|
--- |
||||
|
id: "24" |
||||
|
title: w10-设计模式 |
||||
|
slug: w10-design-patterns |
||||
|
status: draft |
||||
|
view_count: 0 |
||||
|
created_at: 2026-05-07T12:00:00+08:00 |
||||
|
updated_at: 2026-05-07T14:00:00.000000000+08:00 |
||||
|
--- |
||||
|
|
||||
|
# 高级程序设计 · 第10周 |
||||
|
|
||||
|
### 设计模式:灵活性与可扩展性 |
||||
|
|
||||
|
### 策略模式 + 工厂 + Repository 实战 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 📌 本周导航 |
||||
|
|
||||
|
- W9回顾:骨架的成就与隐患 |
||||
|
- 策略模式:解析器的“插头标准” |
||||
|
- 解析器工厂:自动匹配的魔法 |
||||
|
- Repository:武装数据访问 |
||||
|
- 整体架构串联:调用链全程 |
||||
|
- 代码落地 + 实践任务 |
||||
|
- 架构反思 + W11 预告 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 1️⃣ W9回顾:骨架的成就与隐患 |
||||
|
|
||||
|
### 我们建了一座漂亮的房子 |
||||
|
|
||||
|
- ✅ MVC 分层清晰 |
||||
|
- ✅ Command 模式:**新增命令,Controller 零改动** |
||||
|
- ✅ 所有输出走 `ConsoleView` |
||||
|
- ✅ 工程包结构标准 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 但问题也随之而来 |
||||
|
|
||||
|
```java |
||||
|
// CrawlCommand 里解析逻辑怎么办? |
||||
|
if (url.contains("blog.example.com")) { |
||||
|
// 博客解析... |
||||
|
} else if (url.contains("news.example.com")) { |
||||
|
// 新闻解析... |
||||
|
} else { |
||||
|
view.printError("Unsupported website!"); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 😫 每支持一个新网站,就要加一个 `else if` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 还有另一个“裸奔”的数据 |
||||
|
|
||||
|
```java |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
// 所有 Command 都可以: |
||||
|
articles.clear(); |
||||
|
articles.add(null); |
||||
|
articles.remove(0); |
||||
|
``` |
||||
|
|
||||
|
> 🚨 数据没有任何保护,靠口头约定是靠不住的 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 本周任务 |
||||
|
|
||||
|
1. **解析逻辑可插拔** → 策略模式 + 工厂 |
||||
|
2. **数据访问加守卫** → Repository 模式 |
||||
|
|
||||
|
> W9 搭骨架,W10 装盔甲 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 2️⃣ 策略模式:解析器的“插头标准” |
||||
|
|
||||
|
### 墙上的插座,为什么什么电器都能插? |
||||
|
|
||||
|
- **三孔插座** 是标准接口 |
||||
|
- 电视、电脑、手机充电器都实现这个接口 |
||||
|
- 插座不关心你是什么电器 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 爬虫的世界也一样 |
||||
|
|
||||
|
- `CrawlStrategy` = 插座接口 |
||||
|
- `BlogStrategy`、`NewsStrategy` = 具体电器 |
||||
|
- `CrawlCommand` = 使用电器的人 |
||||
|
- `StrategyFactory` = 插座面板 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 接口即合同 |
||||
|
|
||||
|
```java |
||||
|
public interface CrawlStrategy { |
||||
|
List<Article> parse(String url, Document doc); |
||||
|
boolean supports(String url); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
- `supports()`:我能不能处理这个 URL? |
||||
|
- `parse()`:怎么解析? |
||||
|
- **任何网站想被爬,签这份合同!** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 策略 vs 硬编码 |
||||
|
|
||||
|
| 维度 | if-else 屎山 | 策略模式 | |
||||
|
|------|-------------|----------| |
||||
|
| 新增网站 | 改 Command | 新建策略类 | |
||||
|
| 修改解析 | 翻找 else if | 只改对应类 | |
||||
|
| 测试 | 启动整个爬虫 | 单独测策略 | |
||||
|
| 开闭原则 | ❌ 修改开放 | ✅ 扩展开放,修改关闭 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 具体策略示例 |
||||
|
|
||||
|
```java |
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
for (Element e : doc.select(".post-title")) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✨ 一个新网站,一个独立类,各扫门前雪 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 3️⃣ 解析器工厂:自动匹配的魔法 |
||||
|
|
||||
|
### 谁来选择策略? |
||||
|
|
||||
|
- 如果 `CrawlCommand` 遍历所有策略 → 策略模式白用了 |
||||
|
- 我们需要一个黑盒子:**丢入 URL,返回合适的解析器** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 工厂登场 |
||||
|
|
||||
|
```java |
||||
|
public class StrategyFactory { |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
|
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) return s; |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔧 新增网站只需:新建策略类 + 工厂里注册一行 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 开闭原则的胜利 |
||||
|
|
||||
|
- ✅ `CrawlCommand` 完全不改 |
||||
|
- ✅ 新增 `XxxStrategy` 和一行注册 |
||||
|
- ✅ 所有策略的调用方式完全一致 |
||||
|
|
||||
|
> 这就是 **“对扩展开放,对修改关闭”** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 重构后的 CrawlCommand |
||||
|
|
||||
|
```java |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
String url = args[1]; |
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
List<Article> parsed = strategy.parse(url, doc); |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + parsed.size() + " articles."); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🧠 CrawlCommand 现在只做 **“调度”**,不做解析 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 4️⃣ Repository:武装数据访问 |
||||
|
|
||||
|
### 共享 List 的问题 |
||||
|
|
||||
|
```java |
||||
|
articles.clear(); // 清空 |
||||
|
articles.add(null); // 塞 null |
||||
|
articles.remove(0); // 随意删除 |
||||
|
``` |
||||
|
|
||||
|
> 靠约定维护的秩序,终将被打破 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 给数据装上防盗门 |
||||
|
|
||||
|
```java |
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) throw new IllegalArgumentException(...); |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { return articles.size(); } |
||||
|
|
||||
|
public void clear() { articles.clear(); } |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 三道防线 |
||||
|
|
||||
|
| 机制 | 作用 | |
||||
|
|------|------| |
||||
|
| **add 拒绝 null** | 规则写在代码里,不靠口头约定 | |
||||
|
| **getAll 返回不可变视图** | 任何修改立即抛异常 | |
||||
|
| **必须通过 repository 访问** | 封装内部结构,只暴露安全方法 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 所有 Command 签名改变 |
||||
|
|
||||
|
```java |
||||
|
// W9 |
||||
|
public void execute(String[] args, List<Article> articles); |
||||
|
|
||||
|
// W10 |
||||
|
public void execute(String[] args, ArticleRepository repository); |
||||
|
``` |
||||
|
|
||||
|
> 语义变化:从“给你数据随便玩” → “给你安全的存取通道” |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 5️⃣ 整体架构串联 |
||||
|
|
||||
|
### 一个 `crawl` 命令的完整旅程 |
||||
|
|
||||
|
``` |
||||
|
用户输入 "crawl https://blog.example.com" |
||||
|
↓ |
||||
|
ConsoleView 读取输入 |
||||
|
↓ |
||||
|
Controller 路由 → CrawlCommand |
||||
|
↓ |
||||
|
StrategyFactory.getStrategy(url) → BlogStrategy |
||||
|
↓ |
||||
|
Jsoup 抓取 → Document |
||||
|
↓ |
||||
|
BlogStrategy.parse(url, doc) → List<Article> |
||||
|
↓ |
||||
|
Repository.add() 存储 |
||||
|
↓ |
||||
|
ConsoleView 输出成功信息 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 架构全景图 |
||||
|
|
||||
|
 |
||||
|
|
||||
|
```mermaid |
||||
|
flowchart TD |
||||
|
User(["👤 用户输入<br/>crawl https://blog.example.com"]) --> View |
||||
|
|
||||
|
subgraph View["🎨 View 层 (ConsoleView)"] |
||||
|
ReadLine["readLine()"] |
||||
|
Display["display() / printSuccess()"] |
||||
|
end |
||||
|
|
||||
|
ReadLine --> Controller |
||||
|
|
||||
|
subgraph Controller["🧭 Controller 层"] |
||||
|
Router["CrawlerController<br/>Map 路由"] |
||||
|
end |
||||
|
|
||||
|
Router --> Command |
||||
|
|
||||
|
subgraph Command["⚡ Command 层"] |
||||
|
CrawlCmd["CrawlCommand<br/>(调度者)"] |
||||
|
end |
||||
|
|
||||
|
CrawlCmd --> Factory |
||||
|
|
||||
|
subgraph Strategy["🧩 Strategy 层"] |
||||
|
Factory["StrategyFactory<br/>(自动匹配)"] |
||||
|
StrategyI["<<interface>> CrawlStrategy"] |
||||
|
BlogS["BlogStrategy"] |
||||
|
NewsS["NewsStrategy"] |
||||
|
Factory --> StrategyI --> BlogS |
||||
|
StrategyI --> NewsS |
||||
|
end |
||||
|
|
||||
|
BlogS --> Repository |
||||
|
|
||||
|
subgraph Repository["🔐 Repository 层"] |
||||
|
Repo["ArticleRepository<br/>(add / getAll)"] |
||||
|
RepoList["List&lt;Article&gt; (私有)"] |
||||
|
Repo --> RepoList |
||||
|
end |
||||
|
|
||||
|
RepoList --> Model |
||||
|
|
||||
|
subgraph Model["📦 Model 层"] |
||||
|
Article["Article"] |
||||
|
end |
||||
|
|
||||
|
CrawlCmd --> Display |
||||
|
Repository --> Display |
||||
|
``` |
||||
|
|
||||
|
> 🗺️ 每一层都有清晰的职责,每一处扩展都只需要新增而不是修改 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 6️⃣ 代码落地(分步升级) |
||||
|
|
||||
|
### 从 W9 升级到 W10 的改动清单 |
||||
|
|
||||
|
1. 新建 `strategy/` 包 → `CrawlStrategy` 接口 |
||||
|
2. 实现 `BlogStrategy`、`NewsStrategy` |
||||
|
3. 实现 `StrategyFactory` |
||||
|
4. 新建 `repository/` 包 → `ArticleRepository` |
||||
|
5. 修改 `Command` 接口签名 |
||||
|
6. 重写 `CrawlCommand` |
||||
|
7. 调整其他所有 `Command` |
||||
|
8. 调整 `Controller` 和 `App.java` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 关键代码演示 |
||||
|
|
||||
|
- `Collections.unmodifiableList()` 的用法 |
||||
|
- `StrategyFactory.getStrategy()` 的遍历逻辑 |
||||
|
- `CrawlCommand` 从“写死解析”到“调度组装” |
||||
|
|
||||
|
```java |
||||
|
// 一个改动示例 |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); // 旧: articles.add(a); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 找茬点 |
||||
|
|
||||
|
- `StrategyFactory` 没匹配到策略时返回 `null` |
||||
|
- `CrawlCommand` 检查 `null` 并报错 |
||||
|
- 有没有更优雅的方式避免 `null` 判断? |
||||
|
|
||||
|
> 🔍 课后用 AI 探索 “空对象模式” 的前奏 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 7️⃣ 架构反思 + 下周预告 |
||||
|
|
||||
|
### 当前架构的脆弱点 |
||||
|
|
||||
|
- ❌ 异常处理单一笼统 |
||||
|
- ❌ 没有重试机制 |
||||
|
- ❌ 网络超时无控制 |
||||
|
- ❌ 日志仅输出到终端 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### W11 目标:健壮性工程 |
||||
|
|
||||
|
- ✅ **自定义异常体系**:把“出错了”变成具体的业务异常 |
||||
|
- ✅ **工程化日志**:记录谁、什么时间、做了什么 |
||||
|
- ✅ **防御式编程 + 重试机制**:网络抖动不再致命 |
||||
|
|
||||
|
> W9 搭骨架 → W10 装盔甲 → W11 让它经得起毒打 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 8️⃣ 实践任务(现场) |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. 基于 W9 项目升级到 W10 |
||||
|
2. 至少实现 2 个 CrawlStrategy(可模拟) |
||||
|
3. 实现 `StrategyFactory` 和 `ArticleRepository` |
||||
|
4. 测试完整 `crawl` → `list` 流程 |
||||
|
|
||||
|
### 验收标准 |
||||
|
|
||||
|
- [ ] 新增策略只加类+注册,零改动旧代码 |
||||
|
- [ ] `getAll()` 返回不可修改视图 |
||||
|
- [ ] `CrawlCommand` 不含网站特定解析 |
||||
|
- [ ] 所有 Command 用 Repository |
||||
|
- [ ] 无地方直接操作 `List<Article>` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 9️⃣ 课后作业 |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. 完善 `ArticleRepository`:增加 `addAll`,防御 null |
||||
|
2. **★ AnalyzeCommand**:复用策略解析但不存储,输出统计信息 |
||||
|
3. **AI 架构审计**:发送类签名给 AI,检查策略解耦与封装 |
||||
|
|
||||
|
### 选做 |
||||
|
|
||||
|
- 正则策略匹配、默认策略、策略优先级 |
||||
|
- 思考题:两个策略都 `supports` 同一 URL 时怎么办? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤖 AI 协同升级 |
||||
|
|
||||
|
### 架构审计师(必做) |
||||
|
|
||||
|
- 画出类依赖图 |
||||
|
- 发给 AI:“检查开闭原则达成度,Repository 封装完备性,是否存在循环依赖” |
||||
|
|
||||
|
### 进阶探究 |
||||
|
|
||||
|
- 不用工厂,直接用 `Map<String, CrawlStrategy>` 存起来 vs `StrategyFactory` 的区别? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 📚 总结 |
||||
|
|
||||
|
- ✅ 策略模式:算法可插拔,新增网站零痛苦 |
||||
|
- ✅ 工厂:自动匹配,URL → 策略的魔法 |
||||
|
- ✅ Repository:数据守卫,规则从口头约定变成代码强制 |
||||
|
- ✅ 架构:从“分开”到“优雅合上”,对扩展开放,对修改关闭 |
||||
|
|
||||
|
### W11 预告 |
||||
|
|
||||
|
自定义异常体系 + 日志 + 重试机制 |
||||
|
|
||||
|
> 🚀 让我们造的爬虫,经得住现实的考验 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 谢谢! |
||||
|
|
||||
|
**保持工程洁癖,下周见!** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
# 居中标题 |
||||
|
|
||||
|
## 居中副标题 |
||||
|
|
||||
|
### 居中内容 |
||||
|
|
||||
|
--- |
||||
@ -0,0 +1,52 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.example</groupId> |
||||
|
<artifactId>datacollect-cli</artifactId> |
||||
|
<version>0.1.0</version> |
||||
|
<properties>
    <!-- Pin the source encoding so the build does not depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
</properties>
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.3.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.example.datacollect.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,21 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
StrategyFactory strategyFactory = new StrategyFactory(); |
||||
|
CrawlerController controller = new CrawlerController(view, repository, strategyFactory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands."); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,8 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
|
||||
|
/** A named action that can be run from the command line. */
public interface Command {
    /** Returns the name under which this command is registered and looked up. */
    String getName();

    /**
     * Runs the command.
     *
     * @param args       tokenised input line; implementations in this project read the
     *                   first argument from {@code args[1]}
     * @param repository article store the command may read from or write to
     */
    void execute(String[] args, ArticleRepository repository);
}
||||
@ -0,0 +1,50 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
view.printInfo("Crawling: " + url); |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
var articles = strategy.parse(url, doc); |
||||
|
for (var article : articles) { |
||||
|
repository.add(article); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + articles.size() + " articles."); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Failed to crawl: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,22 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,22 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.display(repository.getAll()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,45 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" |
||||
|
+ "title='" + title + '\'' |
||||
|
+ ", url='" + url + '\'' |
||||
|
+ '}'; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HnuNewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.hnu.edu.cn"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements listItems = doc.select("ul.list11 li"); |
||||
|
|
||||
|
for (Element li : listItems) { |
||||
|
Element link = li.selectFirst("a"); |
||||
|
if (link == null) continue; |
||||
|
|
||||
|
String articleUrl = link.attr("href"); |
||||
|
if (!articleUrl.startsWith("http")) { |
||||
|
articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); |
||||
|
} |
||||
|
|
||||
|
String title = ""; |
||||
|
Element titleEl = link.selectFirst("h4.l2.h4s2"); |
||||
|
if (titleEl != null) { |
||||
|
title = titleEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
String content = ""; |
||||
|
Element contentEl = link.selectFirst("p.l3.ps3"); |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty()) { |
||||
|
articles.add(new Article(title, articleUrl, content)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,42 @@ |
|||||
|
package com.example.datacollect.view; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
return scanner.nextLine(); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,758 @@ |
|||||
|
--- |
||||
|
|
||||
|
# 教案:《高级程序设计》第9周——工程架构:从"写代码"到"造系统" |
||||
|
|
||||
|
| 项目 | 内容 | |
||||
|
|------|------| |
||||
|
| **课程名称** | 高级程序设计 | |
||||
|
| **周次** | 第9周 | |
||||
|
| **主题** | 工程架构——从"写代码"到"造系统" | |
||||
|
| **学时** | 2学时(90分钟) | |
||||
|
| **授课对象** | 具备Python基础、已完成Java面向对象特性学习的学生 | |
||||
|
| **教学环境** | JDK 17+、IntelliJ IDEA、Maven(模板) | |
||||
|
| **前情提要** | 本课程原计划使用JavaFX GUI,后根据教学反馈转向CLI + MVC + 爬虫工程化 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 教学调整说明:为什么选择CLI而不是GUI? |
||||
|
|
||||
|
> **原计划**:JavaFX桌面应用 → **新计划**:CLI命令行应用 |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | |
||||
|
| **学生痛点** | "窗口点击"与后端能力无关 | 真正锻炼工程思维 | |
||||
|
| **AI辅助** | AI生成FXML,学生看不懂 | AI辅助重构架构 | |
||||
|
| **工程化** | 脱离真实后端开发场景 | 模拟真实服务器/大数据开发 | |
||||
|
| **核心转型** | "视觉装饰"优先 | "逻辑架构"优先 | |
||||
|
|
||||
|
**决策理由**: |
||||
|
1. **985学生需要的是工程思维**,不是拖控件 |
||||
|
2. **接口抽象**是弱项,CLI + MVC更能暴露这个问题 |
||||
|
3. **彩色终端**足够酷炫,且代码量可控 |
||||
|
|
||||
|
**更深层的教育价值**: |
||||
|
> 在GUI框架中,架构已被框架强制划定,学生只是"遵守规矩";而CLI世界里没有任何框架告诉你模型在哪、视图在哪——**当外部约束消失,内部的工程纪律才真正建立**。这正是本节课要传递的核心精神。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 一、教学目标 |
||||
|
|
||||
|
| 目标维度 | 具体描述 | |
||||
|
|----------|----------| |
||||
|
| **知识掌握** | 理解MVC架构的职责划分及其演化脉络;掌握Maven项目结构与pom.xml基础;理解Command模式的路由原理。 | |
||||
|
| **工程实践** | 能搭建规范的Maven项目包结构;能实现基于Scanner的控制台交互;能用Command接口实现可扩展的命令路由;能识别架构中的"越权行为"。 | |
||||
|
| **思维转型** | 从"一个类写全部"转向"分层解耦";从"修改现有代码"转向"新增类实现功能";从"满足功能"转向"代码的工程洁癖"。 | |
||||
|
| **工具应用** | 利用AI辅助审查MVC职责越权;让AI扮演"架构审计师"检查分层是否清晰;理解AI生成代码中的架构缺陷。 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 二、教学重点与难点 |
||||
|
|
||||
|
| 项目 | 内容 | 突破方法 | |
||||
|
|------|------|----------| |
||||
|
| **重点** | MVC三层职责划分、CLI交互实现、Command接口解耦、代码中的工程细节(常量、输出归属) | 以"新增命令需要改什么"为切入点,展示Command模式的优势;通过现场"代码找茬"强化细节意识 | |
||||
|
| **难点** | Controller不写业务逻辑、Command接口的多态实现、共享数据模型的设计缺陷识别 | 现场演示:增加一个命令只需新建类,无需修改Controller;暴露`List<Article>`共享引用的问题并预告解决方案 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三、教学过程设计(90分钟) |
||||
|
|
||||
|
| 环节 | 时间 | 教学内容 | 师生活动 | AI协同点 | |
||||
|
|------|------|----------|----------|----------| |
||||
|
| **1. 痛点引入:从脚本到工程的鸿沟** | 10' | 展示"意大利面"式爬虫代码,演示改一处需要动全身 | **教师演示**:现场展示一段混乱代码,让学生找问题 | 用AI分析代码耦合度 | |
||||
|
| **2. CLI vs GUI:架构选择的思考** | 10' | 对比两种方案的优缺点,解释为什么CLI更适合培养工程思维 | **教师讲解**:用对比表格说明选择CLI的理由 | — | |
||||
|
| **3. MVC分层设计** | 20' | 讲解Model/View/Controller三层职责,用"餐厅类比"强化理解,随后批判类比局限性 | **教师讲解**:配合架构图讲解三层交互,引导学生寻找类比破绽 | 用AI生成MVC职责对照表 | |
||||
|
| **4. Command模式:可扩展的命令路由** | 15' | 引入Command接口,解释"一个命令就是一个类" | **类比**:Command像酒店的服务部门,Controller是前台 | 让AI解释Command模式的多态原理 | |
||||
|
| **5. Maven模板与环境** | 5' | 直接使用提供的Maven模板,讲解目录结构 | **教师演示**:解压模板 → IDEA打开 → 运行 | — | |
||||
|
| **6. 三层代码落地** | 20' | **Model**:Article实体<br>**View**:ConsoleView(ANSI常量)<br>**Command接口**+实现<br>**Controller**:Map路由 | **教师演示**:分步写出代码,刻意埋入1~2个"越权细节"让学生找茬 | 学生用AI做"架构审计" | |
||||
|
| **7. 架构反思与展望** | 5' | 指出当前`List<Article>`共享引用的问题,预告W10策略模式与仓库层 | **师生互动**:你发现这个设计有什么风险? | 让AI分析共享可变状态的危害 | |
||||
|
| **8. 实践任务:空壳程序** | 5' | 搭建完整包结构,实现CLI循环 | 学生现场编码,教师巡视 | 完成后用AI检查包结构 | |
||||
|
| **9. 总结与过渡** | 5' | 本周实现了"骨架+命令可扩展",下周填入"灵魂"——解析器,并解决数据安全问题 | 总结Command模式优势,预告策略模式 | — | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 四、核心教学内容脚本 |
||||
|
|
||||
|
### 4.1 痛点引入:从脚本到工程的鸿沟(10分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "同学们,前8周我们学的是Java语法,从变量到类,从继承到接口。但有一个问题:代码写完之后,怎么组织?" |
||||
|
> |
||||
|
> "来看这段代码——这是某个同学写的'爬虫',他一个人完成了一个'完整'的项目。" |
||||
|
|
||||
|
**展示"脚本式"代码**: |
||||
|
```java |
||||
|
public class Crawler { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.print("请输入URL: "); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
String url = scanner.nextLine(); |
||||
|
|
||||
|
List titles = new ArrayList(); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements elements = doc.select(".post-title"); |
||||
|
for (Element e : elements) { |
||||
|
String title = e.text(); |
||||
|
System.out.println("标题: " + title); |
||||
|
titles.add(title); |
||||
|
} |
||||
|
} catch (Exception ex) { |
||||
|
System.out.println("出错啦: " + ex.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问引导**: |
||||
|
1. "如果我想把标题保存到文件,要改哪里?" |
||||
|
2. "如果我想支持另一个网站,它的HTML结构不一样,要怎么办?" |
||||
|
3. "如果我想让输出变成彩色,要改哪里?" |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "看到了吗?才20行左右的代码,已经'牵一发而动全身'了。这就是一个'脚本'的宿命——功能全混在一起,改一个小需求,整个文件都要翻。" |
||||
|
> |
||||
|
> "这周我们要解决:**怎么让代码'改起来不疼'?**" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.2 CLI vs GUI:架构选择的思考(10分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "既然要写一个'完整'的爬虫应用,我们有两个选择:图形界面(GUI)或命令行界面(CLI)。为什么我推荐CLI而不是GUI?" |
||||
|
|
||||
|
**对比表格** |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| **代码量** | FXML + Controller + CSS,大量模板代码 | 纯Java,代码量可控 | |
||||
|
| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | |
||||
|
| **后端能力** | 几乎无关 | 模拟真实服务器开发 | |
||||
|
| **可测试性** | 难(需要UI测试框架) | 易(直接测试Command类) | |
||||
|
| **工程思维** | 弱(关注视觉) | 强(关注逻辑) | |
||||
|
|
||||
|
**核心观点**: |
||||
|
> **CLI更需要MVC!** GUI有现成的事件系统(点击按钮→触发事件),而CLI只有字符流。**没有架构,分分钟写成脚本**。MVC在CLI里是"刚需",不是"装饰"。 |
||||
|
> |
||||
|
> **更深一层**:在GUI里,框架已经硬塞给你一套架构,你只是在填空;但在CLI里,所有结构都必须由你亲手搭建。**当外部约束消失,内部的工程纪律才真正开始建立**——这才是本节课的真正目的。 |
||||
|
|
||||
|
**CLI也能很酷**: |
||||
|
- ANSI彩色输出(红/绿/黄/蓝) |
||||
|
- 表格展示数据 |
||||
|
- 进度条动画 |
||||
|
- 模拟真实大数据开发场景 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.3 MVC分层设计(20分钟) |
||||
|
|
||||
|
#### 4.3.1 MVC的起源与演进 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "MVC不是新东西,它是1970年代为桌面应用设计的架构思想。但它的核心——'职责分离'——在任何软件里都适用。" |
||||
|
|
||||
|
| 年代 | 场景 | MVC的角色 | |
||||
|
|------|------|----------| |
||||
|
| 1970s | Smalltalk-76/80 GUI | 最早的用户界面架构 | |
||||
|
| 1990s | Web开发 (Struts) | 后端模板引擎 | |
||||
|
| 2000s | ASP.NET MVC | 现代Web框架 | |
||||
|
| 2020s | CLI + API | 解耦业务逻辑与表现层 | |
||||
|
|
||||
|
#### 4.3.2 从GUI到CLI的映射 |
||||
|
|
||||
|
| GUI组件 | CLI对应 | 说明 | |
||||
|
|--------|--------|------| |
||||
|
| 窗口/按钮 | 命令行输入 | **View = 用户交互** | |
||||
|
| 数据模型 | Article实体类 | **Model = 数据结构** | |
||||
|
| 事件监听 | Command路由 | **Controller = 调度** | |
||||
|
|
||||
|
#### 4.3.3 MVC三层职责 |
||||
|
|
||||
|
**架构图示**: |
||||
|
|
||||
|
``` |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ 入口 │ |
||||
|
│ (main方法) │ |
||||
|
└─────────────────┬───────────────────────┘ |
||||
|
│ |
||||
|
▼ |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ Controller │ |
||||
|
│ - 接收命令(crawl, help, exit) │ |
||||
|
│ - 分发给对应的Command │ |
||||
|
│ 【口诀】:Controller不管"怎么做", │ |
||||
|
│ 只管"派给谁" │ |
||||
|
└─────────┬───────────────┬───────────────┘ |
||||
|
│ │ |
||||
|
▼ ▼ |
||||
|
┌─────────────────┐ ┌─────────────────┐ |
||||
|
│ Model │ │ View │ |
||||
|
│ - 数据实体 │ │ - 输入解析 │ |
||||
|
│ - 业务逻辑 │ │ - 输出格式化 │ |
||||
|
│ 【口诀】: │ │ 【口诀】: │ |
||||
|
│ Model管"数据" │ │ View管"呈现" │ |
||||
|
└─────────────────┘ └─────────────────┘ |
||||
|
``` |
||||
|
|
||||
|
**三层职责详解** |
||||
|
|
||||
|
| 层级 | 职责 | 典型代码 | 禁止做什么 | |
||||
|
|------|------|----------|------------| |
||||
|
| **Model** | 数据结构 + 业务逻辑 | `class Article { String title; String content; }` | 不能有`System.out.println`,不能有`Scanner` | |
||||
|
| **View** | 接收用户输入 + 格式化输出 | `class ConsoleView { String readInput(); void print(String); }` | 不能写爬虫逻辑,只做"传声筒" | |
||||
|
| **Controller** | 协调调度 | `class CrawlerController { void handle(String cmd) { ... } }` | 不能直接写业务细节,委托给Command | |
||||
|
|
||||
|
#### 4.3.4 类比强化:"餐厅类比" |
||||
|
|
||||
|
> "把MVC想象成一家餐厅: |
||||
|
> - **Model是后厨**:只管做菜(数据加工),不管谁来吃、怎么端 |
||||
|
> - **View是服务员**:只管端菜和收钱(输入输出),不管菜怎么做 |
||||
|
> - **Controller是前台**:只管把顾客的点单传给后厨,把做好的菜端给顾客 |
||||
|
> |
||||
|
> 如果后厨开始管'谁来吃饭',这餐厅就乱了。" |
||||
|
|
||||
|
#### 4.3.5 对"餐厅类比"的批判性思考(关键!) |
||||
|
|
||||
|
**教师导引**: |
||||
|
> "刚才的类比好理解吗?很好。但任何一个类比都有它的边界,如果把它当成真理,就会出问题。现在我们来给这个类比'找茬'。" |
||||
|
|
||||
|
**提问学生**: |
||||
|
1. "后厨真的完全不知道客人是谁吗?如果客人有忌口(比如不吃香菜),这个信息需不需要传到后厨?" |
||||
|
2. "服务员只是端菜吗?在真实餐厅里,服务员经常向后厨反馈'客人觉得今天的菜咸了',这属于View→Model的反向影响吗?" |
||||
|
3. "在这个类比里,我们把前台(Controller)和后厨(Model)的关系说成单向的。但实际上,后厨做完了菜,需要通知前台'菜好了',这不就是**观察者模式**吗?" |
||||
|
|
||||
|
**点明本质**: |
||||
|
> "实际MVC的数据流向常常是**双向**的:Controller调用Model的方法改变数据,Model变化后又通知View更新显示。只不过在本次CLI项目中,我们暂时使用'请求-响应'的单向简化模型——用户输入命令,系统处理,然后立即输出结果。这个简化版够用,但你要知道完整的MVC是更动态的。随着系统复杂,Model层需要一个专门的'仓库类'来管理数据,并通知视图刷新——这正是W10我们将要深入的内容。" |
||||
|
|
||||
|
#### 4.3.6 MVC的数据流向(本课程简化版) |
||||
|
|
||||
|
``` |
||||
|
CLI用户输入 |
||||
|
↓ |
||||
|
View(解析命令字符串) |
||||
|
↓ |
||||
|
Controller(找到对应Command) |
||||
|
↓ |
||||
|
Command.execute()(执行业务逻辑) |
||||
|
↓ |
||||
|
Model(Article数据,目前暂存于List) |
||||
|
↓ |
||||
|
View(display()展示数据) |
||||
|
↓ |
||||
|
CLI终端显示 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.4 Command模式:可扩展的命令路由(15分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在引入一个设计模式——Command(命令)模式。它的核心思想是:**一个命令就是一个类**。" |
||||
|
|
||||
|
#### 4.4.1 为什么需要Command模式? |
||||
|
|
||||
|
**演示:增加一个命令的代价(switch-case版)** |
||||
|
```java |
||||
|
// 现状代码 |
||||
|
switch (cmd) { |
||||
|
case "crawl": handleCrawl(); break; |
||||
|
case "help": showHelp(); break; |
||||
|
// 如果要增加 list 命令? |
||||
|
// 1. 加 case "list" |
||||
|
// 2. 加 handleList() 方法 |
||||
|
// 3. 可能还要改其他地方... |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问**: |
||||
|
- "如果我想增加10个命令,这个类要改多少次?" |
||||
|
- "如果我不小心删了一个case,整个程序还能跑吗?" |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "每加一个功能,就要在这个类里戳一个洞。**这就是'肥控制器'陷阱**——所有的逻辑都堆在Controller里,它变成了新的'意大利面'。" |
||||
|
|
||||
|
#### 4.4.2 Command模式的四个要素 |
||||
|
|
||||
|
| 要素 | 角色 | 示例 | |
||||
|
|------|------|------| |
||||
|
| **Command接口** | 抽象的"订单" | `Command` 接口 | |
||||
|
| **ConcreteCommand** | 具体的订单 | `HelpCommand`、`CrawlCommand` | |
||||
|
| **Invoker** | 接单的前台 | `CrawlerController` | |
||||
|
| **Receiver** | 执行者 | `ConsoleView`、`ArticleRepository` | |
||||
|
|
||||
|
#### 4.4.3 Command接口定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/command/Command.java |
||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); // 命令名,如 "crawl" |
||||
|
void execute(String[] args, List<Article> articles); // 执行逻辑 |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.4.4 Controller的变革(从switch到Map) |
||||
|
|
||||
|
```java |
||||
|
// 修改后的Controller |
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands; // 用Map存命令 |
||||
|
private ConsoleView view; // 持有View以输出错误 |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.commands = new HashMap<>(); |
||||
|
// 增加命令无需改Controller代码,只需在这里注册 |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
if (input.isEmpty()) return; |
||||
|
String[] parts = input.split("\\s+"); |
||||
|
String cmd = parts[0].toLowerCase(); |
||||
|
|
||||
|
Command command = commands.get(cmd); |
||||
|
if (command == null) { |
||||
|
view.printError("Unknown command: " + cmd); // 通过View输出,而非直接System.out |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 执行命令,传入参数和文章列表 |
||||
|
command.execute(parts, articles); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**对比表格** |
||||
|
|
||||
|
| 维度 | switch-case | Command模式 | |
||||
|
|------|-------------|-------------| |
||||
|
| 增加命令 | 要改Controller | 新建一个类 | |
||||
|
| 多态体验 | 无 | execute()的多态调用 | |
||||
|
| 可测试性 | 难 | 每个Command可单独测试 | |
||||
|
| 代码量 | 少 | 多,但更清晰 | |
||||
|
|
||||
|
**类比强化**: |
||||
|
> "Command模式就像**酒店的客房服务**:每个服务(清理、送餐、按摩)都是一个独立的部门。前台(Controller)只负责接电话,然后把请求'派发'给对应的部门。部门自己知道怎么干活,不需要前台教。" |
||||
|
> |
||||
|
> "如果想新增一个服务,前台只需要'登记'一下,不需要把现有部门重新装修。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.5 Maven模板与环境(5分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这周我们不发愁pom.xml配置。我已经把 Maven 模板准备好了,你们只需要解压、打开、运行。" |
||||
|
|
||||
|
**模板使用流程**: |
||||
|
``` |
||||
|
1. 解压 [my-crawler-template.zip] |
||||
|
2. 用 IDEA 打开文件夹 |
||||
|
3. 右键 pom.xml → Maven → Reload Project |
||||
|
4. 运行 App.java |
||||
|
``` |
||||
|
|
||||
|
**标准目录结构**: |
||||
|
``` |
||||
|
src/main/java/com/crawler/ |
||||
|
├── model/ |
||||
|
│ └── Article.java |
||||
|
├── view/ |
||||
|
│ └── ConsoleView.java |
||||
|
├── command/ |
||||
|
│ ├── Command.java (接口) |
||||
|
│ ├── CrawlCommand.java |
||||
|
│ ├── HelpCommand.java |
||||
|
│ ├── ListCommand.java |
||||
|
│ └── ExitCommand.java |
||||
|
└── controller/ |
||||
|
└── CrawlerController.java |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.6 代码落地(20分钟) |
||||
|
|
||||
|
#### 4.6.1 Model层:Article实体 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/model/Article.java |
||||
|
package com.crawler.model; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { return title; } |
||||
|
public void setTitle(String title) { this.title = title; } |
||||
|
public String getUrl() { return url; } |
||||
|
public void setUrl(String url) { this.url = url; } |
||||
|
public String getContent() { return content; } |
||||
|
public void setContent(String content) { this.content = content; } |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{title='" + title + "', url='" + url + "'}"; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.2 View层:ANSI常量集中管理(工程细节!) |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/view/ConsoleView.java |
||||
|
package com.crawler.view; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
// ANSI颜色常量——集中管理,避免散落各处 |
||||
|
private static final String ANSI_GREEN = "\033[32m"; |
||||
|
private static final String ANSI_RED = "\033[31m"; |
||||
|
private static final String ANSI_CYAN = "\033[36m"; |
||||
|
private static final String ANSI_RESET = "\033[0m"; |
||||
|
|
||||
|
private Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("crawler> "); |
||||
|
return scanner.nextLine().trim(); |
||||
|
} |
||||
|
|
||||
|
public void print(String msg) { |
||||
|
System.out.println(msg); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
print(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
print(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
print(ANSI_CYAN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
// 展示文章列表 |
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("No articles yet. Use 'crawl <url>' first."); |
||||
|
return; |
||||
|
} |
||||
|
print("+----------+--------------------------------+"); |
||||
|
print("| Title | URL |"); |
||||
|
print("+----------+--------------------------------+"); |
||||
|
for (Article a : articles) { |
||||
|
String title = a.getTitle(); |
||||
|
if (title.length() > 10) title = title.substring(0, 10) + ".."; |
||||
|
String url = a.getUrl(); |
||||
|
if (url.length() > 30) url = url.substring(0, 27) + "..."; |
||||
|
print("| " + String.format("%-10s", title) + " | " + url + " |"); |
||||
|
} |
||||
|
print("+----------+--------------------------------+"); |
||||
|
printInfo("Total: " + articles.size() + " articles"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师提示**: |
||||
|
> "注意:所有ANSI转义码都被定义为`private static final`常量。如果把`\033[32m`散落在项目各处,一旦想调整颜色,就得满世界去改——这正是我们之前痛批的'意大利面'。**这就是工程细节**。" |
||||
|
|
||||
|
#### 4.6.3 Command接口与四个实现(全部通过View输出) |
||||
|
|
||||
|
```java |
||||
|
// Command.java |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
|
||||
|
// HelpCommand.java |
||||
|
public class HelpCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public HelpCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "help"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ListCommand.java |
||||
|
public class ListCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ListCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "list"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.display(articles); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// CrawlCommand.java (存根) |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public CrawlCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "crawl"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ExitCommand.java |
||||
|
public class ExitCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ExitCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "exit"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printSuccess("Bye!"); // 全部输出都通过View,绝不让System.out直接出现在这里 |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**故意埋设的"找茬点"**: |
||||
|
> "我在刚才的代码里有没有隐藏违反MVC原则的地方?`CrawlCommand`的存根里,`view.printInfo("Stub: Would crawl " + args[1]);` —— 这个字符串拼接算是'业务逻辑'吗?留给大家用AI架构审计时讨论。" |
||||
|
|
||||
|
#### 4.6.4 Controller:Map路由(全部通过View输出) |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/controller/CrawlerController.java |
||||
|
package com.crawler.controller; |
||||
|
|
||||
|
import com.crawler.command.*; |
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands = new HashMap<>(); |
||||
|
private ConsoleView view; // 持有View |
||||
|
private List<Article> articles; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.articles = articles; |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
if (input.isEmpty()) return; |
||||
|
String[] parts = input.split("\\s+"); |
||||
|
String cmdName = parts[0].toLowerCase(); |
||||
|
|
||||
|
Command cmd = commands.get(cmdName); |
||||
|
if (cmd == null) { |
||||
|
view.printError("Unknown command: " + cmdName); // 错误信息也走View! |
||||
|
return; |
||||
|
} |
||||
|
cmd.execute(parts, articles); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.5 main方法:组装 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/App.java |
||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class App { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler!"); |
||||
|
view.printInfo("Type 'help' for commands."); |
||||
|
|
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.6 架构反思与展望:共享List<Article>的隐患(关键!) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在这个架构已经可用了。但请大家审视一下:我们所有的Command都直接拿到了`List<Article>`的引用。换句话说,任何一个命令都可以随意增、删、改这个列表。" |
||||
|
> |
||||
|
> "这就好像一家酒店,所有服务员、厨师、清洁工都能随意进出保险箱——**数据结构完全裸奔了**。" |
||||
|
|
||||
|
**提问**: |
||||
|
- "如果CrawlCommand不小心写错了代码,把一个null塞进articles,HelpCommand会不会受影响?" |
||||
|
- "如果未来我们要在添加文章时也写入日志文件,现在的设计能优雅实现吗?还是得在所有Command里分别加日志代码?" |
||||
|
|
||||
|
**预告解决方案**: |
||||
|
> "下周,我们将引入**策略模式**和一个真正的**Model仓库层(ArticleRepository)**。这个仓库会把`List`封装起来,对外只提供`add()`、`getAll()`等安全接口。任何命令想修改数据,都必须通过仓库。这就是从'数据结构'到'模型层'的进化——我们W9先搭骨架,W10给它装上盔甲。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.7 实践任务(5分钟) |
||||
|
|
||||
|
**任务要求**: |
||||
|
1. 使用Maven模板创建项目 |
||||
|
2. 实现完整包结构(model/view/command/controller) |
||||
|
3. 实现4个Command:help/list/crawl/exit |
||||
|
4. `list`命令能展示已抓取的文章 |
||||
|
5. 运行并测试循环 |
||||
|
6. **代码找茬(额外加分)**:找出你自己代码中是否存在`System.out`直接调用、硬编码ANSI字符串等"越权行为" |
||||
|
|
||||
|
**验收标准**: |
||||
|
- [x] Maven编译通过 |
||||
|
- [x] Command接口和4个实现分离在不同文件 |
||||
|
- [x] Controller里没有switch-case |
||||
|
- [x] 新增命令只需新建类并在Controller构造函数中注册一行,不改handle()路由逻辑 |
||||
|
- [x] list命令能正确显示空列表 |
||||
|
- [x] 所有输出均通过ConsoleView完成,无直接System.out.println(main除外) |
||||
|
- [x] ANSI颜色码集中定义为View常量 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 五、课后作业 |
||||
|
|
||||
|
### 5.1 必做任务 |
||||
|
|
||||
|
1. **完善Article**:增加`author`、`publishDate`字段 |
||||
|
2. **★ HistoryCommand(强制作业)**: |
||||
|
- 实现`history`命令,记录用户输入过的所有命令 |
||||
|
- 使用`List<String>`存储历史(复习W8集合) |
||||
|
- 示例输出: |
||||
|
``` |
||||
|
crawler> history |
||||
|
1. help |
||||
|
2. list |
||||
|
3. crawl https://example.com |
||||
|
``` |
||||
|
3. **AI架构审计**:将类名和方法名发给AI,指令: |
||||
|
> "作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?Model层是否包含输入输出代码?View层是否越权写了业务逻辑?有没有地方直接使用了System.out或硬编码ANSI码?" |
||||
|
|
||||
|
### 5.2 选做任务 |
||||
|
|
||||
|
1. **命令别名**:给`crawl`增加别名`c`,`help`增加别名`h` |
||||
|
2. **URL验证**:检查URL格式是否以http://或https://开头 |
||||
|
3. **暗色主题**:实现不同的配色方案(利用View中的ANSI常量,只需修改一处即可) |
||||
|
4. **思考并回答**:分析`List<Article>`共享引用的潜在风险,写一段200字的小结 |
||||
|
|
||||
|
### 5.3 思考题 |
||||
|
|
||||
|
1. **Command vs switch-case**:增加10个命令,哪种方式代码改动量更小? |
||||
|
2. **如果不用Command接口,直接用Map存命令类行不行?** 接口的意义是什么? |
||||
|
3. **Controller里的`commands.put()`能否减少?** 提示:思考"注册机制" |
||||
|
4. **为什么ExitCommand里的`view.printSuccess("Bye!")`比直接`System.out.println`更"MVC"?** 提示:回忆View的职责 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 六、AI协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**学生执行步骤**: |
||||
|
1. 列出项目中所有类名(不含方法实现) |
||||
|
2. 将类名列表发给AI |
||||
|
3. 输入指令: |
||||
|
> "作为Java架构审计师,请检查我的MVC三层划分是否清晰。Model层是否包含了不应该有的代码(Scanner/System.out)?View层是否越权写了业务逻辑?请指出任何一处直接使用System.out.println的地方,并建议如何改正。" |
||||
|
|
||||
|
**预期AI输出**: |
||||
|
- 指出哪一层有越权行为 |
||||
|
- 建议如何整改 |
||||
|
- 评价整体架构健康度 |
||||
|
|
||||
|
### 进阶AI探究(选做) |
||||
|
|
||||
|
> "假设我的Command接口中execute方法接收了一个`List<Article>`参数,请分析这种设计在工程上有什么隐患,并给出重构建议。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 七、教学反思与调整记录 |
||||
|
|
||||
|
| 日期 | 事项 | 调整内容 | |
||||
|
|------|------|----------| |
||||
|
| 2026-04-28 | 首次编写 | 基于CLI+MVC重构 | |
||||
|
| 2026-04-30 | 教授反馈 | 引入Command模式、提供Maven模板、升级AI协同点 | |
||||
|
| 2026-04-30 | 逻辑重排 | 按"问题→选择→架构→模式"顺序重写 | |
||||
|
| 2026-05-01 | v2 vs V3合并 | 融合深度改进:增加教育哲学、批判性思考、ANSI常量、共享List隐患、故意埋坑 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 附录1:Maven模板说明 |
||||
|
|
||||
|
> 老师提供`my-crawler-template.zip`压缩包,包含: |
||||
|
> - pom.xml(含Jsoup依赖) |
||||
|
> - 空的src/main/java结构 |
||||
|
> - .gitignore |
||||
|
|
||||
|
## 附录2:常见问题速查 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| IDEA不识别pom.xml | 右键 pom.xml → Maven → Reload Project | |
||||
|
| 中文乱码 | Settings → Editor → File Encodings → UTF-8 | |
||||
|
| 包名大小写 | 包名全小写,类名首字母大写 | |
||||
|
| Command找不到 | 检查是否 implements Command,是否 @Override getName() | |
||||
|
| 命令不生效 | 检查 commands.put() 是否注册了该命令 | |
||||
|
| 输出颜色乱码 | IDEA控制台需支持ANSI,Windows下建议使用Windows Terminal或调整设置 | |
||||
|
| 我的System.out为什么被老师说越权 | View层才是与用户交互的唯一出口,所有输出都应通过View,这样将来改成GUI或日志时只需改View | |
||||
|
|
||||
|
## 附录3:教学逻辑说明 |
||||
|
|
||||
|
| 顺序 | 内容 | 设计理由 | |
||||
|
|------|------|----------| |
||||
|
| 1 | 痛点引入 | 从问题出发,让学生感受"为什么需要架构" | |
||||
|
| 2 | CLI vs GUI | 解释技术选型,建立"工程思维 > 视觉装饰"的认知 | |
||||
|
| 3 | MVC分层 | 核心架构概念,理解职责分离,通过类比及批判加深理解 | |
||||
|
| 4 | Command模式 | 具体实现方式,解决"肥控制器"问题 | |
||||
|
| 5 | Maven | 工具链支持 | |
||||
|
| 6 | 代码落地 | 实践验证,刻意植入细节规范,训练工程洁癖 | |
||||
|
| 7 | 架构反思 | 暴露共享可变状态隐患,为W10策略模式+仓库层做铺垫 | |
||||
|
| 8 | 实践任务 | 现场编码验证 | |
||||
|
| 9 | 总结 | 强化认知,预告下周 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 版本说明 |
||||
|
|
||||
|
- **v1**:首次编写,CLI+MVC基础框架 |
||||
|
- **v2**:按"问题→选择→架构→模式"逻辑重排 |
||||
|
- **v3 (本版)**:融合v2结构 + V3深度改进,包含: |
||||
|
- 更深的CLI教育哲学 |
||||
|
- 餐厅类比批判性思考 |
||||
|
- ANSI常量集中管理工程细节 |
||||
|
- 全部输出走View |
||||
|
- 共享List架构隐患反思 |
||||
|
- 故意埋坑让学生找茬 |
||||
|
- W10铺垫(策略模式+仓库层) |
||||
@ -0,0 +1,5 @@ |
|||||
|
#Generated by Maven |
||||
|
#Thu Apr 30 11:50:54 CST 2026 |
||||
|
artifactId=datacollect-cli |
||||
|
groupId=com.example |
||||
|
version=0.1.0 |
||||
@ -0,0 +1,15 @@ |
|||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\command\CrawlCommand.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\model\Article.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\command\HelpCommand.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\Main.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\view\ConsoleView.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\strategy\StrategyFactory.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\repository\ArticleRepository.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\controller\CrawlerController.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\command\ListCommand.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\command\ExitCommand.java |
||||
|
K:\teach-space\java-cli\src\main\java\com\example\datacollect\command\Command.java |
||||
@ -0,0 +1,530 @@ |
|||||
|
## 高级程序设计 · 第9周 |
||||
|
|
||||
|
#### 工程架构:从"写代码"到"造系统" |
||||
|
|
||||
|
##### CLI + MVC + Command模式实战 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 📌 本周导航 |
||||
|
|
||||
|
- 痛点引入:脚本的宿命 |
||||
|
- CLI vs GUI:为什么选命令行? |
||||
|
- MVC分层:职责分离的艺术 |
||||
|
- Command模式:可扩展的路由 |
||||
|
- Maven模板:工程化第一步 |
||||
|
- 代码落地:从接口到实现 |
||||
|
- 架构反思:共享数据的隐患 |
||||
|
- 实践任务 + 课后作业 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 1️⃣ 痛点引入:从脚本到工程的鸿沟 |
||||
|
|
||||
|
#### 这是一段“意大利面”爬虫 |
||||
|
|
||||
|
```java |
||||
|
public class Crawler { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.print("请输入URL: "); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
String url = scanner.nextLine(); |
||||
|
List titles = new ArrayList(); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements elements = doc.select(".post-title"); |
||||
|
for (Element e : elements) { |
||||
|
String title = e.text(); |
||||
|
System.out.println("标题: " + title); |
||||
|
titles.add(title); |
||||
|
} |
||||
|
} catch (Exception ex) { |
||||
|
System.out.println("出错啦: " + ex.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 脚本的三大痛点 |
||||
|
|
||||
|
| 需求 | 需要改哪里? | |
||||
|
|------|--------------| |
||||
|
| 保存标题到文件 | 改 main 内部逻辑 | |
||||
|
| 支持不同网站结构 | 全部重写解析代码 | |
||||
|
| 彩色输出 | 一个一个改 print | |
||||
|
|
||||
|
> 😫 **牵一发而动全身 → 改起来疼** |
||||
|
|
||||
|
### 本周目标:**让代码“改起来不疼”** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 2️⃣ CLI vs GUI:架构选择的思考 |
||||
|
|
||||
|
### 图形界面 vs 命令行 |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| 学习重心 | 布局、控件、事件 | **架构、分层、路由** | |
||||
|
| 后端能力 | 弱 | 模拟真实服务器 | |
||||
|
| 工程思维 | 弱(关注视觉) | **强(关注逻辑)** | |
||||
|
| 可测试性 | 难 | 易 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 核心观点 |
||||
|
|
||||
|
> **CLI 更需要 MVC!** |
||||
|
|
||||
|
- GUI 有现成事件系统,框架强塞给你一套架构 |
||||
|
- CLI 只有字符流 → **没有架构,分分钟写成脚本** |
||||
|
|
||||
|
> 🎯 **当外部约束消失,内部的工程纪律才真正开始建立** |
||||
|
|
||||
|
### CLI 也能很酷 |
||||
|
|
||||
|
- ANSI 彩色输出 |
||||
|
- 表格展示数据 |
||||
|
- 模拟大数据/后端开发 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 3️⃣ MVC 分层设计 |
||||
|
|
||||
|
### MVC 的起源与演进 |
||||
|
|
||||
|
| 年代 | 场景 | MVC的角色 | |
||||
|
|------|------|----------| |
||||
|
| 1970s | Smalltalk-76(Xerox PARC)GUI | 最早的用户界面架构 | 
||||
|
| 1990s | Web开发 (Struts) | 后端模板引擎 | |
||||
|
| 2000s | ASP.NET MVC | 现代Web框架 | |
||||
|
| 2020s | CLI + API | 解耦业务逻辑与表现层 | |
||||
|
|
||||
|
**核心不变:职责分离** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## MVC 三层职责 |
||||
|
|
||||
|
![[mvc.png]] |
||||
|
``` |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ 入口 │ |
||||
|
│ (main方法) │ |
||||
|
└─────────────────┬───────────────────────┘ |
||||
|
▼ |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ Controller │ |
||||
|
│ 只管"派给谁",不管"怎么做" │ |
||||
|
└─────────┬───────────────┬───────────────┘ |
||||
|
▼ ▼ |
||||
|
┌─────────────────┐ ┌─────────────────┐ |
||||
|
│ Model │ │ View │ |
||||
|
│ 管"数据" │ │ 管"呈现" │ |
||||
|
│ + 业务逻辑 │ │ + 输入输出 │ |
||||
|
└─────────────────┘ └─────────────────┘ |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三层“禁止做什么” |
||||
|
|
||||
|
| 层级 | 禁止行为 | |
||||
|
| -------------- | -------------------------------------- | |
||||
|
| **Model** | 不能有 `System.out.println`,不能有 `Scanner` | |
||||
|
| **View** | 不能写爬虫逻辑,只做“传声筒” | |
||||
|
| **Controller** | 不能直接写业务细节,委托给 Command | |
||||
|
|
||||
|
> 🔴 **越权就是架构腐败的开始** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🍽️ 餐厅类比(帮助理解) |
||||
|
|
||||
|
- **Model = 后厨**:只管做菜,不管谁来吃、怎么端 |
||||
|
- **View = 服务员**:只管端菜和收钱,不管菜怎么做 |
||||
|
- **Controller = 前台**:接单 → 派给后厨 → 叫服务员上菜 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤔 对类比的批判性思考(关键!) |
||||
|
|
||||
|
> 任何类比都有边界,不要当成真理 |
||||
|
|
||||
|
| 场景 | 暴露的问题 | |
||||
|
|------|------------| |
||||
|
| 客人有忌口(不吃香菜) | 信息需要传到后厨 → Model 可能需要知道 meta 信息 | |
||||
|
| 服务员反馈“今天的菜咸了” | View → Model 反向影响 | |
||||
|
| 后厨做完菜通知前台 | **观察者模式**,数据流可能是双向的 | |
||||
|
|
||||
|
**本课程简化模型**:请求-响应,单向流 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## MVC 数据流向(本课程简化版) |
||||
|
|
||||
|
``` |
||||
|
CLI用户输入 |
||||
|
↓ |
||||
|
View(解析命令字符串) |
||||
|
↓ |
||||
|
Controller(找到对应Command) |
||||
|
↓ |
||||
|
Command.execute()(执行业务逻辑) |
||||
|
↓ |
||||
|
Model(Article数据,暂存于List) |
||||
|
↓ |
||||
|
View(display()展示数据) |
||||
|
↓ |
||||
|
CLI终端显示 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 4️⃣ Command 模式:可扩展的命令路由 |
||||
|
|
||||
|
### 为什么需要 Command 模式? |
||||
|
|
||||
|
```java |
||||
|
switch (cmd) { |
||||
|
case "crawl": handleCrawl(); break; |
||||
|
case "help": showHelp(); break; |
||||
|
// 如果要增加 list 命令? |
||||
|
// 1. 加 case "list" |
||||
|
// 2. 加 handleList() 方法 |
||||
|
// 3. 可能还要改其他地方... |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 每加一个功能,就要在这个类里戳一个洞 → **肥控制器陷阱** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 模式的四个要素 |
||||
|
|
||||
|
| 要素 | 角色 | 示例 | |
||||
|
|------|------|------| |
||||
|
| Command接口 | 抽象的“订单” | `Command` | |
||||
|
| ConcreteCommand | 具体的订单 | `HelpCommand` | |
||||
|
| Invoker | 接单的前台 | `CrawlerController` | |
||||
|
| Receiver | 执行者 | `ConsoleView`、`ArticleRepository` | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 接口定义 |
||||
|
|
||||
|
```java |
||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Controller 的变革:从 switch 到 Map |
||||
|
|
||||
|
```java |
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands = new HashMap<>(); |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
// 解析命令 → 从 Map 取 Command → 调用 execute |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> **增加新命令:只需新建类,并在构造器里用 `commands.put()` 注册一行,`handle()` 的路由逻辑零改动!** | 
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 对比:switch-case vs Command |
||||
|
|
||||
|
| 维度 | switch-case | Command模式 | |
||||
|
|------|-------------|-------------| |
||||
|
| 增加命令 | 要改 Controller | 新建一个类 | |
||||
|
| 多态体验 | 无 | `execute()` 多态 | |
||||
|
| 可测试性 | 难 | 每个 Command 单独测试 | |
||||
|
| 代码量 | 少 | 多,但更清晰 | |
||||
|
|
||||
|
> 🏨 **类比:酒店客房服务,前台只负责派单** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 5️⃣ Maven 模板与环境(5分钟) |
||||
|
|
||||
|
### 直接使用模板,不折腾配置 |
||||
|
|
||||
|
``` |
||||
|
my-crawler-template.zip |
||||
|
↓ 解压 + IDEA打开 |
||||
|
↓ 右键 pom.xml → Maven → Reload Project |
||||
|
↓ 运行 App.java |
||||
|
``` |
||||
|
|
||||
|
### 标准目录结构 |
||||
|
|
||||
|
``` |
||||
|
src/main/java/com/crawler/ |
||||
|
├── model/Article.java |
||||
|
├── view/ConsoleView.java |
||||
|
├── command/ |
||||
|
│ ├── Command.java |
||||
|
│ ├── CrawlCommand.java |
||||
|
│ ├── HelpCommand.java |
||||
|
│ ├── ListCommand.java |
||||
|
│ └── ExitCommand.java |
||||
|
└── controller/CrawlerController.java |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 6️⃣ 代码落地(分步实现) |
||||
|
|
||||
|
### Model:Article 实体 |
||||
|
|
||||
|
```java |
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
// 构造器、getter/setter、toString |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 📦 只存放数据,没有任何输入输出代码 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## View:ConsoleView(ANSI常量集中管理) |
||||
|
|
||||
|
```java |
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_GREEN = "\033[32m"; |
||||
|
private static final String ANSI_RED = "\033[31m"; |
||||
|
// ... 其他常量 |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
public void printError(String msg) { ... } |
||||
|
public void display(List<Article> articles) { ... } |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✨ **所有颜色码集中定义 → 改主题只需改一处** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 实现示例(HelpCommand) |
||||
|
|
||||
|
```java |
||||
|
public class HelpCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public HelpCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "help"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ⚠️ 全部输出通过 `view`,绝不让 `System.out` 直接出现在这里 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## CrawlCommand(存根,下周填坑) |
||||
|
|
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public CrawlCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "crawl"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔍 **找茬点**:这里拼接字符串算是“业务逻辑”吗?留给大家用 AI 审计。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## ExitCommand |
||||
|
|
||||
|
```java |
||||
|
public class ExitCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ExitCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "exit"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✅ 所有输出都通过 View → 将来改 GUI 只需换 View 实现 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Controller + main 组装 |
||||
|
|
||||
|
```java |
||||
|
// Controller 中持有 Map<String,Command> |
||||
|
// App.java 中: |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
view.printSuccess("Welcome to CLI Crawler!"); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔁 完成交互循环 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 7️⃣ 架构反思:共享 List<Article> 的隐患 |
||||
|
|
||||
|
### 当前问题 |
||||
|
|
||||
|
- 所有 Command 都直接拿到 `List<Article>` 引用 |
||||
|
- 任何一个命令都可以随意增、删、改列表 |
||||
|
- 数据完全“裸奔” |
||||
|
|
||||
|
> 🚨 就像酒店所有员工都能进保险箱 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 提问 |
||||
|
|
||||
|
- 如果 `CrawlCommand` 不小心把 `null` 塞进列表,`ListCommand` 会怎样? |
||||
|
- 如果我们要在添加文章时写日志,现在的设计能优雅实现吗? |
||||
|
|
||||
|
### 预告解决方案(W10) |
||||
|
|
||||
|
- **策略模式** + **仓库层(ArticleRepository)** |
||||
|
- 封装 `List`,对外只暴露 `add()`、`getAll()` 等安全接口 |
||||
|
|
||||
|
> W9 搭骨架,W10 装上盔甲 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 8️⃣ 实践任务(现场5分钟) |
||||
|
|
||||
|
### 必做项 |
||||
|
|
||||
|
1. 使用 Maven 模板创建项目 |
||||
|
2. 实现完整包结构(model/view/command/controller) |
||||
|
3. 实现 4 个 Command:help / list / crawl / exit |
||||
|
4. `list` 能展示已抓取的文章(目前存根即可) |
||||
|
5. 运行并测试循环 |
||||
|
|
||||
|
### 额外加分:代码找茬 |
||||
|
|
||||
|
- 检查是否仍有 `System.out` 直接调用 |
||||
|
- 检查 ANSI 码是否硬编码在多个地方 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 验收标准 |
||||
|
|
||||
|
- [x] Maven 编译通过 |
||||
|
- [x] Command 接口和 4 个实现在不同文件 |
||||
|
- [x] Controller 里没有 switch-case |
||||
|
- [x] 新增命令只需新建类,不改 Controller |
||||
|
- [x] list 能正确显示空列表 |
||||
|
- [x] 所有输出均通过 `ConsoleView` |
||||
|
- [x] ANSI 颜色码集中定义为常量 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 9️⃣ 课后作业 |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. **完善 Article**:增加 `author`、`publishDate` 字段 |
||||
|
2. **★ HistoryCommand**:记录用户输入过的所有命令(用 `List<String>`) |
||||
|
3. **AI 架构审计**:将类名发给 AI,指令: |
||||
|
> “作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?” |
||||
|
|
||||
|
### 选做 |
||||
|
|
||||
|
- 命令别名(c 代替 crawl) |
||||
|
- URL 格式验证 |
||||
|
- 暗色主题(修改一处常量) |
||||
|
- 思考题:分析 `List<Article>` 共享引用的风险(200字小结) |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤖 AI 协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**步骤**: |
||||
|
1. 列出所有类名(不含方法实现) |
||||
|
2. 发给 AI |
||||
|
3. 指令:“检查 MVC 分层是否清晰,是否有越权行为” |
||||
|
|
||||
|
### 进阶探究(选做) |
||||
|
|
||||
|
> “假设我的 Command 接口中 execute 方法接收了一个 `List<Article>` 参数,请分析这种设计在工程上有什么隐患,并给出重构建议。” |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 📚 总结与过渡 |
||||
|
|
||||
|
### 本周成果 |
||||
|
|
||||
|
- ✅ 工程化包结构 |
||||
|
- ✅ MVC 分层清晰 |
||||
|
- ✅ Command 模式实现可扩展路由 |
||||
|
- ✅ 所有输出走 View,常量集中管理 |
||||
|
|
||||
|
### 下周预告 |
||||
|
|
||||
|
- **策略模式**:封装爬取算法 |
||||
|
- **仓库层(Repository)**:武装 `List<Article>`,解决共享隐患 |
||||
|
|
||||
|
> 🚀 从“写代码”到“造系统”,踏出坚实第一步! |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Q&A |
||||
|
|
||||
|
### 常见问题 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| IDEA 不识别 pom.xml | 右键 → Maven → Reload Project | |
||||
|
| 中文乱码 | Settings → File Encodings → UTF-8 | |
||||
|
| 输出颜色乱码 | Windows 建议使用 Windows Terminal | |
||||
|
| 我的 System.out 被批评 | View 才是唯一输出出口 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 谢谢! |
||||
|
|
||||
|
### 课件已上传,模板在课程群 |
||||
|
|
||||
|
**保持工程洁癖,下周见!** |
||||
@ -0,0 +1,705 @@ |
|||||
|
# 教案:《高级程序设计》第10周——设计模式:灵活性与可扩展性 |
||||
|
|
||||
|
| 项目 | 内容 | |
||||
|
| -------- | ---------------------------------------------------------------------------- | |
||||
|
| **课程名称** | 高级程序设计 | |
||||
|
| **周次** | 第10周 | |
||||
|
| **主题** | 设计模式——灵活性与可扩展性 | |
||||
|
| **学时** | 2学时(90分钟) | |
||||
|
| **授课对象** | 已完成第9周CLI+MVC架构学习,具备Command模式基础 | |
||||
|
| **教学环境** | JDK 17+、IntelliJ IDEA、Maven | |
||||
|
| **前情提要** | W9搭建了CLI骨架:MVC分层 + Command路由,但留下了两大隐患——解析逻辑耦合在Command中、List\<Article\>共享引用裸奔 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 教学调整说明:为什么W10要在“骨架”上装“盔甲”? |
||||
|
|
||||
|
> **W9成果**:一个可扩展的命令行骨架 → **W9痛点**:解析器与数据存储仍在“裸奔” |
||||
|
|
||||
|
| 维度 | W9状态 | W10目标 | |
||||
|
|------|--------|---------| |
||||
|
| **架构** | MVC分层清晰 | MVC + 策略模式 + 仓库层 | |
||||
|
| **命令扩展** | 新增命令不改Controller | 新增解析器不改任何旧代码 | |
||||
|
| **数据安全** | List\<Article\>全员可写 | Repository封装,只暴露安全接口 | |
||||
|
| **解析逻辑** | 硬编码在CrawlCommand内 | 策略模式,按URL自动匹配 | |
||||
|
| **代码量** | ~8个类 | ~12个类,但每个更小更纯粹 | |
||||
|
|
||||
|
**决策理由**: |
||||
|
1. W9学生已经感受到Command模式的好处——**多态带来的扩展性** |
||||
|
2. 策略模式是多态思想的又一次实战,是**接口抽象的深化** |
||||
|
3. 仓库层是“封装”这一OOP核心原则的落地,补上W9留下的课 |
||||
|
4. 解析器工厂让学生看到**“自动匹配”**的威力——增加网站支持只需新增一个类 |
||||
|
|
||||
|
**更深层的教育价值**: |
||||
|
> W9教会学生“怎么把代码分开”,W10要教会学生“怎么把代码分开后还能优雅地合上”——**接口即合同,工厂即自动匹配,仓库即数据守卫**。这三句话,就是本周的全部精华。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 一、教学目标 |
||||
|
|
||||
|
| 目标维度 | 具体描述 | |
||||
|
|----------|----------| |
||||
|
| **知识掌握** | 理解策略模式的定义与多态本质;掌握工厂模式的两类变体(工厂方法/简单工厂)及适用场景;理解仓库模式对数据访问的封装原理。 | |
||||
|
| **工程实践** | 能在爬虫项目中用策略模式封装不同网站的解析逻辑;能实现解析器工厂,根据URL自动匹配解析策略;能用Repository模式替代裸List,提供安全的数据访问接口。 | |
||||
|
| **思维转型** | 从“写死逻辑”转向“策略可插拔”;从“直接操作集合”转向“通过仓库存取”;理解“对扩展开放,对修改关闭”的开闭原则。 | |
||||
|
| **工具应用** | 利用AI审查策略模式实现是否真正解耦;让AI扮演“网站结构分析师”辅助编写具体解析策略;用AI生成Repository的安全接口建议。 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 二、教学重点与难点 |
||||
|
|
||||
|
| 项目 | 内容 | 突破方法 | |
||||
|
|------|------|----------| |
||||
|
| **重点** | 策略模式的多态本质、解析器工厂的自动匹配机制、Repository对数据访问的封装 | 以“新增网站需要改什么”为切入点,展示策略模式的开闭原则达成;通过“攻击”当前List裸奔的问题,引出Repository的必然性 | |
||||
|
| **难点** | 理解“接口即合同”的抽象思维、工厂模式中反射/Map注册的实现、仓库层与Strategy模式的协同 | 用“插座与电器”类比接口标准;现场演示从硬编码→工厂→反射的演进路径;用时序图展示“用户→Command→Strategy→Repository”的完整调用链 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三、教学过程设计(90分钟) |
||||
|
|
||||
|
| 环节 | 时间 | 教学内容 | 师生活动 | AI协同点 | |
||||
|
| -------------------------- | --- | ----------------------------------------------------------------- | -------------------------------------- | --------------------------- | |
||||
|
| **1. W9回顾与痛点暴露** | 8' | 回顾W9成果(CLI骨架),暴露两大隐患:①CrawlCommand里解析逻辑硬编码;②List\<Article\>全员可读可写 | **教师演示**:展示W9代码,用“事故场景”引发思考 | — | |
||||
|
| **2. 策略模式:解析器的“插头标准化”** | 18' | 策略模式定义、接口设计、多态调用、与Command模式的对比 | **类比**:插座与电器;**教师演示**:从if-else到策略模式的演进 | 让AI生成“策略模式vs switch-case”对比 | |
||||
|
| **3. 解析器工厂:自动匹配的魔法** | 14' | 工厂模式的两种形态(简单工厂→注册式工厂),解析器工厂实现 | **教师演示**:先用if-else判断host,再升级为注册式工厂(List遍历 + supports匹配) | 让AI解释工厂模式与策略模式如何协同 | 
||||
|
| **4. Repository模式:武装数据访问** | 12' | Repository定义、接口设计、替换List\<Article\>后的影响 | **教师演示**:在原代码中把List替换为Repository,展示改动点 | 学生用AI审计Repository接口的“最小完备性” | |
||||
|
| **5. 整体架构串联** | 8' | 用一张时序图串联:用户→CLI→Controller→Command→Strategy→Repository→Model | **师生互动**:让学生在白板上画出调用链 | — | |
||||
|
| **6. 代码落地** | 20' | 实现CrawlStrategy接口 + 两个策略 + 解析器工厂 + ArticleRepository | **教师演示**:分步写出代码,刻意埋入“策略匹配失败”的异常处理 | 完成后用AI检查策略模式实现 | |
||||
|
| **7. 架构反思与W11预告** | 5' | 当前架构还有什么隐患?(异常处理不统一、日志缺失)→ 预告W11健壮性工程 | **师生互动**:如果解析器工厂找不到匹配策略,会发生什么? | — | |
||||
|
| **8. 实践任务** | 5' | 实现策略模式和仓库层,完成本周代码升级 | 学生现场编码,教师巡视 | — | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 四、核心教学内容脚本 |
||||
|
|
||||
|
### 4.1 W9回顾与痛点暴露(8分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "上节课我们搭了一个很漂亮的骨架——CLI+MVC+Command模式。我们先来表扬一下自己:新增一个命令,只要新建一个类,Controller零改动。但请大家想一个问题——" |
||||
|
|
||||
|
**投影展示W9的CrawlCommand存根**: |
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
// ... |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问引导**: |
||||
|
1. "这个存根下周要填坑了。假设我们现在要真正实现爬取,代码写在哪?" |
||||
|
2. "如果我要支持两个网站——比如一个技术博客和一个新闻网站——它们的HTML结构完全不一样,这个`execute`方法会变成什么样?" |
||||
|
|
||||
|
**展示“噩梦版”CrawlCommand**: |
||||
|
```java |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
String url = args[1]; |
||||
|
// 五十行if-else地狱... |
||||
|
if (url.contains("blog.example.com")) { |
||||
|
// 解析技术博客的HTML |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
} else if (url.contains("news.example.com")) { |
||||
|
// 解析新闻网站的HTML |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
} else { |
||||
|
view.printError("Unsupported website!"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "看到了吗?每支持一个新网站,就要在这里加一个`else if`。这就是W9开头我们痛批的'牵一发而动全身',只不过这次灾难地点从`main`搬到了`CrawlCommand`。" | 
||||
|
> |
||||
|
> "更重要的是,我们上节课辛辛苦苦实现了Command模式,难道解析逻辑又要回到if-else地狱吗?**这就是W10要解决的第一个问题:怎么让解析逻辑也可插拔?**" |
||||
|
|
||||
|
**第二个隐患——共享状态的回顾**: |
||||
|
> "还有一件事,我们上节课结束前提到的:`List<Article> articles`在所有Command之间共享。任何一个Command都可以往里面塞东西、删东西、甚至清空。这是W10要解决的第二个问题:**怎么给数据装上'防盗门'?**" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.2 策略模式:解析器的“插头标准化”(18分钟) |
||||
|
|
||||
|
#### 4.2.1 从类比切入 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "先讲个生活场景。你家里墙上有一个三孔插座,你可以插电视、插电脑、插手机充电器——任何符合这个标准的电器都能用。插座不在乎你是什么电器,它只认接口标准。" |
||||
|
|
||||
|
**类比映射**: |
||||
|
|
||||
|
| 生活场景 | 代码对应 | |
||||
|
|----------|----------| |
||||
|
| 三孔插座 | `CrawlStrategy` 接口 | |
||||
|
| 电视/电脑/手机充电器 | 具体解析策略(BlogStrategy/NewsStrategy) | 
||||
|
| 电流 | 输入:URL + Document;输出:List\<Article\> | |
||||
|
| 你(使用者) | CrawlCommand | |
||||
|
| 插座面板 | 解析器工厂 | |
||||
|
|
||||
|
> "策略模式的核心思想就是:**定义一个算法接口,让具体的算法实现可以互相替换,而使用算法的客户端不受影响。**" |
||||
|
|
||||
|
#### 4.2.2 策略模式定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/strategy/CrawlStrategy.java |
||||
|
package com.crawler.strategy; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface CrawlStrategy { |
||||
|
/** |
||||
|
* 从已获取的Document中解析文章列表 |
||||
|
* @param url 原始请求URL(用于填充Article) |
||||
|
* @param doc Jsoup解析后的Document |
||||
|
* @return 解析出的文章列表 |
||||
|
*/ |
||||
|
List<Article> parse(String url, Document doc); |
||||
|
|
||||
|
/** |
||||
|
* 判断此策略是否为给定URL服务 |
||||
|
* @param url 待判断的URL |
||||
|
* @return true表示此策略可以处理该URL |
||||
|
*/ |
||||
|
boolean supports(String url); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "注意,策略接口里有两个方法。`parse`是干活的那个,`supports`是'我能不能干这个活'——这是什么?**这是合同!** 任何网站想被我们爬虫支持,就必须签署这份合同:告诉我你是不是我的客户(supports),以及怎么解析你(parse)。" |
||||
|
|
||||
|
#### 4.2.3 具体策略实现示例 |
||||
|
|
||||
|
```java |
||||
|
// BlogStrategy.java - 技术博客解析策略 |
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// NewsStrategy.java - 新闻网站解析策略 |
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**对比:策略模式 vs 硬编码if-else** |
||||
|
|
||||
|
| 维度 | if-else屎山 | 策略模式 | |
||||
|
|------|-------------|----------| |
||||
|
| 新增网站 | 改CrawlCommand,加else if | 新写一个类,实现CrawlStrategy | |
||||
|
| 修改解析逻辑 | 在CrawlCommand里翻找对应的else if | 只改对应策略类 | |
||||
|
| 测试 | 必须启动整个爬虫 | 单独对Strategy做单元测试 | |
||||
|
| 是否符合开闭原则 | ❌ 对修改开放 | ✅ 对扩展开放,对修改关闭 | |
||||
|
|
||||
|
**与Command模式的对比(加深理解)**: |
||||
|
> "上节课Command模式,我们为每个命令定义一个类;这节课策略模式,我们为每个网站的解析算法定义一个类。**本质上都是同一个OOP思想:用多态替代条件分支。** 只不过Command的接口是`execute()`,Strategy的接口是`parse()`。" |
||||
|
> |
||||
|
> "这句话你们可以记下来:**接口是消除if-else的利器,多态是接口的灵魂。**" | 
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.3 解析器工厂:自动匹配的魔法(14分钟) |
||||
|
|
||||
|
#### 4.3.1 问题引出 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们有A网站的策略、B网站的策略。问题来了:谁来选策略?谁来遍历所有策略,找到一个supports返回true的?" |
||||
|
> |
||||
|
> "如果把这个逻辑写在CrawlCommand里,那策略模式就白用了——CrawlCommand还是得'知道'有哪些策略。我们要的是一个黑盒子:**把URL丢进去,自动弹出一个合适的解析器。**" |
||||
|
|
||||
|
#### 4.3.2 解析器工厂的实现 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/strategy/StrategyFactory.java |
||||
|
package com.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
|
||||
|
// 注册策略——新的网站只需在这里加一行 |
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
// 未来增加新网站:strategies.add(new XxxStrategy()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 根据URL自动匹配解析策略 |
||||
|
* @param url 目标URL |
||||
|
* @return 匹配的策略,如果没有匹配返回null |
||||
|
*/ |
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) { |
||||
|
return s; |
||||
|
} |
||||
|
} |
||||
|
return null; // 未找到匹配策略 |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这个工厂类足够简单:一个List存所有策略,一个方法遍历找到匹配的。但简单不等于不强大。" | 
||||
|
> |
||||
|
> "**关键点**:新增网站支持,只需要——" | 
||||
|
1. 写一个`XxxStrategy`实现`CrawlStrategy` |
||||
|
2. 在工厂构造器里加一行`strategies.add(new XxxStrategy())` |
||||
|
> |
||||
|
> "CrawlCommand一行不改。这就是开闭原则的胜利。" |
||||
|
|
||||
|
#### 4.3.3 从简单工厂到更高级的注册机制(拓展思维) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "有同学可能会问:还要在工厂构造器里加一行,能不能做到完全零改动?当然可以——用反射或者SPI。" |
||||
|
|
||||
|
**演示概念(不要求实现)**: |
||||
|
```java |
||||
|
// 进阶思路:扫描指定包下的所有CrawlStrategy实现类 |
||||
|
// 用反射自动注册,真正做到“新增类即生效” |
||||
|
// 这是Spring框架的核心思想之一 |
||||
|
``` |
||||
|
|
||||
|
> "这个技术我们暂时不要求掌握,但我希望你们知道:你现在写的每一个`new XxxStrategy()`,在未来都可能进化为框架级别的自动装配。**你现在建立的思维习惯,决定了你未来能走多高。**" |
||||
|
|
||||
|
#### 4.3.4 重构后的CrawlCommand |
||||
|
|
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
private StrategyFactory strategyFactory; |
||||
|
private ArticleRepository repository; // 注意:这里是Repository了! |
||||
|
|
||||
|
public CrawlCommand(ConsoleView v, StrategyFactory f, ArticleRepository r) { |
||||
|
this.view = v; |
||||
|
this.strategyFactory = f; |
||||
|
this.repository = r; |
||||
|
} |
||||
|
|
||||
|
public String getName() { return "crawl"; } |
||||
|
|
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
|
||||
|
// 1. 工厂自动选策略 |
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 2. 抓取页面 |
||||
|
view.printInfo("Crawling: " + url); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
List<Article> parsed = strategy.parse(url, doc); |
||||
|
|
||||
|
// 3. 通过仓库存入(而不是直接操作List) |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + parsed.size() + " articles."); |
||||
|
} catch (IOException e) { |
||||
|
view.printError("Failed to crawl: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "注意这个CrawlCommand现在的职责:拿到URL → 交给工厂选策略 → 执行解析 → 交给仓库存储。**它自己在干什么?在调度!** 这就是上节课我们讲的Controller的'调度思维',现在向Command内部延伸了。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.4 Repository模式:武装数据访问(12分钟) |
||||
|
|
||||
|
#### 4.4.1 问题重提 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "回到上节课结束时的那个问题:`List<Article>`在所有Command之间共享。任何一个Command都可以做这些事——" |
||||
|
```java |
||||
|
articles.clear(); // 清空所有文章 |
||||
|
articles.add(null); // 塞入null |
||||
|
articles.remove(0); // 随意删除 |
||||
|
``` |
||||
|
|
||||
|
> "如果一个新同事接手开发,他不知道'不要动这个List'的潜规则,写了一个`articles.clear()`,你的`list`命令就突然什么都不显示了。**靠代码约定维护的秩序,早晚会被打破。我们需要实体的'规则'——代码层面的约束。**" |
||||
|
|
||||
|
#### 4.4.2 ArticleRepository的定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/repository/ArticleRepository.java |
||||
|
package com.crawler.repository; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
/** |
||||
|
* 添加一篇文章。注意:不接受null,这是代码层面的规则,不是口头约定。 |
||||
|
*/ |
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取所有文章的只读视图 |
||||
|
* 调用者无法通过此返回值修改内部数据 |
||||
|
*/ |
||||
|
public List<Article> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取文章数量 |
||||
|
*/ |
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 清空(仅管理员可调——下一篇:权限控制) |
||||
|
*/ |
||||
|
public void clear() { |
||||
|
articles.clear(); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "三个关键设计点——" |
||||
|
> |
||||
|
> - **add()拒绝null**:规则写在代码里,不是写在邮件里 |
||||
|
> - **getAll()返回不可修改的视图**:`Collections.unmodifiableList()`——调用者如果尝试add/remove,会**直接抛异常**,不是'悄悄的bug' |
||||
|
> - **ClearCommand要清空数据?调`repository.clear()`**,而不是直接操作List |
||||
|
> |
||||
|
> "这就是面向对象的第一课——封装。把数据藏起来,只暴露安全的方法。从'直接操作集合'到'通过仓库存取',是程序员成熟度的分水岭。" |
||||
|
|
||||
|
#### 4.4.3 仓库引入后的架构变化 |
||||
|
|
||||
|
**Command接口的execute方法调整**: |
||||
|
|
||||
|
```java |
||||
|
// 调整前(W9) |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
|
||||
|
// 调整后(W10) |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, ArticleRepository repository); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这个改动很小——把`List<Article>`换成`ArticleRepository`。但语义完全不同:之前是'给你数据随便玩',现在是'给你一个安全的存取通道'。" |
||||
|
|
||||
|
**所有Command同步调整**: |
||||
|
|
||||
|
```java |
||||
|
// ListCommand.java - 调整后 |
||||
|
public class ListCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ListCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "list"; } |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.display(repository.getAll()); // 通过仓库获取数据 |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ClearCommand.java(新增示例) |
||||
|
public class ClearCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ClearCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "clear"; } |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
repository.clear(); |
||||
|
view.printSuccess("All articles cleared."); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**Controller和main的调整**: |
||||
|
|
||||
|
```java |
||||
|
// App.java - 调整后 |
||||
|
public class App { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); // 替代 List<Article> |
||||
|
StrategyFactory factory = new StrategyFactory(); // 新增 |
||||
|
|
||||
|
CrawlerController controller = new CrawlerController(view, repository, factory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler v2.0!"); |
||||
|
view.printInfo("Type 'help' for commands."); |
||||
|
|
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.5 整体架构串联(8分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们把所有部件串起来,看看一个`crawl https://blog.example.com`命令走过的完整路径。" |
||||
|
|
||||
|
**时序图(口述配白板绘制)**: |
||||
|
``` |
||||
|
用户输入 "crawl https://blog.example.com" |
||||
|
│ |
||||
|
▼ |
||||
|
ConsoleView.readLine() |
||||
|
│ |
||||
|
▼ |
||||
|
CrawlerController.handle("crawl https://blog.example.com") |
||||
|
│ Map查找 "crawl" → CrawlCommand |
||||
|
▼ |
||||
|
CrawlCommand.execute(args, repository) |
||||
|
│ |
||||
|
├─► StrategyFactory.getStrategy(url) |
||||
|
│ │ 遍历List<CrawlStrategy> |
||||
|
│ │ BlogStrategy.supports(url) → true! |
||||
|
│ ▼ |
||||
|
│ 返回 BlogStrategy |
||||
|
│ |
||||
|
├─► Jsoup.connect(url).get() → Document |
||||
|
│ |
||||
|
├─► BlogStrategy.parse(url, doc) → List<Article> |
||||
|
│ |
||||
|
└─► for each article: repository.add(article) |
||||
|
│ |
||||
|
▼ |
||||
|
ArticleRepository.articles.add(article) |
||||
|
|
||||
|
最终:ConsoleView.printSuccess("Crawled N articles.") |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "七步调用,每一步职责清晰:View负责输入输出,Controller负责路由,Command负责调度,Factory负责匹配,Strategy负责解析,Repository负责存储。**没有哪个类干了两个人的活,也没有哪个类不知道自己的活是什么。**" |
||||
|
> |
||||
|
> "这就是工程化——不是把代码写得快,是把代码写得对。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.6 代码落地(20分钟) |
||||
|
|
||||
|
**教师准备**:课前准备一份“W9升级到W10”的改动清单,现场演示关键改动。 |
||||
|
|
||||
|
**改动清单**: |
||||
|
1. 新建`strategy/`包,创建`CrawlStrategy`接口 |
||||
|
2. 新建`strategy/BlogStrategy.java` |
||||
|
3. 新建`strategy/NewsStrategy.java` |
||||
|
4. 新建`strategy/StrategyFactory.java` |
||||
|
5. 新建`repository/`包,创建`ArticleRepository.java` |
||||
|
6. 修改`Command`接口的`execute`签名 |
||||
|
7. 修改`CrawlCommand`,引入`StrategyFactory`和`ArticleRepository` |
||||
|
8. 修改其余所有`Command`实现类 |
||||
|
9. 修改`CrawlerController`构造器 |
||||
|
10. 修改`App.java` |
||||
|
|
||||
|
**教师演示关键步骤**(重点演示): |
||||
|
- `ArticleRepository`的`Collections.unmodifiableList()` |
||||
|
- `StrategyFactory`的遍历匹配逻辑 |
||||
|
- `CrawlCommand`重写后的调度结构 |
||||
|
|
||||
|
**刻意埋入的“找茬点”**: |
||||
|
> "我在`StrategyFactory.getStrategy()`里,如果没有匹配的策略就返回`null`,然后在`CrawlCommand`里检查null。这其实是'空对象模式(Null Object Pattern)'的前奏——如果我不想让Command检查null,我应该怎么改工厂?大家带着这个问题用AI探究。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.7 架构反思与W11预告(5分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们的架构比W9强壮多了:解析逻辑可插拔,数据访问有守卫。但还有一些漏洞——" |
||||
|
|
||||
|
**逐一点破**: |
||||
|
1. **异常处理**:`CrawlCommand`用了一个笼统的`catch (IOException e)`,如果解析过程中抛出其他异常怎么办? |
||||
|
2. **网络超时**:如果目标网站3秒没响应,当前代码会一直等吗? |
||||
|
3. **日志缺失**:所有的成功/失败信息只输出到终端,如果程序半夜跑,第二天想看昨晚抓了多少——看不了。 |
||||
|
4. **重试机制**:如果一次失败就直接报错,要不要给个重试的机会? |
||||
|
|
||||
|
**W11预告**: |
||||
|
> "下周,我们会做三件事:**自定义异常体系**、**工程化日志框架**、**防御式编程与重试机制**。W9搭骨架,W10装盔甲,W11要让这个系统**经得起现实的毒打**。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.8 实践任务(5分钟) |
||||
|
|
||||
|
**任务要求**: |
||||
|
1. 从W9代码出发,完成W10升级 |
||||
|
2. 实现至少两个`CrawlStrategy`(可以是模拟的,不要求真实爬取) |
||||
|
3. 实现`StrategyFactory`和`ArticleRepository` |
||||
|
4. 确保所有Command通过Repository访问数据 |
||||
|
5. 运行并测试完整流程 |
||||
|
|
||||
|
**验收标准**: |
||||
|
- [x] 新增策略类只需新建文件+工厂注册一行,其余代码零改动 |
||||
|
- [x] `ArticleRepository`的`getAll()`返回不可修改视图 |
||||
|
- [x] `CrawlCommand`不包含任何网站特定的解析逻辑 |
||||
|
- [x] `StrategyFactory`能根据URL自动匹配正确的策略 |
||||
|
- [x] 所有Command的`execute`方法签名已更新为`ArticleRepository` |
||||
|
- [x] 无任何地方直接操作`List<Article>` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 五、课后作业 |
||||
|
|
||||
|
### 5.1 必做任务 |
||||
|
|
||||
|
1. **完善ArticleRepository**:增加`addAll(List<Article>)`批量添加方法,注意防御null |
||||
|
2. **★ AnalyzeCommand(集大成作业)**: |
||||
|
- 实现`analyze <url>`命令 |
||||
|
- 内部调用`StrategyFactory`匹配策略 |
||||
|
- 调用策略解析文章后,**不存到Repository**,而是分析统计信息: |
||||
|
- 文章总数 |
||||
|
- 标题平均长度 |
||||
|
- 按某种规则排名的Top 5 |
||||
|
- 结果只输出,不存储 |
||||
|
- **提示**:这就是策略的复用——同一个解析策略,既能为`crawl`服务(存入仓库),也能为`analyze`服务(仅分析) |
||||
|
|
||||
|
3. **AI架构审计**:将完整代码的类图(或类名与方法签名列表)发给AI,指令: |
||||
|
> "作为Java架构审计师,请检查:①策略模式的实现是否正确解耦(CrawlCommand是否仍然包含网站特定逻辑);②Repository是否真正封装了数据访问(是否存在绕过Repository直接操作List的地方);③工厂的匹配逻辑是否存在性能隐患。请给出具体的改进建议。" |
||||
|
|
||||
|
### 5.2 选做任务 |
||||
|
|
||||
|
1. **正则策略匹配**:将`supports()`的判断从`url.contains()`改为正则表达式,让一个策略可以匹配一类URL |
||||
|
2. **默认策略(DefaultStrategy)**:当没有策略匹配时,提供一个通用的“标题提取”逻辑 |
||||
|
3. **策略优先级**:给每个策略加一个`priority`字段,工厂按优先级匹配(而不是按注册顺序) |
||||
|
4. **思考并回答(200字)**: |
||||
|
> "策略模式中,策略的`supports()`方法有可能让两个策略都返回true,这时该选哪个?`StrategyFactory`的遍历顺序会如何影响结果?你有什么解决方案?" |
||||
|
|
||||
|
### 5.3 思考题 |
||||
|
|
||||
|
1. **Repository与List的区别是什么?** 如果Repository只是包了一层List,为什么还要用? |
||||
|
2. **策略工厂的演进**:如果网站数量增加到100个,逐个注册的写法还合适吗?你想到什么解决方案? |
||||
|
3. **`Collections.unmodifiableList()`返回的是什么?** 它真的“不可修改”吗?如果原List被修改,这个不可修改视图会怎样? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 六、AI协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**学生执行步骤**: |
||||
|
1. 画出当前项目的类依赖图(手绘或工具生成) |
||||
|
2. 将类名和依赖关系发给AI |
||||
|
3. 输入指令: |
||||
|
> "作为Java架构审计师,请检查这个爬虫项目的架构。重点关注:①策略模式是否真正实现了开闭原则(增加新网站是否真的只需新增类);②Repository封装是否完整(是否有绕过Repository的路径);③是否存在循环依赖。请逐一指出问题并给出改进建议。" |
||||
|
|
||||
|
**预期AI输出**: |
||||
|
- 指出是否还存在“改一处影响多处”的耦合 |
||||
|
- 判断Repository的API设计是否完备 |
||||
|
- 评价整体架构的开闭原则达成度 |
||||
|
|
||||
|
### 进阶AI探究(选做) |
||||
|
|
||||
|
> "假设我有一个CrawlStrategy接口和10个实现类。不用工厂模式,直接用一个Map<String, CrawlStrategy>存起来,key是策略名称。这和StrategyFactory设计有什么本质区别?各自的优缺点是什么?" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 七、教学反思与调整记录 |
||||
|
|
||||
|
| 日期 | 事项 | 调整内容 | |
||||
|
|------|------|----------| |
||||
|
| 2026-05-01 | 首次编写 | 基于W9骨架,引入策略模式+工厂+Repository | |
||||
|
| 2026-05-07 | 结构优化 | 调整策略模式与工厂的讲解顺序,先策略后工厂更自然 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 附录1:W9到W10改动对照表 |
||||
|
|
||||
|
| 改动项 | W9代码 | W10代码 | |
||||
|
|--------|--------|---------| |
||||
|
| 数据存储 | `List<Article> articles` | `ArticleRepository repository` | |
||||
|
| Command接口 | `execute(String[], List<Article>)` | `execute(String[], ArticleRepository)` | |
||||
|
| 解析逻辑位置 | `CrawlCommand`内部 | 各`CrawlStrategy`实现类 | |
||||
|
| URL匹配 | 无(硬编码) | `StrategyFactory.getStrategy(url)` | |
||||
|
| 数据添加 | `articles.add(article)` | `repository.add(article)` | |
||||
|
| 数据读取 | 直接遍历`articles` | `repository.getAll()` | |
||||
|
|
||||
|
## 附录2:常见问题速查 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| 策略模式和Command模式有什么区别? | Command封装“动作”(做什么事),Strategy封装“算法”(怎么做)。在爬虫中:crawl是命令(动作),如何解析是策略(算法)。 | |
||||
|
| 工厂一定要叫Factory吗? | 不一定。但叫Factory表明它承担“创建对象”的职责,符合设计模式的命名惯例。 | |
||||
|
| `Collections.unmodifiableList()`有什么用? | 返回一个只读视图,调用add/remove等方法会抛`UnsupportedOperationException`。 | |
||||
|
| Repository和DAO有什么区别? | 在我们的上下文中可以视为同义词。严谨地说,Repository是领域驱动设计的概念,更偏向“集合语义”;DAO更偏数据库操作。 | |
||||
|
| 策略的`supports()`返回true但解析失败怎么办? | 那是策略实现的bug,该策略应修复。Factory不负责验证策略的正确性。 | |
||||
|
|
||||
|
## 附录3:教学逻辑说明 |
||||
|
|
||||
|
| 顺序 | 内容 | 设计理由 | |
||||
|
|------|------|----------| |
||||
|
| 1 | W9回顾+痛点暴露 | 承上启下,从已知问题引出新知识 | |
||||
|
| 2 | 策略模式 | 解决解析逻辑耦合问题,深化多态理解 | |
||||
|
| 3 | 解析器工厂 | 解决策略选择问题,引入工厂模式 | |
||||
|
| 4 | Repository模式 | 解决数据安全问题,实践封装原则 | |
||||
|
| 5 | 架构串联 | 将所有部件统一,形成完整心智模型 | |
||||
|
| 6 | 代码落地 | 实践验证,从“听懂”到“会做” | |
||||
|
| 7 | 架构反思+预告 | 暴露新问题,为W11健壮性工程铺垫 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 版本说明 |
||||
|
|
||||
|
- **v1(本版)**:基于W9教案模式首次编写,包含策略模式、工厂模式、Repository模式的完整引入 |
||||
Binary file not shown.
@ -0,0 +1,3 @@ |
|||||
|
{ |
||||
|
"java.configuration.updateBuildConfiguration": "interactive" |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,492 @@ |
|||||
|
--- |
||||
|
id: "24" |
||||
|
title: w10-设计模式 |
||||
|
slug: w10-design-patterns |
||||
|
status: draft |
||||
|
view_count: 0 |
||||
|
created_at: 2026-05-07T12:00:00+08:00 |
||||
|
updated_at: 2026-05-07T14:00:00.000000000+08:00 |
||||
|
--- |
||||
|
|
||||
|
# 高级程序设计 · 第10周 |
||||
|
|
||||
|
### 设计模式:灵活性与可扩展性 |
||||
|
|
||||
|
### 策略模式 + 工厂 + Repository 实战 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 📌 本周导航 |
||||
|
|
||||
|
- W9回顾:骨架的成就与隐患 |
||||
|
- 策略模式:解析器的“插头标准” |
||||
|
- 解析器工厂:自动匹配的魔法 |
||||
|
- Repository:武装数据访问 |
||||
|
- 整体架构串联:调用链全程 |
||||
|
- 代码落地 + 实践任务 |
||||
|
- 架构反思 + W11 预告 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 1️⃣ W9回顾:骨架的成就与隐患 |
||||
|
|
||||
|
### 我们建了一座漂亮的房子 |
||||
|
|
||||
|
- ✅ MVC 分层清晰 |
||||
|
- ✅ Command 模式:**新增命令,Controller 零改动** |
||||
|
- ✅ 所有输出走 `ConsoleView` |
||||
|
- ✅ 工程包结构标准 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 但问题也随之而来 |
||||
|
|
||||
|
```java |
||||
|
// CrawlCommand 里解析逻辑怎么办? |
||||
|
if (url.contains("blog.example.com")) { |
||||
|
// 博客解析... |
||||
|
} else if (url.contains("news.example.com")) { |
||||
|
// 新闻解析... |
||||
|
} else { |
||||
|
view.printError("Unsupported website!"); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 😫 每支持一个新网站,就要加一个 `else if` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 还有另一个“裸奔”的数据 |
||||
|
|
||||
|
```java |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
// 所有 Command 都可以: |
||||
|
articles.clear(); |
||||
|
articles.add(null); |
||||
|
articles.remove(0); |
||||
|
``` |
||||
|
|
||||
|
> 🚨 数据没有任何保护,靠口头约定是靠不住的 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 本周任务 |
||||
|
|
||||
|
1. **解析逻辑可插拔** → 策略模式 + 工厂 |
||||
|
2. **数据访问加守卫** → Repository 模式 |
||||
|
|
||||
|
> W9 搭骨架,W10 装盔甲 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 2️⃣ 策略模式:解析器的“插头标准” |
||||
|
|
||||
|
### 墙上的插座,为什么什么电器都能插? |
||||
|
|
||||
|
- **三孔插座** 是标准接口 |
||||
|
- 电视、电脑、手机充电器都实现这个接口 |
||||
|
- 插座不关心你是什么电器 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 爬虫的世界也一样 |
||||
|
|
||||
|
- `CrawlStrategy` = 插座接口 |
||||
|
- `BlogStrategy`、`NewsStrategy` = 具体电器 |
||||
|
- `CrawlCommand` = 使用电器的人 |
||||
|
- `StrategyFactory` = 插座面板 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 接口即合同 |
||||
|
|
||||
|
```java |
||||
|
public interface CrawlStrategy { |
||||
|
List<Article> parse(String url, Document doc); |
||||
|
boolean supports(String url); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
- `supports()`:我能不能处理这个 URL? |
||||
|
- `parse()`:怎么解析? |
||||
|
- **任何网站想被爬,签这份合同!** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 策略 vs 硬编码 |
||||
|
|
||||
|
| 维度 | if-else 屎山 | 策略模式 | |
||||
|
|------|-------------|----------| |
||||
|
| 新增网站 | 改 Command | 新建策略类 | |
||||
|
| 修改解析 | 翻找 else if | 只改对应类 | |
||||
|
| 测试 | 启动整个爬虫 | 单独测策略 | |
||||
|
| 开闭原则 | ❌ 修改开放 | ✅ 扩展开放,修改关闭 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 具体策略示例 |
||||
|
|
||||
|
```java |
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
for (Element e : doc.select(".post-title")) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✨ 一个新网站,一个独立类,各扫门前雪 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 3️⃣ 解析器工厂:自动匹配的魔法 |
||||
|
|
||||
|
### 谁来选择策略? |
||||
|
|
||||
|
- 如果 `CrawlCommand` 遍历所有策略 → 策略模式白用了 |
||||
|
- 我们需要一个黑盒子:**丢入 URL,返回合适的解析器** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 工厂登场 |
||||
|
|
||||
|
```java |
||||
|
public class StrategyFactory { |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
|
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) return s; |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔧 新增网站只需:新建策略类 + 工厂里注册一行 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 开闭原则的胜利 |
||||
|
|
||||
|
- ✅ `CrawlCommand` 完全不改 |
||||
|
- ✅ 新增 `XxxStrategy` 和一行注册 |
||||
|
- ✅ 所有策略的调用方式完全一致 |
||||
|
|
||||
|
> 这就是 **“对扩展开放,对修改关闭”** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 重构后的 CrawlCommand |
||||
|
|
||||
|
```java |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
String url = args[1]; |
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
List<Article> parsed = strategy.parse(url, doc); |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + parsed.size() + " articles."); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🧠 CrawlCommand 现在只做 **“调度”**,不做解析 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 4️⃣ Repository:武装数据访问 |
||||
|
|
||||
|
### 共享 List 的问题 |
||||
|
|
||||
|
```java |
||||
|
articles.clear(); // 清空 |
||||
|
articles.add(null); // 塞 null |
||||
|
articles.remove(0); // 随意删除 |
||||
|
``` |
||||
|
|
||||
|
> 靠约定维护的秩序,终将被打破 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 给数据装上防盗门 |
||||
|
|
||||
|
```java |
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) throw new IllegalArgumentException(...); |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { return articles.size(); } |
||||
|
|
||||
|
public void clear() { articles.clear(); } |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 三道防线 |
||||
|
|
||||
|
| 机制 | 作用 | |
||||
|
|------|------| |
||||
|
| **add 拒绝 null** | 规则写在代码里,不靠口头约定 | |
||||
|
| **getAll 返回不可变视图** | 任何修改立即抛异常 | |
||||
|
| **必须通过 repository 访问** | 封装内部结构,只暴露安全方法 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 所有 Command 签名改变 |
||||
|
|
||||
|
```java |
||||
|
// W9 |
||||
|
public void execute(String[] args, List<Article> articles); |
||||
|
|
||||
|
// W10 |
||||
|
public void execute(String[] args, ArticleRepository repository); |
||||
|
``` |
||||
|
|
||||
|
> 语义变化:从“给你数据随便玩” → “给你安全的存取通道” |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 5️⃣ 整体架构串联 |
||||
|
|
||||
|
### 一个 `crawl` 命令的完整旅程 |
||||
|
|
||||
|
``` |
||||
|
用户输入 "crawl https://blog.example.com" |
||||
|
↓ |
||||
|
ConsoleView.readLine() 读取输入 |
||||
|
↓ |
||||
|
Controller 路由 → CrawlCommand |
||||
|
↓ |
||||
|
StrategyFactory.getStrategy(url) → BlogStrategy |
||||
|
↓ |
||||
|
Jsoup 抓取 → Document |
||||
|
↓ |
||||
|
BlogStrategy.parse(url, doc) → List<Article> |
||||
|
↓ |
||||
|
Repository.add() 存储 |
||||
|
↓ |
||||
|
ConsoleView 输出成功信息 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 架构全景图 |
||||
|
|
||||
|
 |
||||
|
|
||||
|
```mermaid |
||||
|
flowchart TD |
||||
|
User(["👤 用户输入<br/>crawl https://blog.example.com"]) --> View |
||||
|
|
||||
|
subgraph View["🎨 View 层 (ConsoleView)"] |
||||
|
ReadLine["readLine()"] |
||||
|
Display["display() / printSuccess()"] |
||||
|
end |
||||
|
|
||||
|
ReadLine --> Controller |
||||
|
|
||||
|
subgraph Controller["🧭 Controller 层"] |
||||
|
Router["CrawlerController<br/>Map 路由"] |
||||
|
end |
||||
|
|
||||
|
Router --> Command |
||||
|
|
||||
|
subgraph Command["⚡ Command 层"] |
||||
|
CrawlCmd["CrawlCommand<br/>(调度者)"] |
||||
|
end |
||||
|
|
||||
|
CrawlCmd --> Factory |
||||
|
|
||||
|
subgraph Strategy["🧩 Strategy 层"] |
||||
|
Factory["StrategyFactory<br/>(自动匹配)"] |
||||
|
StrategyI["#lt;#lt;interface#gt;#gt; CrawlStrategy"] |
||||
|
BlogS["BlogStrategy"] |
||||
|
NewsS["NewsStrategy"] |
||||
|
Factory --> StrategyI --> BlogS |
||||
|
StrategyI --> NewsS |
||||
|
end |
||||
|
|
||||
|
BlogS --> Repository |
||||
|
|
||||
|
subgraph Repository["🔐 Repository 层"] |
||||
|
Repo["ArticleRepository<br/>(add / getAll)"] |
||||
|
RepoList["List#lt;Article#gt; (私有)"] |
||||
|
Repo --> RepoList |
||||
|
end |
||||
|
|
||||
|
RepoList --> Model |
||||
|
|
||||
|
subgraph Model["📦 Model 层"] |
||||
|
Article["Article"] |
||||
|
end |
||||
|
|
||||
|
CrawlCmd --> Display |
||||
|
Repository --> Display |
||||
|
``` |
||||
|
|
||||
|
> 🗺️ 每一层都有清晰的职责,每一处扩展都只需要新增而不是修改 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 6️⃣ 代码落地(分步升级) |
||||
|
|
||||
|
### 从 W9 升级到 W10 的改动清单 |
||||
|
|
||||
|
1. 新建 `strategy/` 包 → `CrawlStrategy` 接口 |
||||
|
2. 实现 `BlogStrategy`、`NewsStrategy` |
||||
|
3. 实现 `StrategyFactory` |
||||
|
4. 新建 `repository/` 包 → `ArticleRepository` |
||||
|
5. 修改 `Command` 接口签名 |
||||
|
6. 重写 `CrawlCommand` |
||||
|
7. 调整其他所有 `Command` |
||||
|
8. 调整 `Controller` 和 `App.java` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 关键代码演示 |
||||
|
|
||||
|
- `Collections.unmodifiableList()` 的用法 |
||||
|
- `StrategyFactory.getStrategy()` 的遍历逻辑 |
||||
|
- `CrawlCommand` 从“写死解析”到“调度组装” |
||||
|
|
||||
|
```java |
||||
|
// 一个改动示例 |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); // 旧: articles.add(a); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 找茬点 |
||||
|
|
||||
|
- `StrategyFactory` 没匹配到策略时返回 `null` |
||||
|
- `CrawlCommand` 检查 `null` 并报错 |
||||
|
- 有没有更优雅的方式避免 `null` 判断? |
||||
|
|
||||
|
> 🔍 课后用 AI 探索 “空对象模式” 的前奏 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 7️⃣ 架构反思 + 下周预告 |
||||
|
|
||||
|
### 当前架构的脆弱点 |
||||
|
|
||||
|
- ❌ 异常处理单一笼统 |
||||
|
- ❌ 没有重试机制 |
||||
|
- ❌ 网络超时无控制 |
||||
|
- ❌ 日志仅输出到终端 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### W11 目标:健壮性工程 |
||||
|
|
||||
|
- ✅ **自定义异常体系**:把“出错了”变成具体的业务异常 |
||||
|
- ✅ **工程化日志**:记录谁、什么时间、做了什么 |
||||
|
- ✅ **防御式编程 + 重试机制**:网络抖动不再致命 |
||||
|
|
||||
|
> W9 搭骨架 → W10 装盔甲 → W11 让它经得起毒打 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 8️⃣ 实践任务(现场) |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. 基于 W9 项目升级到 W10 |
||||
|
2. 至少实现 2 个 CrawlStrategy(可模拟) |
||||
|
3. 实现 `StrategyFactory` 和 `ArticleRepository` |
||||
|
4. 测试完整 `crawl` → `list` 流程 |
||||
|
|
||||
|
### 验收标准 |
||||
|
|
||||
|
- [ ] 新增策略只加类+注册,零改动旧代码 |
||||
|
- [ ] `getAll()` 返回不可修改视图 |
||||
|
- [ ] `CrawlCommand` 不含网站特定解析 |
||||
|
- [ ] 所有 Command 用 Repository |
||||
|
- [ ] 无地方直接操作 `List<Article>` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 9️⃣ 课后作业 |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. 完善 `ArticleRepository`:增加 `addAll`,防御 null |
||||
|
2. **★ AnalyzeCommand**:复用策略解析但不存储,输出统计信息 |
||||
|
3. **AI 架构审计**:发送类签名给 AI,检查策略解耦与封装 |
||||
|
|
||||
|
### 选做 |
||||
|
|
||||
|
- 正则策略匹配、默认策略、策略优先级 |
||||
|
- 思考题:两个策略都 `supports` 同一 URL 时怎么办? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤖 AI 协同升级 |
||||
|
|
||||
|
### 架构审计师(必做) |
||||
|
|
||||
|
- 画出类依赖图 |
||||
|
- 发给 AI:“检查开闭原则达成度,Repository 封装完备性,是否存在循环依赖” |
||||
|
|
||||
|
### 进阶探究 |
||||
|
|
||||
|
- 不用工厂,直接用 `Map<String, CrawlStrategy>` 存起来 vs `StrategyFactory` 的区别? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 📚 总结 |
||||
|
|
||||
|
- ✅ 策略模式:算法可插拔,新增网站零痛苦 |
||||
|
- ✅ 工厂:自动匹配,URL → 策略的魔法 |
||||
|
- ✅ Repository:数据守卫,规则从口头约定变成代码强制 |
||||
|
- ✅ 架构:从“分开”到“优雅合上”,对扩展开放,对修改关闭 |
||||
|
|
||||
|
### W11 预告 |
||||
|
|
||||
|
自定义异常体系 + 日志 + 重试机制 |
||||
|
|
||||
|
> 🚀 让我们造的爬虫,经得住现实的考验 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 谢谢! |
||||
|
|
||||
|
**保持工程洁癖,下周见!** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
# 居中标题 |
||||
|
|
||||
|
## 居中副标题 |
||||
|
|
||||
|
### 居中内容 |
||||
|
|
||||
|
--- |
||||
@ -0,0 +1,62 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.example</groupId> |
||||
|
<artifactId>datacollect-cli</artifactId> |
||||
|
<version>0.1.0</version> |
||||
|
<properties> |
||||
|
<maven.compiler.source>11</maven.compiler.source> |
||||
|
<maven.compiler.target>11</maven.compiler.target> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.17.2</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>org.slf4j</groupId> |
||||
|
<artifactId>slf4j-api</artifactId> |
||||
|
<version>1.7.36</version> |
||||
|
</dependency> |
||||
|
<dependency> |
||||
|
<groupId>ch.qos.logback</groupId> |
||||
|
<artifactId>logback-classic</artifactId> |
||||
|
<version>1.2.11</version> |
||||
|
</dependency> |
||||
|
</dependencies> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.3.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.example.datacollect.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,21 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
StrategyFactory strategyFactory = new StrategyFactory(); |
||||
|
CrawlerController controller = new CrawlerController(view, repository, strategyFactory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands."); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,129 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.circuitbreaker.CircuitBreaker; |
||||
|
import com.example.datacollect.exception.UrlFormatException; |
||||
|
import com.example.datacollect.util.RetryUtils; |
||||
|
import com.example.datacollect.util.UrlValidator; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.concurrent.atomic.AtomicInteger; |
||||
|
|
||||
|
public class RetryUtilsExample { |
||||
|
|
||||
|
private static final Logger logger = LoggerFactory.getLogger(RetryUtilsExample.class); |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
logger.info("========== RetryUtils 使用示例 =========="); |
||||
|
|
||||
|
demonstrateSuccessfulRetry(); |
||||
|
demonstrateFailedRetry(); |
||||
|
demonstrateUrlValidation(); |
||||
|
demonstrateCircuitBreaker(); |
||||
|
|
||||
|
logger.info("========== 所有示例执行完成 =========="); |
||||
|
} |
||||
|
|
||||
|
private static void demonstrateSuccessfulRetry() { |
||||
|
logger.info("--- 示例1: 成功后立即返回 ---"); |
||||
|
AtomicInteger attemptCount = new AtomicInteger(0); |
||||
|
|
||||
|
RetryUtils.RetryResult<String> result = RetryUtils.executeWithRetry( |
||||
|
() -> { |
||||
|
attemptCount.incrementAndGet(); |
||||
|
logger.info("第 {} 次尝试", attemptCount.get()); |
||||
|
return "操作成功!"; |
||||
|
}, |
||||
|
IOException.class |
||||
|
); |
||||
|
|
||||
|
logger.info("结果: {}, 尝试次数: {}, 成功: {}", result.getResult(), result.getAttemptCount(), result.isSuccess()); |
||||
|
} |
||||
|
|
||||
|
private static void demonstrateFailedRetry() { |
||||
|
logger.info("--- 示例2: 重试3次后失败 ---"); |
||||
|
AtomicInteger attemptCount = new AtomicInteger(0); |
||||
|
|
||||
|
RetryUtils.RetryResult<String> result = RetryUtils.executeWithRetry( |
||||
|
() -> { |
||||
|
int attempt = attemptCount.incrementAndGet(); |
||||
|
logger.info("第 {} 次尝试,模拟失败", attempt); |
||||
|
throw new RuntimeException("网络连接失败"); |
||||
|
}, |
||||
|
3, |
||||
|
500L, |
||||
|
5000L, |
||||
|
IOException.class, RuntimeException.class |
||||
|
); |
||||
|
|
||||
|
logger.info("结果: 成功={}, 尝试次数={}", result.isSuccess(), result.getAttemptCount()); |
||||
|
logger.info("异常历史:"); |
||||
|
for (RetryUtils.ExceptionInfo info : result.getExceptionHistory()) { |
||||
|
logger.info(" {}", info); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void demonstrateUrlValidation() { |
||||
|
logger.info("--- 示例3: URL格式校验 ---"); |
||||
|
|
||||
|
String[] testUrls = { |
||||
|
"https://news.hnu.edu.cn", |
||||
|
"https://blog.example.com/articles", |
||||
|
"not-a-url", |
||||
|
"htp:/invalid", |
||||
|
"" |
||||
|
}; |
||||
|
|
||||
|
for (String url : testUrls) { |
||||
|
try { |
||||
|
UrlValidator.validate(url); |
||||
|
logger.info("URL 有效: {}", url); |
||||
|
} catch (UrlFormatException e) { |
||||
|
logger.warn("URL 无效: {}, 错误: {}", url, e.getMessage()); |
||||
|
if (e.getInvalidUrl() != null) { |
||||
|
logger.warn(" 不合法的URL内容: {}", e.getInvalidUrl()); |
||||
|
} |
||||
|
if (e.getCause() != null) { |
||||
|
logger.warn(" 根因异常: {}", e.getCause().getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private static void demonstrateCircuitBreaker() { |
||||
|
logger.info("--- 示例4: 断路器模式 ---"); |
||||
|
|
||||
|
CircuitBreaker breaker = new CircuitBreaker(3, 5000L, 2); |
||||
|
AtomicInteger callCount = new AtomicInteger(0); |
||||
|
|
||||
|
logger.info("初始状态: {}", breaker.getState()); |
||||
|
|
||||
|
for (int i = 1; i <= 5; i++) { |
||||
|
logger.info("--- 第 {} 次调用 ---", i); |
||||
|
try { |
||||
|
String result = breaker.execute(() -> { |
||||
|
int call = callCount.incrementAndGet(); |
||||
|
logger.info(" 实际执行业务逻辑, 调用次数: {}", call); |
||||
|
if (call % 3 == 0) { |
||||
|
throw new RuntimeException("服务暂时不可用"); |
||||
|
} |
||||
|
return "业务处理成功"; |
||||
|
}); |
||||
|
logger.info(" 调用成功: {}", result); |
||||
|
} catch (CircuitBreaker.CircuitBreakerOpenException e) { |
||||
|
logger.warn(" 断路器开启,快速失败: {}", e.getMessage()); |
||||
|
} catch (Exception e) { |
||||
|
logger.warn(" 调用失败: {}", e.getMessage()); |
||||
|
} |
||||
|
logger.info(" 断路器状态: {}", breaker.getState()); |
||||
|
} |
||||
|
|
||||
|
breaker.reset(); |
||||
|
logger.info("重置后断路器状态: {}", breaker.getState()); |
||||
|
} |
||||
|
|
||||
|
private static class IOException extends java.io.IOException { |
||||
|
public IOException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,184 @@ |
|||||
|
package com.example.datacollect.circuitbreaker; |
||||
|
|
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.concurrent.atomic.AtomicInteger; |
||||
|
import java.util.concurrent.atomic.AtomicLong; |
||||
|
import java.util.concurrent.atomic.AtomicReference; |
||||
|
import java.util.function.Supplier; |
||||
|
|
||||
|
public class CircuitBreaker { |
||||
|
|
||||
|
private static final Logger logger = LoggerFactory.getLogger(CircuitBreaker.class); |
||||
|
|
||||
|
private final int failureThreshold; |
||||
|
private final long circuitOpenTimeoutMs; |
||||
|
private final int halfOpenMaxAttempts; |
||||
|
|
||||
|
private final AtomicReference<State> state = new AtomicReference<>(State.CLOSED); |
||||
|
private final AtomicInteger consecutiveFailures = new AtomicInteger(0); |
||||
|
private final AtomicInteger consecutiveSuccesses = new AtomicInteger(0); |
||||
|
private final AtomicLong lastFailureTime = new AtomicLong(0); |
||||
|
|
||||
|
public CircuitBreaker(int failureThreshold, long circuitOpenTimeoutMs) { |
||||
|
this(failureThreshold, circuitOpenTimeoutMs, 1); |
||||
|
} |
||||
|
|
||||
|
public CircuitBreaker(int failureThreshold, long circuitOpenTimeoutMs, int halfOpenMaxAttempts) { |
||||
|
if (failureThreshold <= 0) { |
||||
|
throw new IllegalArgumentException("failureThreshold must be positive"); |
||||
|
} |
||||
|
if (circuitOpenTimeoutMs <= 0) { |
||||
|
throw new IllegalArgumentException("circuitOpenTimeoutMs must be positive"); |
||||
|
} |
||||
|
if (halfOpenMaxAttempts <= 0) { |
||||
|
throw new IllegalArgumentException("halfOpenMaxAttempts must be positive"); |
||||
|
} |
||||
|
this.failureThreshold = failureThreshold; |
||||
|
this.circuitOpenTimeoutMs = circuitOpenTimeoutMs; |
||||
|
this.halfOpenMaxAttempts = halfOpenMaxAttempts; |
||||
|
} |
||||
|
|
||||
|
public <T> T execute(Supplier<T> supplier) throws CircuitBreakerOpenException { |
||||
|
if (!allowRequest()) { |
||||
|
throw new CircuitBreakerOpenException( |
||||
|
String.format("Circuit breaker is OPEN. Failure threshold: %d, Timeout: %dms", |
||||
|
failureThreshold, circuitOpenTimeoutMs)); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
T result = supplier.get(); |
||||
|
recordSuccess(); |
||||
|
return result; |
||||
|
} catch (Exception e) { |
||||
|
recordFailure(); |
||||
|
throw e; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void execute(Runnable runnable) throws CircuitBreakerOpenException { |
||||
|
execute(() -> { |
||||
|
runnable.run(); |
||||
|
return null; |
||||
|
}); |
||||
|
} |
||||
|
|
||||
|
private boolean allowRequest() { |
||||
|
State currentState = state.get(); |
||||
|
|
||||
|
switch (currentState) { |
||||
|
case CLOSED: |
||||
|
return true; |
||||
|
|
||||
|
case OPEN: |
||||
|
if (shouldAttemptReset()) { |
||||
|
transitionToHalfOpen(); |
||||
|
return true; |
||||
|
} |
||||
|
return false; |
||||
|
|
||||
|
case HALF_OPEN: |
||||
|
return true; |
||||
|
|
||||
|
default: |
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private boolean shouldAttemptReset() { |
||||
|
long timeSinceLastFailure = System.currentTimeMillis() - lastFailureTime.get(); |
||||
|
return timeSinceLastFailure >= circuitOpenTimeoutMs; |
||||
|
} |
||||
|
|
||||
|
private void recordSuccess() { |
||||
|
State currentState = state.get(); |
||||
|
|
||||
|
if (currentState == State.HALF_OPEN) { |
||||
|
consecutiveSuccesses.incrementAndGet(); |
||||
|
logger.debug("Half-open success count: {}", consecutiveSuccesses.get()); |
||||
|
|
||||
|
if (consecutiveSuccesses.get() >= halfOpenMaxAttempts) { |
||||
|
transitionToClosed(); |
||||
|
} |
||||
|
} else if (currentState == State.CLOSED) { |
||||
|
consecutiveFailures.set(0); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void recordFailure() { |
||||
|
lastFailureTime.set(System.currentTimeMillis()); |
||||
|
consecutiveFailures.incrementAndGet(); |
||||
|
|
||||
|
State currentState = state.get(); |
||||
|
logger.debug("Failure recorded. Current state: {}, consecutive failures: {}", |
||||
|
currentState, consecutiveFailures.get()); |
||||
|
|
||||
|
if (currentState == State.HALF_OPEN) { |
||||
|
transitionToOpen(); |
||||
|
} else if (currentState == State.CLOSED && consecutiveFailures.get() >= failureThreshold) { |
||||
|
transitionToOpen(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void transitionToOpen() { |
||||
|
if (state.compareAndSet(State.CLOSED, State.OPEN) || |
||||
|
state.compareAndSet(State.HALF_OPEN, State.OPEN)) { |
||||
|
logger.warn("Circuit breaker transitioned to OPEN. Failure threshold reached: {}", failureThreshold); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void transitionToHalfOpen() { |
||||
|
if (state.compareAndSet(State.OPEN, State.HALF_OPEN)) { |
||||
|
consecutiveSuccesses.set(0); |
||||
|
logger.info("Circuit breaker transitioned to HALF_OPEN. Testing service availability..."); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
private void transitionToClosed() { |
||||
|
if (state.compareAndSet(State.HALF_OPEN, State.CLOSED)) { |
||||
|
consecutiveFailures.set(0); |
||||
|
consecutiveSuccesses.set(0); |
||||
|
logger.info("Circuit breaker transitioned to CLOSED. Service recovered."); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public State getState() { |
||||
|
return state.get(); |
||||
|
} |
||||
|
|
||||
|
public boolean isClosed() { |
||||
|
return state.get() == State.CLOSED; |
||||
|
} |
||||
|
|
||||
|
public boolean isOpen() { |
||||
|
return state.get() == State.OPEN; |
||||
|
} |
||||
|
|
||||
|
public boolean isHalfOpen() { |
||||
|
return state.get() == State.HALF_OPEN; |
||||
|
} |
||||
|
|
||||
|
public int getConsecutiveFailures() { |
||||
|
return consecutiveFailures.get(); |
||||
|
} |
||||
|
|
||||
|
public void reset() { |
||||
|
state.set(State.CLOSED); |
||||
|
consecutiveFailures.set(0); |
||||
|
consecutiveSuccesses.set(0); |
||||
|
lastFailureTime.set(0); |
||||
|
logger.info("Circuit breaker has been reset to CLOSED state."); |
||||
|
} |
||||
|
|
||||
|
/** The three states a circuit breaker can be in. */
public enum State {
    /** Calls are allowed. */
    CLOSED,
    /** Calls are rejected until the open timeout has elapsed. */
    OPEN,
    /** Calls are allowed on a trial basis. */
    HALF_OPEN
}
||||
|
|
||||
|
public static class CircuitBreakerOpenException extends RuntimeException { |
||||
|
public CircuitBreakerOpenException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,8 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, ArticleRepository repository); |
||||
|
} |
||||
@ -0,0 +1,61 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.exception.UrlFormatException; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.util.UrlValidator; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
|
||||
|
try { |
||||
|
UrlValidator.validate(url); |
||||
|
} catch (UrlFormatException e) { |
||||
|
view.printError("Invalid URL: " + e.getInvalidUrl() + " - " + e.getMessage()); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
view.printInfo("Crawling: " + url); |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
var articles = strategy.parse(url, doc); |
||||
|
for (var article : articles) { |
||||
|
repository.add(article); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + articles.size() + " articles."); |
||||
|
} catch (UrlFormatException e) { |
||||
|
view.printError("URL format error: " + e.getInvalidUrl() + " - " + e.getMessage()); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Failed to crawl: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,22 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,22 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.display(repository.getAll()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,43 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Unchecked exception for a URL that failed validation; optionally carries the
 * offending URL text.
 */
public class UrlFormatException extends RuntimeException {

    /** The rejected URL, or {@code null} when the thrower did not supply one. */
    private final String invalidUrl;

    /** Creates the exception with the default message and no URL. */
    public UrlFormatException() {
        this("Invalid URL format", (String) null);
    }

    /**
     * @param message detail message
     */
    public UrlFormatException(String message) {
        this(message, (String) null);
    }

    /**
     * @param message    detail message
     * @param invalidUrl the rejected URL (may be {@code null})
     */
    public UrlFormatException(String message, String invalidUrl) {
        super(message);
        this.invalidUrl = invalidUrl;
    }

    /**
     * @param message    detail message
     * @param invalidUrl the rejected URL (may be {@code null})
     * @param cause      underlying failure
     */
    public UrlFormatException(String message, String invalidUrl, Throwable cause) {
        super(message, cause);
        this.invalidUrl = invalidUrl;
    }

    /**
     * @param message detail message
     * @param cause   underlying failure
     */
    public UrlFormatException(String message, Throwable cause) {
        this(message, null, cause);
    }

    /**
     * @return the rejected URL, or {@code null} when none was recorded
     */
    public String getInvalidUrl() {
        return invalidUrl;
    }

    /**
     * Appends the rejected URL to the description when one was recorded; otherwise
     * falls back to the standard {@link Throwable#toString()} form.
     */
    @Override
    public String toString() {
        return invalidUrl == null
                ? super.toString()
                : String.format("UrlFormatException: %s [invalidUrl=%s]", getMessage(), invalidUrl);
    }
}
||||
@ -0,0 +1,45 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" |
||||
|
+ "title='" + title + '\'' |
||||
|
+ ", url='" + url + '\'' |
||||
|
+ '}'; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HnuNewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.hnu.edu.cn"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements listItems = doc.select("ul.list11 li"); |
||||
|
|
||||
|
for (Element li : listItems) { |
||||
|
Element link = li.selectFirst("a"); |
||||
|
if (link == null) continue; |
||||
|
|
||||
|
String articleUrl = link.attr("href"); |
||||
|
if (!articleUrl.startsWith("http")) { |
||||
|
articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); |
||||
|
} |
||||
|
|
||||
|
String title = ""; |
||||
|
Element titleEl = link.selectFirst("h4.l2.h4s2"); |
||||
|
if (titleEl != null) { |
||||
|
title = titleEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
String content = ""; |
||||
|
Element contentEl = link.selectFirst("p.l3.ps3"); |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim(); |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty()) { |
||||
|
articles.add(new Article(title, articleUrl, content)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,162 @@ |
|||||
|
package com.example.datacollect.util; |
||||
|
|
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Arrays; |
||||
|
import java.util.List; |
||||
|
import java.util.concurrent.Callable; |
||||
|
import java.util.concurrent.CopyOnWriteArrayList; |
||||
|
import java.util.concurrent.TimeUnit; |
||||
|
import java.util.function.Supplier; |
||||
|
|
||||
|
public final class RetryUtils { |
||||
|
|
||||
|
private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class); |
||||
|
private static final long DEFAULT_BASE_WAIT_MS = 500L; |
||||
|
private static final long DEFAULT_MAX_WAIT_MS = 30000L; |
||||
|
private static final int DEFAULT_MAX_RETRIES = 3; |
||||
|
|
||||
|
private RetryUtils() { |
||||
|
} |
||||
|
|
||||
|
public static <T> RetryResult<T> executeWithRetry(Callable<T> callable, Class<? extends Exception>... retryableExceptions) { |
||||
|
return executeWithRetry(callable, DEFAULT_MAX_RETRIES, DEFAULT_BASE_WAIT_MS, DEFAULT_MAX_WAIT_MS, retryableExceptions); |
||||
|
} |
||||
|
|
||||
|
public static <T> RetryResult<T> executeWithRetry(Callable<T> callable, int maxRetries, long baseWaitMs, long maxWaitMs, Class<? extends Exception>... retryableExceptions) { |
||||
|
List<ExceptionInfo> exceptionHistory = new CopyOnWriteArrayList<>(); |
||||
|
Class<? extends Exception>[] allowedExceptions = retryableExceptions.length > 0 ? retryableExceptions : new Class[]{Exception.class}; |
||||
|
|
||||
|
for (int attempt = 0; attempt <= maxRetries; attempt++) { |
||||
|
try { |
||||
|
T result = callable.call(); |
||||
|
if (attempt > 0) { |
||||
|
logger.info("Retry succeeded on attempt {}", attempt); |
||||
|
} |
||||
|
return new RetryResult<>(result, attempt, null, exceptionHistory); |
||||
|
} catch (Exception e) { |
||||
|
exceptionHistory.add(new ExceptionInfo(attempt, e)); |
||||
|
|
||||
|
boolean isRetryable = isRetryable(e, allowedExceptions); |
||||
|
if (!isRetryable) { |
||||
|
logger.warn("Non-retryable exception on attempt {}: {}", attempt, e.getMessage()); |
||||
|
return new RetryResult<>(null, attempt, e, exceptionHistory); |
||||
|
} |
||||
|
|
||||
|
if (attempt >= maxRetries) { |
||||
|
logger.warn("Max retries ({}) reached. Last exception: {}", maxRetries, e.getMessage()); |
||||
|
return new RetryResult<>(null, attempt, e, exceptionHistory); |
||||
|
} |
||||
|
|
||||
|
long waitTime = calculateWaitTime(attempt, baseWaitMs, maxWaitMs); |
||||
|
logger.info("Attempt {} failed: {}. Waiting {} ms before retry...", attempt, e.getMessage(), waitTime); |
||||
|
|
||||
|
try { |
||||
|
TimeUnit.MILLISECONDS.sleep(waitTime); |
||||
|
} catch (InterruptedException ie) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
return new RetryResult<>(null, attempt, new InterruptedException("Retry interrupted"), exceptionHistory); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
return new RetryResult<>(null, maxRetries, new RuntimeException("Unexpected retry failure"), exceptionHistory); |
||||
|
} |
||||
|
|
||||
|
public static void executeWithRetry(Runnable runnable, Class<? extends Exception>... retryableExceptions) { |
||||
|
executeWithRetry(() -> { |
||||
|
runnable.run(); |
||||
|
return null; |
||||
|
}, retryableExceptions); |
||||
|
} |
||||
|
|
||||
|
public static void executeWithRetry(Runnable runnable, int maxRetries, long baseWaitMs, long maxWaitMs, Class<? extends Exception>... retryableExceptions) { |
||||
|
executeWithRetry(() -> { |
||||
|
runnable.run(); |
||||
|
return null; |
||||
|
}, maxRetries, baseWaitMs, maxWaitMs, retryableExceptions); |
||||
|
} |
||||
|
|
||||
|
private static long calculateWaitTime(int attempt, long baseWaitMs, long maxWaitMs) { |
||||
|
long waitTime = (long) (baseWaitMs * Math.pow(2, attempt)); |
||||
|
return Math.min(waitTime, maxWaitMs); |
||||
|
} |
||||
|
|
||||
|
private static boolean isRetryable(Exception e, Class<? extends Exception>[] allowedExceptions) { |
||||
|
for (Class<? extends Exception> allowed : allowedExceptions) { |
||||
|
if (allowed.isInstance(e)) { |
||||
|
return true; |
||||
|
} |
||||
|
} |
||||
|
return false; |
||||
|
} |
||||
|
|
||||
|
public static final class RetryResult<T> { |
||||
|
private final T result; |
||||
|
private final int attemptCount; |
||||
|
private final Exception finalException; |
||||
|
private final List<ExceptionInfo> exceptionHistory; |
||||
|
|
||||
|
private RetryResult(T result, int attemptCount, Exception finalException, List<ExceptionInfo> exceptionHistory) { |
||||
|
this.result = result; |
||||
|
this.attemptCount = attemptCount; |
||||
|
this.finalException = finalException; |
||||
|
this.exceptionHistory = new ArrayList<>(exceptionHistory); |
||||
|
} |
||||
|
|
||||
|
public T getResult() { |
||||
|
return result; |
||||
|
} |
||||
|
|
||||
|
public int getAttemptCount() { |
||||
|
return attemptCount; |
||||
|
} |
||||
|
|
||||
|
public Exception getFinalException() { |
||||
|
return finalException; |
||||
|
} |
||||
|
|
||||
|
public List<ExceptionInfo> getExceptionHistory() { |
||||
|
return new ArrayList<>(exceptionHistory); |
||||
|
} |
||||
|
|
||||
|
public boolean isSuccess() { |
||||
|
return finalException == null; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return String.format("RetryResult{success=%s, attempts=%d, finalException=%s, historySize=%d}", |
||||
|
isSuccess(), attemptCount, finalException != null ? finalException.getMessage() : "none", exceptionHistory.size()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static final class ExceptionInfo { |
||||
|
private final int attempt; |
||||
|
private final String message; |
||||
|
private final String exceptionType; |
||||
|
|
||||
|
public ExceptionInfo(int attempt, Exception e) { |
||||
|
this.attempt = attempt; |
||||
|
this.message = e.getMessage(); |
||||
|
this.exceptionType = e.getClass().getName(); |
||||
|
} |
||||
|
|
||||
|
public int getAttempt() { |
||||
|
return attempt; |
||||
|
} |
||||
|
|
||||
|
public String getMessage() { |
||||
|
return message; |
||||
|
} |
||||
|
|
||||
|
public String getExceptionType() { |
||||
|
return exceptionType; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return String.format("[Attempt %d] %s: %s", attempt, exceptionType, message); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,49 @@ |
|||||
|
package com.example.datacollect.util; |
||||
|
|
||||
|
import com.example.datacollect.exception.UrlFormatException; |
||||
|
import java.net.MalformedURLException; |
||||
|
import java.net.URL; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
public final class UrlValidator { |
||||
|
|
||||
|
private static final String URL_REGEX = "^https?://[a-zA-Z0-9][a-zA-Z0-9\\-.]*\\.[a-zA-Z]{2,}(/.*)?$"; |
||||
|
private static final Pattern URL_PATTERN = Pattern.compile(URL_REGEX); |
||||
|
|
||||
|
private UrlValidator() { |
||||
|
} |
||||
|
|
||||
|
public static void validate(String url) { |
||||
|
if (url == null || url.trim().isEmpty()) { |
||||
|
throw new UrlFormatException("URL cannot be null or empty", url); |
||||
|
} |
||||
|
|
||||
|
String trimmedUrl = url.trim(); |
||||
|
|
||||
|
if (!URL_PATTERN.matcher(trimmedUrl).matches()) { |
||||
|
throw new UrlFormatException("URL format is invalid: does not match expected pattern", trimmedUrl); |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
new URL(trimmedUrl); |
||||
|
} catch (MalformedURLException e) { |
||||
|
throw new UrlFormatException("URL is malformed and cannot be parsed", trimmedUrl, e); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public static boolean isValid(String url) { |
||||
|
if (url == null || url.trim().isEmpty()) { |
||||
|
return false; |
||||
|
} |
||||
|
String trimmedUrl = url.trim(); |
||||
|
if (!URL_PATTERN.matcher(trimmedUrl).matches()) { |
||||
|
return false; |
||||
|
} |
||||
|
try { |
||||
|
new URL(trimmedUrl); |
||||
|
return true; |
||||
|
} catch (MalformedURLException e) { |
||||
|
return false; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,42 @@ |
|||||
|
package com.example.datacollect.view; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
return scanner.nextLine(); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,63 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Logback configuration for the datacollect CLI.
  Console and rolling-file output, both wrapped in async appenders.
-->
<configuration scan="true" scanPeriod="30 seconds">

    <!-- Stop the logger context on JVM shutdown so events still queued in the async
         appenders are flushed when the CLI exits via System.exit. -->
    <shutdownHook class="ch.qos.logback.core.hook.DefaultShutdownHook"/>

    <property name="LOG_HOME" value="logs"/>
    <property name="APP_NAME" value="datacollect-cli"/>
    <property name="LOG_PATTERN" value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n"/>

    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_HOME}/${APP_NAME}.log</file>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>

        <!-- Rolls daily and whenever the active file exceeds maxFileSize.
             SizeAndTimeBasedRollingPolicy replaces the deprecated combination of
             TimeBasedRollingPolicy with a nested SizeAndTimeBasedFNATP. -->
        <rollingPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedRollingPolicy">
            <fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern>
            <maxFileSize>100MB</maxFileSize>
            <maxHistory>30</maxHistory>
            <totalSizeCap>3GB</totalSizeCap>
        </rollingPolicy>
    </appender>

    <!-- Console output never blocks the application. -->
    <appender name="ASYNC_CONSOLE" class="ch.qos.logback.classic.AsyncAppender">
        <queueSize>512</queueSize>
        <discardingThreshold>0</discardingThreshold>
        <includeCallerData>false</includeCallerData>
        <neverBlock>true</neverBlock>
        <appender-ref ref="CONSOLE"/>
    </appender>

    <!-- File output blocks when the queue is full rather than dropping events.
         Caller data is not extracted because LOG_PATTERN prints none of it. -->
    <appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender">
        <queueSize>1024</queueSize>
        <discardingThreshold>0</discardingThreshold>
        <includeCallerData>false</includeCallerData>
        <neverBlock>false</neverBlock>
        <appender-ref ref="FILE"/>
    </appender>

    <logger name="com.example.datacollect" level="DEBUG" additivity="false">
        <appender-ref ref="ASYNC_CONSOLE"/>
        <appender-ref ref="ASYNC_FILE"/>
    </logger>

    <logger name="org.jsoup" level="WARN" additivity="false">
        <appender-ref ref="ASYNC_CONSOLE"/>
        <appender-ref ref="ASYNC_FILE"/>
    </logger>

    <root level="INFO">
        <appender-ref ref="ASYNC_CONSOLE"/>
        <appender-ref ref="ASYNC_FILE"/>
    </root>

</configuration>
||||
@ -0,0 +1,758 @@ |
|||||
|
--- |
||||
|
|
||||
|
# 教案:《高级程序设计》第9周——工程架构:从"写代码"到"造系统" |
||||
|
|
||||
|
| 项目 | 内容 | |
||||
|
|------|------| |
||||
|
| **课程名称** | 高级程序设计 | |
||||
|
| **周次** | 第9周 | |
||||
|
| **主题** | 工程架构——从"写代码"到"造系统" | |
||||
|
| **学时** | 2学时(90分钟) | |
||||
|
| **授课对象** | 具备Python基础、已完成Java面向对象特性学习的学生 | |
||||
|
| **教学环境** | JDK 17+、IntelliJ IDEA、Maven(模板) | |
||||
|
| **前情提要** | 本课程原计划使用JavaFX GUI,后根据教学反馈转向CLI + MVC + 爬虫工程化 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 教学调整说明:为什么选择CLI而不是GUI? |
||||
|
|
||||
|
> **原计划**:JavaFX桌面应用 → **新计划**:CLI命令行应用 |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | |
||||
|
| **学生痛点** | "窗口点击"与后端能力无关 | 真正锻炼工程思维 | |
||||
|
| **AI辅助** | AI生成FXML,学生看不懂 | AI辅助重构架构 | |
||||
|
| **工程化** | 脱离真实后端开发场景 | 模拟真实服务器/大数据开发 | |
||||
|
| **核心转型** | "视觉装饰"优先 | "逻辑架构"优先 | |
||||
|
|
||||
|
**决策理由**: |
||||
|
1. **985学生需要的是工程思维**,不是拖控件 |
||||
|
2. **接口抽象**是弱项,CLI + MVC更能暴露这个问题 |
||||
|
3. **彩色终端**足够酷炫,且代码量可控 |
||||
|
|
||||
|
**更深层的教育价值**: |
||||
|
> 在GUI框架中,架构已被框架强制划定,学生只是"遵守规矩";而CLI世界里没有任何框架告诉你模型在哪、视图在哪——**当外部约束消失,内部的工程纪律才真正建立**。这正是本节课要传递的核心精神。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 一、教学目标 |
||||
|
|
||||
|
| 目标维度 | 具体描述 | |
||||
|
|----------|----------| |
||||
|
| **知识掌握** | 理解MVC架构的职责划分及其演化脉络;掌握Maven项目结构与pom.xml基础;理解Command模式的路由原理。 | |
||||
|
| **工程实践** | 能搭建规范的Maven项目包结构;能实现基于Scanner的控制台交互;能用Command接口实现可扩展的命令路由;能识别架构中的"越权行为"。 | |
||||
|
| **思维转型** | 从"一个类写全部"转向"分层解耦";从"修改现有代码"转向"新增类实现功能";从"满足功能"转向"代码的工程洁癖"。 | |
||||
|
| **工具应用** | 利用AI辅助审查MVC职责越权;让AI扮演"架构审计师"检查分层是否清晰;理解AI生成代码中的架构缺陷。 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 二、教学重点与难点 |
||||
|
|
||||
|
| 项目 | 内容 | 突破方法 | |
||||
|
|------|------|----------| |
||||
|
| **重点** | MVC三层职责划分、CLI交互实现、Command接口解耦、代码中的工程细节(常量、输出归属) | 以"新增命令需要改什么"为切入点,展示Command模式的优势;通过现场"代码找茬"强化细节意识 | |
||||
|
| **难点** | Controller不写业务逻辑、Command接口的多态实现、共享数据模型的设计缺陷识别 | 现场演示:增加一个命令只需新建类,无需修改Controller;暴露`List<Article>`共享引用的问题并预告解决方案 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三、教学过程设计(90分钟) |
||||
|
|
||||
|
| 环节 | 时间 | 教学内容 | 师生活动 | AI协同点 | |
||||
|
|------|------|----------|----------|----------| |
||||
|
| **1. 痛点引入:从脚本到工程的鸿沟** | 10' | 展示"意大利面"式爬虫代码,演示改一处需要动全身 | **教师演示**:现场展示一段混乱代码,让学生找问题 | 用AI分析代码耦合度 | |
||||
|
| **2. CLI vs GUI:架构选择的思考** | 10' | 对比两种方案的优缺点,解释为什么CLI更适合培养工程思维 | **教师讲解**:用对比表格说明选择CLI的理由 | — | |
||||
|
| **3. MVC分层设计** | 20' | 讲解Model/View/Controller三层职责,用"餐厅类比"强化理解,随后批判类比局限性 | **教师讲解**:配合架构图讲解三层交互,引导学生寻找类比破绽 | 用AI生成MVC职责对照表 | |
||||
|
| **4. Command模式:可扩展的命令路由** | 15' | 引入Command接口,解释"一个命令就是一个类" | **类比**:Command像酒店的服务部门,Controller是前台 | 让AI解释Command模式的多态原理 | |
||||
|
| **5. Maven模板与环境** | 5' | 直接使用提供的Maven模板,讲解目录结构 | **教师演示**:解压模板 → IDEA打开 → 运行 | — | |
||||
|
| **6. 三层代码落地** | 20' | **Model**:Article实体<br>**View**:ConsoleView(ANSI常量)<br>**Command接口**+实现<br>**Controller**:Map路由 | **教师演示**:分步写出代码,刻意埋入1~2个"越权细节"让学生找茬 | 学生用AI做"架构审计" | |
||||
|
| **7. 架构反思与展望** | 5' | 指出当前`List<Article>`共享引用的问题,预告W10策略模式与仓库层 | **师生互动**:你发现这个设计有什么风险? | 让AI分析共享可变状态的危害 | |
||||
|
| **8. 实践任务:空壳程序** | 5' | 搭建完整包结构,实现CLI循环 | 学生现场编码,教师巡视 | 完成后用AI检查包结构 | |
||||
|
| **9. 总结与过渡** | 5' | 本周实现了"骨架+命令可扩展",下周填入"灵魂"——解析器,并解决数据安全问题 | 总结Command模式优势,预告策略模式 | — | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 四、核心教学内容脚本 |
||||
|
|
||||
|
### 4.1 痛点引入:从脚本到工程的鸿沟(10分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "同学们,前8周我们学的是Java语法,从变量到类,从继承到接口。但有一个问题:代码写完之后,怎么组织?" |
||||
|
> |
||||
|
> "来看这段代码——这是某个同学写的'爬虫',他一个人完成了一个'完整'的项目。" |
||||
|
|
||||
|
**展示"脚本式"代码**: |
||||
|
```java |
||||
|
public class Crawler { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.print("请输入URL: "); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
String url = scanner.nextLine(); |
||||
|
|
||||
|
List titles = new ArrayList(); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements elements = doc.select(".post-title"); |
||||
|
for (Element e : elements) { |
||||
|
String title = e.text(); |
||||
|
System.out.println("标题: " + title); |
||||
|
titles.add(title); |
||||
|
} |
||||
|
} catch (Exception ex) { |
||||
|
System.out.println("出错啦: " + ex.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问引导**: |
||||
|
1. "如果我想把标题保存到文件,要改哪里?" |
||||
|
2. "如果我想支持另一个网站,它的HTML结构不一样,要怎么办?" |
||||
|
3. "如果我想让输出变成彩色,要改哪里?" |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "看到了吗?才20来行代码,已经'牵一发而动全身'了。这就是一个'脚本'的宿命——功能全混在一起,改一个小需求,整个文件都要翻。" |
||||
|
> |
||||
|
> "这周我们要解决:**怎么让代码'改起来不疼'?**" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.2 CLI vs GUI:架构选择的思考(10分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "既然要写一个'完整'的爬虫应用,我们有两个选择:图形界面(GUI)或命令行界面(CLI)。为什么我推荐CLI而不是GUI?" |
||||
|
|
||||
|
**对比表格** |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| **代码量** | FXML + Controller + CSS,大量模板代码 | 纯Java,代码量可控 | |
||||
|
| **学习重心** | 布局、控件、事件监听 | 架构、分层、命令路由 | |
||||
|
| **后端能力** | 几乎无关 | 模拟真实服务器开发 | |
||||
|
| **可测试性** | 难(需要UI测试框架) | 易(直接测试Command类) | |
||||
|
| **工程思维** | 弱(关注视觉) | 强(关注逻辑) | |
||||
|
|
||||
|
**核心观点**: |
||||
|
> **CLI更需要MVC!** GUI有现成的事件系统(点击按钮→触发事件),而CLI只有字符流。**没有架构,分分钟写成脚本**。MVC在CLI里是"刚需",不是"装饰"。 |
||||
|
> |
||||
|
> **更深一层**:在GUI里,框架已经硬塞给你一套架构,你只是在填空;但在CLI里,所有结构都必须由你亲手搭建。**当外部约束消失,内部的工程纪律才真正开始建立**——这才是本节课的真正目的。 |
||||
|
|
||||
|
**CLI也能很酷**: |
||||
|
- ANSI彩色输出(红/绿/黄/蓝) |
||||
|
- 表格展示数据 |
||||
|
- 进度条动画 |
||||
|
- 模拟真实大数据开发场景 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.3 MVC分层设计(20分钟) |
||||
|
|
||||
|
#### 4.3.1 MVC的起源与演进 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "MVC不是新东西,它是1970年代为桌面应用设计的架构思想。但它的核心——'职责分离'——在任何软件里都适用。" |
||||
|
|
||||
|
| 年代 | 场景 | MVC的角色 | |
||||
|
|------|------|----------| |
||||
|
| 1970s | Smalltalk-76/80 GUI(Xerox PARC) | 最早的用户界面架构 |
||||
|
| 1990s | Web开发 (Struts) | 后端模板引擎 | |
||||
|
| 2000s | ASP.NET MVC | 现代Web框架 | |
||||
|
| 2020s | CLI + API | 解耦业务逻辑与表现层 | |
||||
|
|
||||
|
#### 4.3.2 从GUI到CLI的映射 |
||||
|
|
||||
|
| GUI组件 | CLI对应 | 说明 | |
||||
|
|--------|--------|------| |
||||
|
| 窗口/按钮 | 命令行输入 | **View = 用户交互** | |
||||
|
| 数据模型 | Article实体类 | **Model = 数据结构** | |
||||
|
| 事件监听 | Command路由 | **Controller = 调度** | |
||||
|
|
||||
|
#### 4.3.3 MVC三层职责 |
||||
|
|
||||
|
**架构图示**: |
||||
|
|
||||
|
``` |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ 入口 │ |
||||
|
│ (main方法) │ |
||||
|
└─────────────────┬───────────────────────┘ |
||||
|
│ |
||||
|
▼ |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ Controller │ |
||||
|
│ - 接收命令(crawl, help, exit) │ |
||||
|
│ - 分发给对应的Command │ |
||||
|
│ 【口诀】:Controller不管"怎么做", │ |
||||
|
│ 只管"派给谁" │ |
||||
|
└─────────┬───────────────┬───────────────┘ |
||||
|
│ │ |
||||
|
▼ ▼ |
||||
|
┌─────────────────┐ ┌─────────────────┐ |
||||
|
│ Model │ │ View │ |
||||
|
│ - 数据实体 │ │ - 输入解析 │ |
||||
|
│ - 业务逻辑 │ │ - 输出格式化 │ |
||||
|
│ 【口诀】: │ │ 【口诀】: │ |
||||
|
│ Model管"数据" │ │ View管"呈现" │ |
||||
|
└─────────────────┘ └─────────────────┘ |
||||
|
``` |
||||
|
|
||||
|
**三层职责详解** |
||||
|
|
||||
|
| 层级 | 职责 | 典型代码 | 禁止做什么 | |
||||
|
|------|------|----------|------------| |
||||
|
| **Model** | 数据结构 + 业务逻辑 | `class Article { String title; String content; }` | 不能有`System.out.println`,不能有`Scanner` | |
||||
|
| **View** | 接收用户输入 + 格式化输出 | `class ConsoleView { String readInput(); void print(String); }` | 不能写爬虫逻辑,只做"传声筒" | |
||||
|
| **Controller** | 协调调度 | `class CrawlerController { void handle(String cmd) { ... } }` | 不能直接写业务细节,委托给Command | |
||||
|
|
||||
|
#### 4.3.4 类比强化:"餐厅类比" |
||||
|
|
||||
|
> "把MVC想象成一家餐厅: |
||||
|
> - **Model是后厨**:只管做菜(数据加工),不管谁来吃、怎么端 |
||||
|
> - **View是服务员**:只管端菜和收钱(输入输出),不管菜怎么做 |
||||
|
> - **Controller是前台**:只管把顾客的点单传给后厨,再安排服务员把做好的菜送到顾客面前 |
||||
|
> |
||||
|
> 如果后厨开始管'谁来吃饭',这餐厅就乱了。" |
||||
|
|
||||
|
#### 4.3.5 对"餐厅类比"的批判性思考(关键!) |
||||
|
|
||||
|
**教师导引**: |
||||
|
> "刚才的类比好理解吗?很好。但任何一个类比都有它的边界,如果把它当成真理,就会出问题。现在我们来给这个类比'找茬'。" |
||||
|
|
||||
|
**提问学生**: |
||||
|
1. "后厨真的完全不知道客人是谁吗?如果客人有忌口(比如不吃香菜),这个信息需不需要传到后厨?" |
||||
|
2. "服务员只是端菜吗?在真实餐厅里,服务员经常向后厨反馈'客人觉得今天的菜咸了',这属于View→Model的反向影响吗?" |
||||
|
3. "在这个类比里,我们把前台(Controller)和后厨(Model)的关系说成单向的。但实际上,后厨做完了菜,需要通知前台'菜好了',这不就是**观察者模式**吗?" |
||||
|
|
||||
|
**点明本质**: |
||||
|
> "实际MVC的数据流向常常是**双向**的:Controller调用Model的方法改变数据,Model变化后又通知View更新显示。只不过在本次CLI项目中,我们暂时使用'请求-响应'的单向简化模型——用户输入命令,系统处理,然后立即输出结果。这个简化版够用,但你要知道完整的MVC是更动态的。随着系统复杂,Model层需要一个专门的'仓库类'来管理数据,并通知视图刷新——这正是W10我们将要深入的内容。" |
||||
|
|
||||
|
#### 4.3.6 MVC的数据流向(本课程简化版) |
||||
|
|
||||
|
``` |
||||
|
CLI用户输入 |
||||
|
↓ |
||||
|
View(解析命令字符串) |
||||
|
↓ |
||||
|
Controller(找到对应Command) |
||||
|
↓ |
||||
|
Command.execute()(执行业务逻辑) |
||||
|
↓ |
||||
|
Model(Article数据,目前暂存于List) |
||||
|
↓ |
||||
|
View(display()展示数据) |
||||
|
↓ |
||||
|
CLI终端显示 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.4 Command模式:可扩展的命令路由(15分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在引入一个设计模式——Command(命令)模式。它的核心思想是:**一个命令就是一个类**。" |
||||
|
|
||||
|
#### 4.4.1 为什么需要Command模式? |
||||
|
|
||||
|
**演示:增加一个命令的代价(switch-case版)** |
||||
|
```java |
||||
|
// 现状代码 |
||||
|
switch (cmd) { |
||||
|
case "crawl": handleCrawl(); break; |
||||
|
case "help": showHelp(); break; |
||||
|
// 如果要增加 list 命令? |
||||
|
// 1. 加 case "list" |
||||
|
// 2. 加 handleList() 方法 |
||||
|
// 3. 可能还要改其他地方... |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问**: |
||||
|
- "如果我想增加10个命令,这个类要改多少次?" |
||||
|
- "如果我不小心删了一个case,整个程序还能跑吗?" |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "每加一个功能,就要在这个类里戳一个洞。**这就是'肥控制器'陷阱**——所有的逻辑都堆在Controller里,它变成了新的'意大利面'。" |
||||
|
|
||||
|
#### 4.4.2 Command模式的四个要素 |
||||
|
|
||||
|
| 要素 | 角色 | 示例 | |
||||
|
|------|------|------| |
||||
|
| **Command接口** | 抽象的"订单" | `Command` 接口 | |
||||
|
| **ConcreteCommand** | 具体的订单 | `HelpCommand`、`CrawlCommand` | |
||||
|
| **Invoker** | 接单的前台 | `CrawlerController` | |
||||
|
| | **Receiver** | 执行者 | `ConsoleView`、`List<Article>`(W10起由`ArticleRepository`封装) | |
||||
|
|
||||
|
#### 4.4.3 Command接口定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/command/Command.java |
||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); // 命令名,如 "crawl" |
||||
|
void execute(String[] args, List<Article> articles); // 执行逻辑 |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.4.4 Controller的变革(从switch到Map) |
||||
|
|
||||
|
```java |
||||
|
// 修改后的Controller |
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands; // 用Map存命令 |
||||
|
private ConsoleView view; // 持有View以输出错误 |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.commands = new HashMap<>(); |
||||
|
| // 增加命令无需改动handle()的路由逻辑,只需在这里注册一行 |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
if (input.isEmpty()) return; |
||||
|
String[] parts = input.split("\\s+"); |
||||
|
String cmd = parts[0].toLowerCase(); |
||||
|
|
||||
|
Command command = commands.get(cmd); |
||||
|
if (command == null) { |
||||
|
view.printError("Unknown command: " + cmd); // 通过View输出,而非直接System.out |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 执行命令,传入参数和文章列表 |
||||
|
command.execute(parts, articles); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**对比表格** |
||||
|
|
||||
|
| 维度 | switch-case | Command模式 | |
||||
|
|------|-------------|-------------| |
||||
|
| 增加命令 | 要改Controller | 新建一个类 | |
||||
|
| 多态体验 | 无 | execute()的多态调用 | |
||||
|
| 可测试性 | 难 | 每个Command可单独测试 | |
||||
|
| 代码量 | 少 | 多,但更清晰 | |
||||
|
|
||||
|
**类比强化**: |
||||
|
> "Command模式就像**酒店的客房服务**:每个服务(清理、送餐、按摩)都是一个独立的部门。前台(Controller)只负责接电话,然后把请求'派发'给对应的部门。部门自己知道怎么干活,不需要前台教。" |
||||
|
> |
||||
|
> "如果想新增一个服务,前台只需要'登记'一下,不需要把现有部门重新装修。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.5 Maven模板与环境(5分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这周我们不发愁pom.xml配置。我已经把 Maven 模板准备好了,你们只需要解压、打开、运行。" |
||||
|
|
||||
|
**模板使用流程**: |
||||
|
``` |
||||
|
1. 解压 [my-crawler-template.zip] |
||||
|
2. 用 IDEA 打开文件夹 |
||||
|
3. 右键 pom.xml → Maven → Reload Project |
||||
|
4. 运行 App.java |
||||
|
``` |
||||
|
|
||||
|
**标准目录结构**: |
||||
|
``` |
||||
|
src/main/java/com/crawler/ |
||||
|
├── model/ |
||||
|
│ └── Article.java |
||||
|
├── view/ |
||||
|
│ └── ConsoleView.java |
||||
|
├── command/ |
||||
|
│ ├── Command.java (接口) |
||||
|
│ ├── CrawlCommand.java |
||||
|
│ ├── HelpCommand.java |
||||
|
│ ├── ListCommand.java |
||||
|
│ └── ExitCommand.java |
||||
|
└── controller/ |
||||
|
└── CrawlerController.java |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.6 代码落地(20分钟) |
||||
|
|
||||
|
#### 4.6.1 Model层:Article实体 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/model/Article.java |
||||
|
package com.crawler.model; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { return title; } |
||||
|
public void setTitle(String title) { this.title = title; } |
||||
|
public String getUrl() { return url; } |
||||
|
public void setUrl(String url) { this.url = url; } |
||||
|
public String getContent() { return content; } |
||||
|
public void setContent(String content) { this.content = content; } |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{title='" + title + "', url='" + url + "'}"; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.2 View层:ANSI常量集中管理(工程细节!) |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/view/ConsoleView.java |
||||
|
package com.crawler.view; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
// ANSI颜色常量——集中管理,避免散落各处 |
||||
|
private static final String ANSI_GREEN = "\033[32m"; |
||||
|
private static final String ANSI_RED = "\033[31m"; |
||||
|
private static final String ANSI_CYAN = "\033[36m"; |
||||
|
private static final String ANSI_RESET = "\033[0m"; |
||||
|
|
||||
|
private Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("crawler> "); |
||||
|
return scanner.nextLine().trim(); |
||||
|
} |
||||
|
|
||||
|
public void print(String msg) { |
||||
|
System.out.println(msg); |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
print(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
print(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
print(ANSI_CYAN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
// 展示文章列表 |
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("No articles yet. Use 'crawl <url>' first."); |
||||
|
return; |
||||
|
} |
||||
|
print("+----------+--------------------------------+"); |
||||
|
print("| Title | URL |"); |
||||
|
print("+----------+--------------------------------+"); |
||||
|
for (Article a : articles) { |
||||
|
String title = a.getTitle(); |
||||
|
| if (title.length() > 10) title = title.substring(0, 8) + ".."; |
||||
|
String url = a.getUrl(); |
||||
|
if (url.length() > 30) url = url.substring(0, 27) + "..."; |
||||
|
print("| " + String.format("%-10s", title) + " | " + url + " |"); |
||||
|
} |
||||
|
print("+----------+--------------------------------+"); |
||||
|
printInfo("Total: " + articles.size() + " articles"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师提示**: |
||||
|
> "注意:所有ANSI转义码都被定义为`private static final`常量。如果把`\033[32m`散落在项目各处,一旦想调整颜色,就得满世界去改——这正是我们之前痛批的'意大利面'。**这就是工程细节**。" |
||||
|
|
||||
|
#### 4.6.3 Command接口与四个实现(全部通过View输出) |
||||
|
|
||||
|
```java |
||||
|
// Command.java |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
|
||||
|
// HelpCommand.java |
||||
|
public class HelpCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public HelpCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "help"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ListCommand.java |
||||
|
public class ListCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ListCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "list"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.display(articles); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// CrawlCommand.java (存根) |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public CrawlCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "crawl"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ExitCommand.java |
||||
|
public class ExitCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ExitCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "exit"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printSuccess("Bye!"); // 全部输出都通过View,绝不让System.out直接出现在这里 |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**故意埋设的"找茬点"**: |
||||
|
| > "我在刚才的代码里有没有隐藏违反MVC原则的地方?`CrawlCommand`的存根里,`view.printInfo("Stub: Would crawl " + args[1]);` —— 这个字符串拼接算是'业务逻辑'吗?留给大家用AI架构审计时讨论。" |
||||
|
|
||||
|
#### 4.6.4 Controller:Map路由(全部通过View输出) |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/controller/CrawlerController.java |
||||
|
package com.crawler.controller; |
||||
|
|
||||
|
import com.crawler.command.*; |
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.List; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands = new HashMap<>(); |
||||
|
private ConsoleView view; // 持有View |
||||
|
private List<Article> articles; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
this.view = view; |
||||
|
this.articles = articles; |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
if (input.isEmpty()) return; |
||||
|
String[] parts = input.split("\\s+"); |
||||
|
String cmdName = parts[0].toLowerCase(); |
||||
|
|
||||
|
Command cmd = commands.get(cmdName); |
||||
|
if (cmd == null) { |
||||
|
view.printError("Unknown command: " + cmdName); // 错误信息也走View! |
||||
|
return; |
||||
|
} |
||||
|
cmd.execute(parts, articles); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.5 main方法:组装 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/App.java |
||||
|
package com.crawler; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class App { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler!"); |
||||
|
view.printInfo("Type 'help' for commands."); |
||||
|
|
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
#### 4.6.6 架构反思与展望:共享List<Article>的隐患(关键!) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在这个架构已经可用了。但请大家审视一下:我们所有的Command都直接拿到了`List<Article>`的引用。换句话说,任何一个命令都可以随意增、删、改这个列表。" |
||||
|
> |
||||
|
> "这就好像一家酒店,所有服务员、厨师、清洁工都能随意进出保险箱——**数据结构完全裸奔了**。" |
||||
|
|
||||
|
**提问**: |
||||
|
| - "如果CrawlCommand不小心写错了代码,把一个null塞进articles,ListCommand会不会受影响?" |
||||
|
- "如果未来我们要在添加文章时也写入日志文件,现在的设计能优雅实现吗?还是得在所有Command里分别加日志代码?" |
||||
|
|
||||
|
**预告解决方案**: |
||||
|
> "下周,我们将引入**策略模式**和一个真正的**Model仓库层(ArticleRepository)**。这个仓库会把`List`封装起来,对外只提供`add()`、`getAll()`等安全接口。任何命令想修改数据,都必须通过仓库。这就是从'数据结构'到'模型层'的进化——我们W9先搭骨架,W10给它装上盔甲。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.7 实践任务(5分钟) |
||||
|
|
||||
|
**任务要求**: |
||||
|
1. 使用Maven模板创建项目 |
||||
|
2. 实现完整包结构(model/view/command/controller) |
||||
|
3. 实现4个Command:help/list/crawl/exit |
||||
|
4. `list`命令能展示已抓取的文章 |
||||
|
5. 运行并测试循环 |
||||
|
6. **代码找茬(额外加分)**:找出你自己代码中是否存在`System.out`直接调用、硬编码ANSI字符串等"越权行为" |
||||
|
|
||||
|
**验收标准**: |
||||
|
- [x] Maven编译通过 |
||||
|
- [x] Command接口和4个实现分离在不同文件 |
||||
|
- [x] Controller里没有switch-case |
||||
|
| - [x] 新增命令只需新建类并在Controller中注册一行,不改`handle()`路由逻辑 |
||||
|
- [x] list命令能正确显示空列表 |
||||
|
- [x] 所有输出均通过ConsoleView完成,无直接System.out.println(main除外) |
||||
|
- [x] ANSI颜色码集中定义为View常量 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 五、课后作业 |
||||
|
|
||||
|
### 5.1 必做任务 |
||||
|
|
||||
|
1. **完善Article**:增加`author`、`publishDate`字段 |
||||
|
2. **★ HistoryCommand(强制作业)**: |
||||
|
- 实现`history`命令,记录用户输入过的所有命令 |
||||
|
- 使用`List<String>`存储历史(复习W8集合) |
||||
|
- 示例输出: |
||||
|
``` |
||||
|
crawler> history |
||||
|
1. help |
||||
|
2. list |
||||
|
3. crawl https://example.com |
||||
|
``` |
||||
|
3. **AI架构审计**:将类名和方法名发给AI,指令: |
||||
|
> "作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?Model层是否包含输入输出代码?View层是否越权写了业务逻辑?有没有地方直接使用了System.out或硬编码ANSI码?" |
||||
|
|
||||
|
### 5.2 选做任务 |
||||
|
|
||||
|
1. **命令别名**:给`crawl`增加别名`c`,`help`增加别名`h` |
||||
|
2. **URL验证**:检查URL格式是否以http://或https://开头 |
||||
|
3. **暗色主题**:实现不同的配色方案(利用View中的ANSI常量,只需修改一处即可) |
||||
|
4. **思考并回答**:分析`List<Article>`共享引用的潜在风险,写一段200字的小结 |
||||
|
|
||||
|
### 5.3 思考题 |
||||
|
|
||||
|
1. **Command vs switch-case**:增加10个命令,哪种方式代码改动量更小? |
||||
|
2. **如果不用Command接口,直接用Map存命令类行不行?** 接口的意义是什么? |
||||
|
3. **Controller里的`commands.put()`能否减少?** 提示:思考"注册机制" |
||||
|
4. **为什么ExitCommand里的`view.printSuccess("Bye!")`比直接`System.out.println`更"MVC"?** 提示:回忆View的职责 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 六、AI协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**学生执行步骤**: |
||||
|
| 1. 列出项目中所有类名及方法签名;若要让AI定位`System.out`调用,还需附上对应类的源码 |
||||
|
2. 将类名列表发给AI |
||||
|
3. 输入指令: |
||||
|
> "作为Java架构审计师,请检查我的MVC三层划分是否清晰。Model层是否包含了不应该有的代码(Scanner/System.out)?View层是否越权写了业务逻辑?请指出任何一处直接使用System.out.println的地方,并建议如何改正。" |
||||
|
|
||||
|
**预期AI输出**: |
||||
|
- 指出哪一层有越权行为 |
||||
|
- 建议如何整改 |
||||
|
- 评价整体架构健康度 |
||||
|
|
||||
|
### 进阶AI探究(选做) |
||||
|
|
||||
|
> "假设我的Command接口中execute方法接收了一个`List<Article>`参数,请分析这种设计在工程上有什么隐患,并给出重构建议。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 七、教学反思与调整记录 |
||||
|
|
||||
|
| 日期 | 事项 | 调整内容 | |
||||
|
|------|------|----------| |
||||
|
| 2026-04-28 | 首次编写 | 基于CLI+MVC重构 | |
||||
|
| 2026-04-30 | 教授反馈 | 引入Command模式、提供Maven模板、升级AI协同比 | |
||||
|
| 2026-04-30 | 逻辑重排 | 按"问题→选择→架构→模式"顺序重写 | |
||||
|
| 2026-05-01 | v2 vs V3合并 | 融合深度改进:增加教育哲学、批判性思考、ANSI常量、共享List隐患、故意埋坑 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 附录1:Maven模板说明 |
||||
|
|
||||
|
> 老师提供`my-crawler-template.zip`压缩包,包含: |
||||
|
> - pom.xml(含Jsoup依赖) |
||||
|
> - 空的src/main/java结构 |
||||
|
> - .gitignore |
||||
|
|
||||
|
## 附录2:常见问题速查 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| IDEA不识别pom.xml | 右键 pom.xml → Maven → Reload Project | |
||||
|
| 中文乱码 | Settings → Editor → File Encodings → UTF-8 | |
||||
|
| 包名大小写 | 包名全小写,类名首字母大写 | |
||||
|
| Command找不到 | 检查是否 implements Command,是否 @Override getName() | |
||||
|
| 命令不生效 | 检查 commands.put() 是否注册了该命令 | |
||||
|
| 输出颜色乱码 | IDEA控制台需支持ANSI,Windows下建议使用Windows Terminal或调整设置 | |
||||
|
| 我的System.out为什么被老师说越权 | View层才是与用户交互的唯一出口,所有输出都应通过View,这样将来改成GUI或日志时只需改View | |
||||
|
|
||||
|
## 附录3:教学逻辑说明 |
||||
|
|
||||
|
| 顺序 | 内容 | 设计理由 | |
||||
|
|------|------|----------| |
||||
|
| 1 | 痛点引入 | 从问题出发,让学生感受"为什么需要架构" | |
||||
|
| 2 | CLI vs GUI | 解释技术选型,建立"工程思维 > 视觉装饰"的认知 | |
||||
|
| 3 | MVC分层 | 核心架构概念,理解职责分离,通过类比及批判加深理解 | |
||||
|
| 4 | Command模式 | 具体实现方式,解决"肥控制器"问题 | |
||||
|
| 5 | Maven | 工具链支持 | |
||||
|
| 6 | 代码落地 | 实践验证,刻意植入细节规范,训练工程洁癖 | |
||||
|
| 7 | 架构反思 | 暴露共享可变状态隐患,为W10策略模式+仓库层做铺垫 | |
||||
|
| 8 | 实践任务 | 现场编码验证 | |
||||
|
| 9 | 总结 | 强化认知,预告下周 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 版本说明 |
||||
|
|
||||
|
- **v1**:首次编写,CLI+MVC基础框架 |
||||
|
- **v2**:按"问题→选择→架构→模式"逻辑重排 |
||||
|
- **v3 (本版)**:融合v2结构 + V3深度改进,包含: |
||||
|
- 更深的CLI教育哲学 |
||||
|
- 餐厅类比批判性思考 |
||||
|
- ANSI常量集中管理工程细节 |
||||
|
- 全部输出走View |
||||
|
- 共享List架构隐患反思 |
||||
|
- 故意埋坑让学生找茬 |
||||
|
- W10铺垫(策略模式+仓库层) |
||||
@ -0,0 +1,63 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<configuration scan="true" scanPeriod="30 seconds"> |
||||
|
|
||||
|
<property name="LOG_HOME" value="logs"/> |
||||
|
<property name="APP_NAME" value="datacollect-cli"/> |
||||
|
<property name="LOG_PATTERN" value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n"/> |
||||
|
|
||||
|
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder> |
||||
|
<pattern>${LOG_PATTERN}</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<file>${LOG_HOME}/${APP_NAME}.log</file> |
||||
|
<encoder> |
||||
|
<pattern>${LOG_PATTERN}</pattern> |
||||
|
<charset>UTF-8</charset> |
||||
|
</encoder> |
||||
|
|
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<fileNamePattern>${LOG_HOME}/${APP_NAME}-%d{yyyy-MM-dd}.%i.log</fileNamePattern> |
||||
|
<timeBasedFileNamingAndTriggeringPolicy class="ch.qos.logback.core.rolling.SizeAndTimeBasedFNATP"> |
||||
|
<maxFileSize>100MB</maxFileSize> |
||||
|
</timeBasedFileNamingAndTriggeringPolicy> |
||||
|
<maxHistory>30</maxHistory> |
||||
|
<totalSizeCap>3GB</totalSizeCap> |
||||
|
</rollingPolicy> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="ASYNC_CONSOLE" class="ch.qos.logback.classic.AsyncAppender"> |
||||
|
<queueSize>512</queueSize> |
||||
|
<discardingThreshold>0</discardingThreshold> |
||||
|
<includeCallerData>false</includeCallerData> |
||||
|
<neverBlock>true</neverBlock> |
||||
|
<appender-ref ref="CONSOLE"/> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="ASYNC_FILE" class="ch.qos.logback.classic.AsyncAppender"> |
||||
|
<queueSize>1024</queueSize> |
||||
|
<discardingThreshold>0</discardingThreshold> |
||||
|
<includeCallerData>true</includeCallerData> |
||||
|
<neverBlock>false</neverBlock> |
||||
|
<appender-ref ref="FILE"/> |
||||
|
</appender> |
||||
|
|
||||
|
<logger name="com.example.datacollect" level="DEBUG" additivity="false"> |
||||
|
<appender-ref ref="ASYNC_CONSOLE"/> |
||||
|
<appender-ref ref="ASYNC_FILE"/> |
||||
|
</logger> |
||||
|
|
||||
|
<logger name="org.jsoup" level="WARN" additivity="false"> |
||||
|
<appender-ref ref="ASYNC_CONSOLE"/> |
||||
|
<appender-ref ref="ASYNC_FILE"/> |
||||
|
</logger> |
||||
|
|
||||
|
<root level="INFO"> |
||||
|
<appender-ref ref="ASYNC_CONSOLE"/> |
||||
|
<appender-ref ref="ASYNC_FILE"/> |
||||
|
</root> |
||||
|
|
||||
|
</configuration> |
||||
@ -0,0 +1,5 @@ |
|||||
|
#Generated by Maven |
||||
|
#Thu Apr 30 11:50:54 CST 2026 |
||||
|
artifactId=datacollect-cli |
||||
|
groupId=com.example |
||||
|
version=0.1.0 |
||||
@ -0,0 +1,16 @@ |
|||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\command\HelpCommand.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\Main.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\util\UrlValidator.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\RetryUtilsExample.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\command\ExitCommand.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\view\ConsoleView.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\command\Command.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\util\RetryUtils.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\circuitbreaker\CircuitBreaker.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\exception\UrlFormatException.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\command\CrawlCommand.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\command\ListCommand.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java |
||||
|
E:\新建文件夹 (2)\java\w11\w10\java-cli\src\main\java\com\example\datacollect\model\Article.java |
||||
@ -0,0 +1,530 @@ |
|||||
|
## 高级程序设计 · 第9周 |
||||
|
|
||||
|
#### 工程架构:从"写代码"到"造系统" |
||||
|
|
||||
|
##### CLI + MVC + Command模式实战 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 📌 本周导航 |
||||
|
|
||||
|
- 痛点引入:脚本的宿命 |
||||
|
- CLI vs GUI:为什么选命令行? |
||||
|
- MVC分层:职责分离的艺术 |
||||
|
- Command模式:可扩展的路由 |
||||
|
- Maven模板:工程化第一步 |
||||
|
- 代码落地:从接口到实现 |
||||
|
- 架构反思:共享数据的隐患 |
||||
|
- 实践任务 + 课后作业 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 1️⃣ 痛点引入:从脚本到工程的鸿沟 |
||||
|
|
||||
|
#### 这是一段“意大利面”爬虫 |
||||
|
|
||||
|
```java |
||||
|
public class Crawler { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.print("请输入URL: "); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
String url = scanner.nextLine(); |
||||
|
List titles = new ArrayList(); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements elements = doc.select(".post-title"); |
||||
|
for (Element e : elements) { |
||||
|
String title = e.text(); |
||||
|
System.out.println("标题: " + title); |
||||
|
titles.add(title); |
||||
|
} |
||||
|
} catch (Exception ex) { |
||||
|
System.out.println("出错啦: " + ex.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 脚本的三大痛点 |
||||
|
|
||||
|
| 需求 | 需要改哪里? | |
||||
|
|------|--------------| |
||||
|
| 保存标题到文件 | 改 main 内部逻辑 | |
||||
|
| 支持不同网站结构 | 全部重写解析代码 | |
||||
|
| 彩色输出 | 一个一个改 print | |
||||
|
|
||||
|
> 😫 **牵一发而动全身 → 改起来疼** |
||||
|
|
||||
|
### 本周目标:**让代码“改起来不疼”** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 2️⃣ CLI vs GUI:架构选择的思考 |
||||
|
|
||||
|
### 图形界面 vs 命令行 |
||||
|
|
||||
|
| 维度 | GUI (JavaFX) | CLI (命令行) | |
||||
|
|------|--------------|-------------| |
||||
|
| 学习重心 | 布局、控件、事件 | **架构、分层、路由** | |
||||
|
| 后端能力 | 弱 | 模拟真实服务器 | |
||||
|
| 工程思维 | 弱(关注视觉) | **强(关注逻辑)** | |
||||
|
| 可测试性 | 难 | 易 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 核心观点 |
||||
|
|
||||
|
> **CLI 更需要 MVC!** |
||||
|
|
||||
|
- GUI 有现成事件系统,框架强塞给你一套架构 |
||||
|
- CLI 只有字符流 → **没有架构,分分钟写成脚本** |
||||
|
|
||||
|
> 🎯 **当外部约束消失,内部的工程纪律才真正开始建立** |
||||
|
|
||||
|
### CLI 也能很酷 |
||||
|
|
||||
|
- ANSI 彩色输出 |
||||
|
- 表格展示数据 |
||||
|
- 模拟大数据/后端开发 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 3️⃣ MVC 分层设计 |
||||
|
|
||||
|
### MVC 的起源与演进 |
||||
|
|
||||
|
| 年代 | 场景 | MVC的角色 | |
||||
|
|------|------|----------| |
||||
|
| | 1970s | Smalltalk(Xerox PARC)GUI | 最早的用户界面架构 | |
||||
|
| 1990s | Web开发 (Struts) | 后端模板引擎 | |
||||
|
| 2000s | ASP.NET MVC | 现代Web框架 | |
||||
|
| 2020s | CLI + API | 解耦业务逻辑与表现层 | |
||||
|
|
||||
|
**核心不变:职责分离** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## MVC 三层职责 |
||||
|
|
||||
|
![[mvc.png]] |
||||
|
``` |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ 入口 │ |
||||
|
│ (main方法) │ |
||||
|
└─────────────────┬───────────────────────┘ |
||||
|
▼ |
||||
|
┌─────────────────────────────────────────┐ |
||||
|
│ Controller │ |
||||
|
│ 只管"派给谁",不管"怎么做" │ |
||||
|
└─────────┬───────────────┬───────────────┘ |
||||
|
▼ ▼ |
||||
|
┌─────────────────┐ ┌─────────────────┐ |
||||
|
│ Model │ │ View │ |
||||
|
│ 管"数据" │ │ 管"呈现" │ |
||||
|
│ + 业务逻辑 │ │ + 输入输出 │ |
||||
|
└─────────────────┘ └─────────────────┘ |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三层“禁止做什么” |
||||
|
|
||||
|
| 层级 | 禁止行为 | |
||||
|
| -------------- | -------------------------------------- | |
||||
|
| **Model** | 不能有 `System.out.println`,不能有 `Scanner` | |
||||
|
| **View** | 不能写爬虫逻辑,只做“传声筒” | |
||||
|
| **Controller** | 不能直接写业务细节,委托给 Command | |
||||
|
|
||||
|
> 🔴 **越权就是架构腐败的开始** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🍽️ 餐厅类比(帮助理解) |
||||
|
|
||||
|
- **Model = 后厨**:只管做菜,不管谁来吃、怎么端 |
||||
|
- **View = 服务员**:只管端菜和收钱,不管菜怎么做 |
||||
|
- **Controller = 前台**:接单 → 派给后厨 → 叫服务员上菜 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤔 对类比的批判性思考(关键!) |
||||
|
|
||||
|
> 任何类比都有边界,不要当成真理 |
||||
|
|
||||
|
| 场景 | 暴露的问题 | |
||||
|
|------|------------| |
||||
|
| 客人有忌口(不吃香菜) | 信息需要传到后厨 → Model 可能需要知道 meta 信息 | |
||||
|
| 服务员反馈“今天的菜咸了” | View → Model 反向影响 | |
||||
|
| 后厨做完菜通知前台 | **观察者模式**,数据流可能是双向的 | |
||||
|
|
||||
|
**本课程简化模型**:请求-响应,单向流 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## MVC 数据流向(本课程简化版) |
||||
|
|
||||
|
``` |
||||
|
CLI用户输入 |
||||
|
↓ |
||||
|
| View(读取用户输入的命令字符串) |
||||
|
↓ |
||||
|
| Controller(拆分命令字符串,找到对应Command) |
||||
|
↓ |
||||
|
Command.execute()(执行业务逻辑) |
||||
|
↓ |
||||
|
Model(Article数据,暂存于List) |
||||
|
↓ |
||||
|
View(display()展示数据) |
||||
|
↓ |
||||
|
CLI终端显示 |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 4️⃣ Command 模式:可扩展的命令路由 |
||||
|
|
||||
|
### 为什么需要 Command 模式? |
||||
|
|
||||
|
```java |
||||
|
switch (cmd) { |
||||
|
case "crawl": handleCrawl(); break; |
||||
|
case "help": showHelp(); break; |
||||
|
// 如果要增加 list 命令? |
||||
|
// 1. 加 case "list" |
||||
|
// 2. 加 handleList() 方法 |
||||
|
// 3. 可能还要改其他地方... |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 每加一个功能,就要在这个类里戳一个洞 → **肥控制器陷阱** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 模式的四个要素 |
||||
|
|
||||
|
| 要素 | 角色 | 示例 | |
||||
|
|------|------|------| |
||||
|
| Command接口 | 抽象的“订单” | `Command` | |
||||
|
| ConcreteCommand | 具体的订单 | `HelpCommand` | |
||||
|
| Invoker | 接单的前台 | `CrawlerController` | |
||||
|
| Receiver | 执行者 | `ConsoleView`、`ArticleRepository` | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 接口定义 |
||||
|
|
||||
|
```java |
||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Controller 的变革:从 switch 到 Map |
||||
|
|
||||
|
```java |
||||
|
public class CrawlerController { |
||||
|
private Map<String, Command> commands = new HashMap<>(); |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, List<Article> articles) { |
||||
|
commands.put("help", new HelpCommand(view)); |
||||
|
commands.put("list", new ListCommand(view)); |
||||
|
commands.put("crawl", new CrawlCommand(view)); |
||||
|
commands.put("exit", new ExitCommand(view)); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) { |
||||
|
// 解析命令 → 从 Map 取 Command → 调用 execute |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
| > **增加新命令:只需新建类 + 注册一行,`handle()` 路由逻辑零改动!** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 对比:switch-case vs Command |
||||
|
|
||||
|
| 维度 | switch-case | Command模式 | |
||||
|
|------|-------------|-------------| |
||||
|
| 增加命令 | 要改 Controller | 新建一个类 | |
||||
|
| 多态体验 | 无 | `execute()` 多态 | |
||||
|
| 可测试性 | 难 | 每个 Command 单独测试 | |
||||
|
| 代码量 | 少 | 多,但更清晰 | |
||||
|
|
||||
|
> 🏨 **类比:酒店客房服务,前台只负责派单** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 5️⃣ Maven 模板与环境(5分钟) |
||||
|
|
||||
|
### 直接使用模板,不折腾配置 |
||||
|
|
||||
|
``` |
||||
|
my-crawler-template.zip |
||||
|
↓ 解压 + IDEA打开 |
||||
|
↓ 右键 pom.xml → Maven → Reload Project |
||||
|
↓ 运行 App.java |
||||
|
``` |
||||
|
|
||||
|
### 标准目录结构 |
||||
|
|
||||
|
``` |
||||
|
src/main/java/com/crawler/ |
||||
|
├── model/Article.java |
||||
|
├── view/ConsoleView.java |
||||
|
├── command/ |
||||
|
│ ├── Command.java |
||||
|
│ ├── CrawlCommand.java |
||||
|
│ ├── HelpCommand.java |
||||
|
│ ├── ListCommand.java |
||||
|
│ └── ExitCommand.java |
||||
|
└── controller/CrawlerController.java |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 6️⃣ 代码落地(分步实现) |
||||
|
|
||||
|
### Model:Article 实体 |
||||
|
|
||||
|
```java |
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
// 构造器、getter/setter、toString |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 📦 只存放数据,没有任何输入输出代码 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## View:ConsoleView(ANSI常量集中管理) |
||||
|
|
||||
|
```java |
||||
|
public class ConsoleView { |
||||
|
private static final String ANSI_GREEN = "\033[32m"; |
||||
|
private static final String ANSI_RED = "\033[31m"; |
||||
|
// ... 其他常量 |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
public void printError(String msg) { ... } |
||||
|
public void display(List<Article> articles) { ... } |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✨ **所有颜色码集中定义 → 改主题只需改一处** |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Command 实现示例(HelpCommand) |
||||
|
|
||||
|
```java |
||||
|
public class HelpCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public HelpCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "help"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ⚠️ 全部输出通过 `view`,绝不让 `System.out` 直接出现在这里 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## CrawlCommand(存根,下周填坑) |
||||
|
|
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public CrawlCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "crawl"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔍 **找茬点**:这里拼接字符串算是“业务逻辑”吗?留给大家用 AI 审计。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## ExitCommand |
||||
|
|
||||
|
```java |
||||
|
public class ExitCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ExitCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "exit"; } |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> ✅ 所有输出都通过 View → 将来改 GUI 只需换 View 实现 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Controller + main 组装 |
||||
|
|
||||
|
```java |
||||
|
// Controller 中持有 Map<String,Command> |
||||
|
// App.java 中: |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
view.printSuccess("Welcome to CLI Crawler!"); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
> 🔁 完成交互循环 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 7️⃣ 架构反思:共享 List<Article> 的隐患 |
||||
|
|
||||
|
### 当前问题 |
||||
|
|
||||
|
- 所有 Command 都直接拿到 `List<Article>` 引用 |
||||
|
- 任何一个命令都可以随意增、删、改列表 |
||||
|
- 数据完全“裸奔” |
||||
|
|
||||
|
> 🚨 就像酒店所有员工都能进保险箱 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 提问 |
||||
|
|
||||
|
- 如果 `CrawlCommand` 不小心把 `null` 塞进列表,`ListCommand` 会怎样? |
||||
|
- 如果我们要在添加文章时写日志,现在的设计能优雅实现吗? |
||||
|
|
||||
|
### 预告解决方案(W10) |
||||
|
|
||||
|
- **策略模式** + **仓库层(ArticleRepository)** |
||||
|
- 封装 `List`,对外只暴露 `add()`、`getAll()` 等安全接口 |
||||
|
|
||||
|
> W9 搭骨架,W10 装上盔甲 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 8️⃣ 实践任务(现场5分钟) |
||||
|
|
||||
|
### 必做项 |
||||
|
|
||||
|
1. 使用 Maven 模板创建项目 |
||||
|
2. 实现完整包结构(model/view/command/controller) |
||||
|
3. 实现 4 个 Command:help / list / crawl / exit |
||||
|
4. `list` 能展示已抓取的文章(目前存根即可) |
||||
|
5. 运行并测试循环 |
||||
|
|
||||
|
### 额外加分:代码找茬 |
||||
|
|
||||
|
- 检查是否仍有 `System.out` 直接调用 |
||||
|
- 检查 ANSI 码是否硬编码在多个地方 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 验收标准 |
||||
|
|
||||
|
- [x] Maven 编译通过 |
||||
|
- [x] Command 接口和 4 个实现在不同文件 |
||||
|
- [x] Controller 里没有 switch-case |
||||
|
- [x] 新增命令只需新建类,不改 Controller |
||||
|
- [x] list 能正确显示空列表 |
||||
|
- [x] 所有输出均通过 `ConsoleView` |
||||
|
- [x] ANSI 颜色码集中定义为常量 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 9️⃣ 课后作业 |
||||
|
|
||||
|
### 必做 |
||||
|
|
||||
|
1. **完善 Article**:增加 `author`、`publishDate` 字段 |
||||
|
2. **★ HistoryCommand**:记录用户输入过的所有命令(用 `List<String>`) |
||||
|
3. **AI 架构审计**:将类名发给 AI,指令: |
||||
|
> “作为Java架构审计师,请检查我的MVC三层划分是否存在越权行为?” |
||||
|
|
||||
|
### 选做 |
||||
|
|
||||
|
- 命令别名(c 代替 crawl) |
||||
|
- URL 格式验证 |
||||
|
- 暗色主题(修改一处常量) |
||||
|
- 思考题:分析 `List<Article>` 共享引用的风险(200字小结) |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 🤖 AI 协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**步骤**: |
||||
|
1. 列出所有类名(不含方法实现) |
||||
|
2. 发给 AI |
||||
|
3. 指令:“检查 MVC 分层是否清晰,是否有越权行为” |
||||
|
|
||||
|
### 进阶探究(选做) |
||||
|
|
||||
|
> “假设我的 Command 接口中 execute 方法接收了一个 `List<Article>` 参数,请分析这种设计在工程上有什么隐患,并给出重构建议。” |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 📚 总结与过渡 |
||||
|
|
||||
|
### 本周成果 |
||||
|
|
||||
|
- ✅ 工程化包结构 |
||||
|
- ✅ MVC 分层清晰 |
||||
|
- ✅ Command 模式实现可扩展路由 |
||||
|
- ✅ 所有输出走 View,常量集中管理 |
||||
|
|
||||
|
### 下周预告 |
||||
|
|
||||
|
- **策略模式**:封装爬取算法 |
||||
|
- **仓库层(Repository)**:武装 `List<Article>`,解决共享隐患 |
||||
|
|
||||
|
> 🚀 从“写代码”到“造系统”,踏出坚实第一步! |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## Q&A |
||||
|
|
||||
|
### 常见问题 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| IDEA 不识别 pom.xml | 右键 → Maven → Reload Project | |
||||
|
| 中文乱码 | Settings → File Encodings → UTF-8 | |
||||
|
| 输出颜色乱码 | Windows 建议使用 Windows Terminal | |
||||
|
| 我的 System.out 被批评 | View 才是唯一输出出口 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 谢谢! |
||||
|
|
||||
|
### 课件已上传,模板在课程群 |
||||
|
|
||||
|
**保持工程洁癖,下周见!** |
||||
@ -0,0 +1,705 @@ |
|||||
|
# 教案:《高级程序设计》第10周——设计模式:灵活性与可扩展性 |
||||
|
|
||||
|
| 项目 | 内容 | |
||||
|
| -------- | ---------------------------------------------------------------------------- | |
||||
|
| **课程名称** | 高级程序设计 | |
||||
|
| **周次** | 第10周 | |
||||
|
| **主题** | 设计模式——灵活性与可扩展性 | |
||||
|
| **学时** | 2学时(90分钟) | |
||||
|
| **授课对象** | 已完成第9周CLI+MVC架构学习,具备Command模式基础 | |
||||
|
| **教学环境** | JDK 17+、IntelliJ IDEA、Maven | |
||||
|
| **前情提要** | W9搭建了CLI骨架:MVC分层 + Command路由,但留下了两大隐患——解析逻辑耦合在Command中、List\<Article\>共享引用裸奔 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 教学调整说明:为什么W10要在“骨架”上装“盔甲”? |
||||
|
|
||||
|
> **W9成果**:一个可扩展的命令行骨架 → **W9痛点**:解析器与数据存储仍在“裸奔” |
||||
|
|
||||
|
| 维度 | W9状态 | W10目标 | |
||||
|
|------|--------|---------| |
||||
|
| **架构** | MVC分层清晰 | MVC + 策略模式 + 仓库层 | |
||||
|
| **命令扩展** | 新增命令不改Controller | 新增解析器不改任何旧代码 | |
||||
|
| **数据安全** | List\<Article\>全员可写 | Repository封装,只暴露安全接口 | |
||||
|
| **解析逻辑** | 硬编码在CrawlCommand内 | 策略模式,按URL自动匹配 | |
||||
|
| **代码量** | ~8个类 | ~12个类,但每个更小更纯粹 | |
||||
|
|
||||
|
**决策理由**: |
||||
|
1. W9学生已经感受到Command模式的好处——**多态带来的扩展性** |
||||
|
2. 策略模式是多态思想的又一次实战,是**接口抽象的深化** |
||||
|
3. 仓库层是“封装”这一OOP核心原则的落地,补上W9留下的课 |
||||
|
4. 解析器工厂让学生看到**“自动匹配”**的威力——增加网站支持只需新增一个类 |
||||
|
|
||||
|
**更深层的教育价值**: |
||||
|
> W9教会学生“怎么把代码分开”,W10要教会学生“怎么把代码分开后还能优雅地合上”——**接口即合同,工厂即自动匹配,仓库即数据守卫**。这三句话,就是本周的全部精华。 |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 一、教学目标 |
||||
|
|
||||
|
| 目标维度 | 具体描述 | |
||||
|
|----------|----------| |
||||
|
| **知识掌握** | 理解策略模式的定义与多态本质;掌握工厂模式的两类变体(工厂方法/简单工厂)及适用场景;理解仓库模式对数据访问的封装原理。 | |
||||
|
| **工程实践** | 能在爬虫项目中用策略模式封装不同网站的解析逻辑;能实现解析器工厂,根据URL自动匹配解析策略;能用Repository模式替代裸List,提供安全的数据访问接口。 | |
||||
|
| **思维转型** | 从“写死逻辑”转向“策略可插拔”;从“直接操作集合”转向“通过仓库存取”;理解“对扩展开放,对修改关闭”的开闭原则。 | |
||||
|
| **工具应用** | 利用AI审查策略模式实现是否真正解耦;让AI扮演“网站结构分析师”辅助编写具体解析策略;用AI生成Repository的安全接口建议。 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 二、教学重点与难点 |
||||
|
|
||||
|
| 项目 | 内容 | 突破方法 | |
||||
|
|------|------|----------| |
||||
|
| **重点** | 策略模式的多态本质、解析器工厂的自动匹配机制、Repository对数据访问的封装 | 以“新增网站需要改什么”为切入点,展示策略模式的开闭原则达成;通过“攻击”当前List裸奔的问题,引出Repository的必然性 | |
||||
|
| **难点** | 理解“接口即合同”的抽象思维、工厂模式中反射/Map注册的实现、仓库层与Strategy模式的协同 | 用“插座与电器”类比接口标准;现场演示从硬编码→工厂→反射的演进路径;用时序图展示“用户→Command→Strategy→Repository”的完整调用链 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 三、教学过程设计(90分钟) |
||||
|
|
||||
|
| 环节 | 时间 | 教学内容 | 师生活动 | AI协同点 | |
||||
|
| -------------------------- | --- | ----------------------------------------------------------------- | -------------------------------------- | --------------------------- | |
||||
|
| **1. W9回顾与痛点暴露** | 8' | 回顾W9成果(CLI骨架),暴露两大隐患:①CrawlCommand里解析逻辑硬编码;②List\<Article\>全员可读可写 | **教师演示**:展示W9代码,用“事故场景”引发思考 | — | |
||||
|
| **2. 策略模式:解析器的“插头标准化”** | 18' | 策略模式定义、接口设计、多态调用、与Command模式的对比 | **类比**:插座与电器;**教师演示**:从if-else到策略模式的演进 | 让AI生成“策略模式vs switch-case”对比 | |
||||
|
| **3. 解析器工厂:自动匹配的魔法** | 14' | 工厂模式的两种形态(简单工厂→Map注册工厂),解析器工厂实现 | **教师演示**:先用if-else判断host,再升级为Map注册工厂 | 让AI解释工厂模式与策略模式如何协同 | |
||||
|
| **4. Repository模式:武装数据访问** | 12' | Repository定义、接口设计、替换List\<Article\>后的影响 | **教师演示**:在原代码中把List替换为Repository,展示改动点 | 学生用AI审计Repository接口的“最小完备性” | |
||||
|
| **5. 整体架构串联** | 8' | 用一张时序图串联:用户→CLI→Controller→Command→Strategy→Repository→Model | **师生互动**:让学生在白板上画出调用链 | — | |
||||
|
| **6. 代码落地** | 20' | 实现CrawlStrategy接口 + 两个策略 + 解析器工厂 + ArticleRepository | **教师演示**:分步写出代码,刻意埋入“策略匹配失败”的异常处理 | 完成后用AI检查策略模式实现 | |
||||
|
| **7. 架构反思与W11预告** | 5' | 当前架构还有什么隐患?(异常处理不统一、日志缺失)→ 预告W11健壮性工程 | **师生互动**:如果解析器工厂找不到匹配策略,会发生什么? | — | |
||||
|
| **8. 实践任务** | 5' | 实现策略模式和仓库层,完成本周代码升级 | 学生现场编码,教师巡视 | — | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 四、核心教学内容脚本 |
||||
|
|
||||
|
### 4.1 W9回顾与痛点暴露(8分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "上节课我们搭了一个很漂亮的骨架——CLI+MVC+Command模式。我们先来表扬一下自己:新增一个命令,只要新建一个类,Controller零改动。但请大家想一个问题——" |
||||
|
|
||||
|
**投影展示W9的CrawlCommand存根**: |
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
// ... |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: Would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**提问引导**: |
||||
|
1. "这个存根下周要填坑了。假设我们现在要真正实现爬取,代码写在哪?" |
||||
|
2. "如果我要支持两个网站——比如一个技术博客和一个新闻网站——它们的HTML结构完全不一样,这个`execute`方法会变成什么样?" |
||||
|
|
||||
|
**展示“噩梦版”CrawlCommand**: |
||||
|
```java |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
String url = args[1]; |
||||
|
// 五十行if-else地狱... |
||||
|
if (url.contains("blog.example.com")) { |
||||
|
// 解析技术博客的HTML |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
} else if (url.contains("news.example.com")) { |
||||
|
// 解析新闻网站的HTML |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
} else { |
||||
|
view.printError("Unsupported website!"); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**痛点提炼**: |
||||
|
> "看到了吗?每支持一个新网站,就要在这里加一个`else if`。这就是W1我们痛批的'牵一发而动全身',只不过这次灾难地点从`main`搬到了`CrawlCommand`。" |
||||
|
> |
||||
|
> "更重要的是,我们上节课辛辛苦苦实现了Command模式,难道解析逻辑又要回到if-else地狱吗?**这就是W10要解决的第一个问题:怎么让解析逻辑也可插拔?**" |
||||
|
|
||||
|
**第二个隐患——共享状态的回顾**: |
||||
|
> "还有一件事,我们上节课结束前提到的:`List<Article> articles`在所有Command之间共享。任何一个Command都可以往里面塞东西、删东西、甚至清空。这是W10要解决的第二个问题:**怎么给数据装上'防盗门'?**" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.2 策略模式:解析器的“插头标准化”(18分钟) |
||||
|
|
||||
|
#### 4.2.1 从类比切入 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "先讲个生活场景。你家里墙上有一个三孔插座,你可以插电视、插电脑、插手机充电器——任何符合这个标准的电器都能用。插座不在乎你是什么电器,它只认接口标准。" |
||||
|
|
||||
|
**类比映射**: |
||||
|
|
||||
|
| 生活场景 | 代码对应 | |
||||
|
|----------|----------| |
||||
|
| 三孔插座 | `CrawlStrategy` 接口 | |
||||
|
| 电视/电脑充电器 | 具体解析策略(BlogStrategy/NewsStrategy) | |
||||
|
| 电流 | 输入:URL + Document;输出:List\<Article\> | |
||||
|
| 你(使用者) | CrawlCommand | |
||||
|
| 插座面板 | 解析器工厂 | |
||||
|
|
||||
|
> "策略模式的核心思想就是:**定义一个算法接口,让具体的算法实现可以互相替换,而使用算法的客户端不受影响。**" |
||||
|
|
||||
|
#### 4.2.2 策略模式定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/strategy/CrawlStrategy.java |
||||
|
package com.crawler.strategy; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface CrawlStrategy { |
||||
|
/** |
||||
|
* 从已获取的Document中解析文章列表 |
||||
|
* @param url 原始请求URL(用于填充Article) |
||||
|
* @param doc Jsoup解析后的Document |
||||
|
* @return 解析出的文章列表 |
||||
|
*/ |
||||
|
List<Article> parse(String url, Document doc); |
||||
|
|
||||
|
/** |
||||
|
* 判断此策略是否为给定URL服务 |
||||
|
* @param url 待判断的URL |
||||
|
* @return true表示此策略可以处理该URL |
||||
|
*/ |
||||
|
boolean supports(String url); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "注意,策略接口里有两个方法。`parse`是干活的那个,`supports`是'我能不能干这个活'——这是什么?**这是合同!** 任何网站想被我们爬虫支持,就必须签署这份合同:告诉我你是不是我的客户(supports),以及怎么解析你(parse)。" |
||||
|
|
||||
|
#### 4.2.3 具体策略实现示例 |
||||
|
|
||||
|
```java |
||||
|
// BlogStrategy.java - 技术博客解析策略 |
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// NewsStrategy.java - 新闻网站解析策略 |
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**对比:策略模式 vs 硬编码if-else** |
||||
|
|
||||
|
| 维度 | if-else屎山 | 策略模式 | |
||||
|
|------|-------------|----------| |
||||
|
| 新增网站 | 改CrawlCommand,加else if | 新写一个类,实现CrawlStrategy | |
||||
|
| 修改解析逻辑 | 在CrawlCommand里翻找对应的else if | 只改对应策略类 | |
||||
|
| 测试 | 必须启动整个爬虫 | 单独对Strategy做单元测试 | |
||||
|
| 是否符合开闭原则 | ❌ 对修改开放 | ✅ 对扩展开放,对修改关闭 | |
||||
|
|
||||
|
**与Command模式的对比(加深理解)**: |
||||
|
> "上节课Command模式,我们为每个命令定义一个类;这节课策略模式,我们为每个网站的解析算法定义一个类。**本质上都是同一个OOP思想:用多态替代条件分支。** 只不过Command的接口是`execute()`,Strategy的接口是`parse()`。" |
||||
|
> |
||||
|
> "这张图你们可以记下来:**接口是消除if-else的利器,多态是接口的灵魂。**" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.3 解析器工厂:自动匹配的魔法(14分钟) |
||||
|
|
||||
|
#### 4.3.1 问题引出 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们有A网站的策略、B网站的策略。问题来了:谁来选策略?谁来遍历所有策略,找到一个supports返回true的?" |
||||
|
> |
||||
|
> "如果把这个逻辑写在CrawlCommand里,那策略模式就白用了——CrawlCommand还是得'知道'有哪些策略。我们要的是一个黑盒子:**把URL丢进去,自动弹出一个合适的解析器。**" |
||||
|
|
||||
|
#### 4.3.2 解析器工厂的实现 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/strategy/StrategyFactory.java |
||||
|
package com.crawler.strategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
|
||||
|
// 注册策略——新的网站只需在这里加一行 |
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
// 未来增加新网站:strategies.add(new XxxStrategy()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 根据URL自动匹配解析策略 |
||||
|
* @param url 目标URL |
||||
|
* @return 匹配的策略,如果没有匹配返回null |
||||
|
*/ |
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) { |
||||
|
return s; |
||||
|
} |
||||
|
} |
||||
|
return null; // 未找到匹配策略 |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这个工厂类足够简单:一个List存所有策略,一个方法遍历找到匹配的。但简单不等于不强大。 |
||||
|
> |
||||
|
> **关键点**:新增网站支持,只需要——" |
||||
|
1. 写一个`XxxStrategy`实现`CrawlStrategy` |
||||
|
2. 在工厂构造器里加一行`strategies.add(new XxxStrategy())` |
||||
|
> |
||||
|
> "CrawlCommand一行不改。这就是开闭原则的胜利。" |
||||
|
|
||||
|
#### 4.3.3 从简单工厂到更高级的注册机制(拓展思维) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "有同学可能会问:还要在工厂构造器里加一行,能不能做到完全零改动?当然可以——用反射或者SPI。" |
||||
|
|
||||
|
**演示概念(不要求实现)**: |
||||
|
```java |
||||
|
// 进阶思路:扫描指定包下的所有CrawlStrategy实现类 |
||||
|
// 用反射自动注册,真正做到“新增类即生效” |
||||
|
// 这是Spring框架的核心思想之一 |
||||
|
``` |
||||
|
|
||||
|
> "这个技术我们暂时不要求掌握,但我希望你们知道:你现在写的每一个`new XxxStrategy()`,在未来都可能进化为框架级别的自动装配。**你现在建立的思维习惯,决定了你未来能走多高。**" |
||||
|
|
||||
|
#### 4.3.4 重构后的CrawlCommand |
||||
|
|
||||
|
```java |
||||
|
public class CrawlCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
private StrategyFactory strategyFactory; |
||||
|
private ArticleRepository repository; // 注意:这里是Repository了! |
||||
|
|
||||
|
public CrawlCommand(ConsoleView v, StrategyFactory f, ArticleRepository r) { |
||||
|
this.view = v; |
||||
|
this.strategyFactory = f; |
||||
|
this.repository = r; |
||||
|
} |
||||
|
|
||||
|
public String getName() { return "crawl"; } |
||||
|
|
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
|
||||
|
// 1. 工厂自动选策略 |
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 2. 抓取页面 |
||||
|
view.printInfo("Crawling: " + url); |
||||
|
try { |
||||
|
Document doc = Jsoup.connect(url).get(); |
||||
|
List<Article> parsed = strategy.parse(url, doc); |
||||
|
|
||||
|
// 3. 通过仓库存入(而不是直接操作List) |
||||
|
for (Article a : parsed) { |
||||
|
repository.add(a); |
||||
|
} |
||||
|
view.printSuccess("Crawled " + parsed.size() + " articles."); |
||||
|
} catch (IOException e) { |
||||
|
view.printError("Failed to crawl: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "注意这个CrawlCommand现在的职责:拿到URL → 交给工厂选策略 → 执行解析 → 交给仓库存储。**它自己在干什么?在调度!** 这就是上节课我们讲的Controller的'调度思维',现在向Command内部延伸了。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.4 Repository模式:武装数据访问(12分钟) |
||||
|
|
||||
|
#### 4.4.1 问题重提 |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "回到上节课结束时的那个问题:`List<Article>`在所有Command之间共享。任何一个Command都可以做这些事——" |
||||
|
```java |
||||
|
articles.clear(); // 清空所有文章 |
||||
|
articles.add(null); // 塞入null |
||||
|
articles.remove(0); // 随意删除 |
||||
|
``` |
||||
|
|
||||
|
> "如果一个新同事接手开发,他不知道'不要动这个List'的潜规则,写了一个`articles.clear()`,你的`list`命令就突然什么都不显示了。**靠代码约定维护的秩序,早晚会被打破。我们需要实体的'规则'——代码层面的约束。**" |
||||
|
|
||||
|
#### 4.4.2 ArticleRepository的定义 |
||||
|
|
||||
|
```java |
||||
|
// src/main/java/com/crawler/repository/ArticleRepository.java |
||||
|
package com.crawler.repository; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ArticleRepository { |
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
/** |
||||
|
* 添加一篇文章。注意:不接受null,这是代码层面的规则,不是口头约定。 |
||||
|
*/ |
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取所有文章的只读视图 |
||||
|
* 调用者无法通过此返回值修改内部数据 |
||||
|
*/ |
||||
|
public List<Article> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取文章数量 |
||||
|
*/ |
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 清空(仅管理员可调——下一篇:权限控制) |
||||
|
*/ |
||||
|
public void clear() { |
||||
|
articles.clear(); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "三个关键设计点——" |
||||
|
> |
||||
|
> - **add()拒绝null**:规则写在代码里,不是写在邮件里 |
||||
|
> - **getAll()返回不可修改的视图**:`Collections.unmodifiableList()`——调用者如果尝试add/remove,会**直接抛异常**,不是'悄悄的bug' |
||||
|
> - **ClearCommand要清空数据?调`repository.clear()`**,而不是直接操作List |
||||
|
> |
||||
|
> "这就是面向对象的第一课——封装。把数据藏起来,只暴露安全的方法。从'直接操作集合'到'通过仓库存取',是程序员成熟度的分水岭。" |
||||
|
|
||||
|
#### 4.4.3 仓库引入后的架构变化 |
||||
|
|
||||
|
**Command接口的execute方法调整**: |
||||
|
|
||||
|
```java |
||||
|
// 调整前(W9) |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
|
|
||||
|
// 调整后(W10) |
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, ArticleRepository repository); |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "这个改动很小——把`List<Article>`换成`ArticleRepository`。但语义完全不同:之前是'给你数据随便玩',现在是'给你一个安全的存取通道'。" |
||||
|
|
||||
|
**所有Command同步调整**: |
||||
|
|
||||
|
```java |
||||
|
// ListCommand.java - 调整后 |
||||
|
public class ListCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ListCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "list"; } |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
view.display(repository.getAll()); // 通过仓库获取数据 |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ClearCommand.java(新增示例) |
||||
|
public class ClearCommand implements Command { |
||||
|
private ConsoleView view; |
||||
|
public ClearCommand(ConsoleView v) { this.view = v; } |
||||
|
public String getName() { return "clear"; } |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
repository.clear(); |
||||
|
view.printSuccess("All articles cleared."); |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
**Controller和main的调整**: |
||||
|
|
||||
|
```java |
||||
|
// App.java - 调整后 |
||||
|
public class App { |
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); // 替代 List<Article> |
||||
|
StrategyFactory factory = new StrategyFactory(); // 新增 |
||||
|
|
||||
|
CrawlerController controller = new CrawlerController(view, repository, factory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler v2.0!"); |
||||
|
view.printInfo("Type 'help' for commands."); |
||||
|
|
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
``` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.5 整体架构串联(8分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们把所有部件串起来,看看一个`crawl https://blog.example.com`命令走过的完整路径。" |
||||
|
|
||||
|
**时序图(口述配白板绘制)**: |
||||
|
``` |
||||
|
用户输入 "crawl https://blog.example.com" |
||||
|
│ |
||||
|
▼ |
||||
|
ConsoleView.readLine() |
||||
|
│ |
||||
|
▼ |
||||
|
CrawlerController.handle("crawl https://blog.example.com") |
||||
|
│ Map查找 "crawl" → CrawlCommand |
||||
|
▼ |
||||
|
CrawlCommand.execute(args, repository) |
||||
|
│ |
||||
|
├─► StrategyFactory.getStrategy(url) |
||||
|
│ │ 遍历List<CrawlStrategy> |
||||
|
│ │ BlogStrategy.supports(url) → true! |
||||
|
│ ▼ |
||||
|
│ 返回 BlogStrategy |
||||
|
│ |
||||
|
├─► Jsoup.connect(url).get() → Document |
||||
|
│ |
||||
|
├─► BlogStrategy.parse(url, doc) → List<Article> |
||||
|
│ |
||||
|
└─► for each article: repository.add(article) |
||||
|
│ |
||||
|
▼ |
||||
|
ArticleRepository.articles.add(article) |
||||
|
|
||||
|
最终:ConsoleView.printSuccess("Crawled N articles.") |
||||
|
``` |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "七步调用,每一步职责清晰:View负责输入输出,Controller负责路由,Command负责调度,Factory负责匹配,Strategy负责解析,Repository负责存储。**没有哪个类干了两个人的活,也没有哪个类不知道自己的活是什么。**" |
||||
|
> |
||||
|
> "这就是工程化——不是把代码写得快,是把代码写得对。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.6 代码落地(20分钟) |
||||
|
|
||||
|
**教师准备**:课前准备一份“W9升级到W10”的改动清单,现场演示关键改动。 |
||||
|
|
||||
|
**改动清单**: |
||||
|
1. 新建`strategy/`包,创建`CrawlStrategy`接口 |
||||
|
2. 新建`strategy/BlogStrategy.java` |
||||
|
3. 新建`strategy/NewsStrategy.java` |
||||
|
4. 新建`strategy/StrategyFactory.java` |
||||
|
5. 新建`repository/`包,创建`ArticleRepository.java` |
||||
|
6. 修改`Command`接口的`execute`签名 |
||||
|
7. 修改`CrawlCommand`,引入`StrategyFactory`和`ArticleRepository` |
||||
|
8. 修改其余所有`Command`实现类 |
||||
|
9. 修改`CrawlerController`构造器 |
||||
|
10. 修改`App.java` |
||||
|
|
||||
|
**教师演示关键步骤**(重点演示): |
||||
|
- `ArticleRepository`的`Collections.unmodifiableList()` |
||||
|
- `StrategyFactory`的遍历匹配逻辑 |
||||
|
- `CrawlCommand`重写后的调度结构 |
||||
|
|
||||
|
**刻意埋入的“找茬点”**: |
||||
|
> "我在`StrategyFactory.getStrategy()`里,如果没有匹配的策略就返回`null`。然后在`CrawlCommand`里检查null。这其实叫'null object pattern的前奏'——如果我不想让Command检查null,我应该怎么改工厂?大家带着这个问题用AI探究。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.7 架构反思与W11预告(5分钟) |
||||
|
|
||||
|
**教师口播**: |
||||
|
> "现在我们的架构比W9强壮多了:解析逻辑可插拔,数据访问有守卫。但还有一些漏洞——" |
||||
|
|
||||
|
**逐一点破**: |
||||
|
1. **异常处理**:`CrawlCommand`只写了`catch (IOException e)`,如果解析过程中抛出其他异常(例如运行时异常)怎么办? |
||||
|
2. **网络超时**:如果目标网站3秒没响应,当前代码会一直等吗? |
||||
|
3. **日志缺失**:所有的成功/失败信息只输出到终端,如果程序半夜跑,第二天想看昨晚抓了多少——看不了。 |
||||
|
4. **重试机制**:如果一次失败就直接报错,要不要给个重试的机会? |
||||
|
|
||||
|
**W11预告**: |
||||
|
> "下周,我们会做三件事:**自定义异常体系**、**工程化日志框架**、**防御式编程与重试机制**。W9搭骨架,W10装盔甲,W11要让这个系统**经得起现实的毒打**。" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
### 4.8 实践任务(5分钟) |
||||
|
|
||||
|
**任务要求**: |
||||
|
1. 从W9代码出发,完成W10升级 |
||||
|
2. 实现至少两个`CrawlStrategy`(可以是模拟的,不要求真实爬取) |
||||
|
3. 实现`StrategyFactory`和`ArticleRepository` |
||||
|
4. 确保所有Command通过Repository访问数据 |
||||
|
5. 运行并测试完整流程 |
||||
|
|
||||
|
**验收标准**: |
||||
|
- [x] 新增策略类只需新建文件+工厂注册一行,其余代码零改动 |
||||
|
- [x] `ArticleRepository`的`getAll()`返回不可修改视图 |
||||
|
- [x] `CrawlCommand`不包含任何网站特定的解析逻辑 |
||||
|
- [x] `StrategyFactory`能根据URL自动匹配正确的策略 |
||||
|
- [x] 所有Command的`execute`方法签名已更新为`ArticleRepository` |
||||
|
- [x] 无任何地方直接操作`List<Article>` |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 五、课后作业 |
||||
|
|
||||
|
### 5.1 必做任务 |
||||
|
|
||||
|
1. **完善ArticleRepository**:增加`addAll(List<Article>)`批量添加方法,注意防御null |
||||
|
2. **★ AnalyzeCommand(集大成作业)**: |
||||
|
- 实现`analyze <url>`命令 |
||||
|
- 内部调用`StrategyFactory`匹配策略 |
||||
|
- 调用策略解析文章后,**不存到Repository**,而是分析统计信息: |
||||
|
- 文章总数 |
||||
|
- 标题平均长度 |
||||
|
- 按某种规则排名的Top 5 |
||||
|
- 结果只输出,不存储 |
||||
|
- **提示**:这就是策略的复用——同一个解析策略,既能为`crawl`服务(存入仓库),也能为`analyze`服务(仅分析) |
||||
|
|
||||
|
3. **AI架构审计**:将完整代码的类图(或类名与方法签名列表)发给AI,指令: |
||||
|
> "作为Java架构审计师,请检查:①策略模式的实现是否正确解耦(CrawlCommand是否仍然包含网站特定逻辑);②Repository是否真正封装了数据访问(是否存在绕过Repository直接操作List的地方);③工厂的匹配逻辑是否存在性能隐患。请给出具体的改进建议。" |
||||
|
|
||||
|
### 5.2 选做任务 |
||||
|
|
||||
|
1. **正则策略匹配**:将`supports()`的判断从`url.contains()`改为正则表达式,让一个策略可以匹配一类URL |
||||
|
2. **默认策略(DefaultStrategy)**:当没有策略匹配时,提供一个通用的“标题提取”逻辑 |
||||
|
3. **策略优先级**:给每个策略加一个`priority`字段,工厂按优先级匹配(而不是按注册顺序) |
||||
|
4. **思考并回答(200字)**: |
||||
|
> "策略模式中,策略的`supports()`方法有可能让两个策略都返回true,这时该选哪个?`StrategyFactory`的遍历顺序会如何影响结果?你有什么解决方案?" |
||||
|
|
||||
|
### 5.3 思考题 |
||||
|
|
||||
|
1. **Repository与List的区别是什么?** 如果Repository只是包了一层List,为什么还要用? |
||||
|
2. **策略工厂的演进**:如果网站数量增加到100个,逐个注册的写法还合适吗?你想到什么解决方案? |
||||
|
3. **`Collections.unmodifiableList()`返回的是什么?** 它真的“不可修改”吗?如果原List被修改,这个不可修改视图会怎样? |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 六、AI协同升级 |
||||
|
|
||||
|
### 架构审计师任务(必做) |
||||
|
|
||||
|
**学生执行步骤**: |
||||
|
1. 画出当前项目的类依赖图(手绘或工具生成) |
||||
|
2. 将类名和依赖关系发给AI |
||||
|
3. 输入指令: |
||||
|
> "作为Java架构审计师,请检查这个爬虫项目的架构。重点关注:①策略模式是否真正实现了开闭原则(增加新网站是否真的只需新增类);②Repository封装是否完整(是否有绕过Repository的路径);③是否存在循环依赖。请逐一指出问题并给出改进建议。" |
||||
|
|
||||
|
**预期AI输出**: |
||||
|
- 指出是否还存在“改一处影响多处”的耦合 |
||||
|
- 判断Repository的API设计是否完备 |
||||
|
- 评价整体架构的开闭原则达成度 |
||||
|
|
||||
|
### 进阶AI探究(选做) |
||||
|
|
||||
|
> "假设我有一个CrawlStrategy接口和10个实现类。不用工厂模式,直接用一个Map<String, CrawlStrategy>存起来,key是策略名称。这和StrategyFactory设计有什么本质区别?各自的优缺点是什么?" |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 七、教学反思与调整记录 |
||||
|
|
||||
|
| 日期 | 事项 | 调整内容 | |
||||
|
|------|------|----------| |
||||
|
| 2026-05-01 | 首次编写 | 基于W9骨架,引入策略模式+工厂+Repository | |
||||
|
| 2026-05-07 | 结构优化 | 调整策略模式与工厂的讲解顺序,先策略后工厂更自然 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 附录1:W9到W10改动对照表 |
||||
|
|
||||
|
| 改动项 | W9代码 | W10代码 | |
||||
|
|--------|--------|---------| |
||||
|
| 数据存储 | `List<Article> articles` | `ArticleRepository repository` | |
||||
|
| Command接口 | `execute(String[], List<Article>)` | `execute(String[], ArticleRepository)` | |
||||
|
| 解析逻辑位置 | `CrawlCommand`内部 | 各`CrawlStrategy`实现类 | |
||||
|
| URL匹配 | 无(硬编码) | `StrategyFactory.getStrategy(url)` | |
||||
|
| 数据添加 | `articles.add(article)` | `repository.add(article)` | |
||||
|
| 数据读取 | 直接遍历`articles` | `repository.getAll()` | |
||||
|
|
||||
|
## 附录2:常见问题速查 |
||||
|
|
||||
|
| 问题 | 解答 | |
||||
|
|------|------| |
||||
|
| 策略模式和Command模式有什么区别? | Command封装“动作”(做什么事),Strategy封装“算法”(怎么做)。在爬虫中:crawl是命令(动作),如何解析是策略(算法)。 | |
||||
|
| 工厂一定要叫Factory吗? | 不一定。但叫Factory意味着“创建对象”的职责,符合模式命名的惯例。 |
||||
|
| `Collections.unmodifiableList()`有什么用? | 返回一个只读视图,调用add/remove等方法会抛`UnsupportedOperationException`。 | |
||||
|
| Repository和DAO有什么区别? | 在我们的上下文中可以视为同义词。严谨地说,Repository是领域驱动设计的概念,更偏向“集合语义”;DAO更偏数据库操作。 | |
||||
|
| 策略的`supports()`返回true但解析失败怎么办? | 那是策略实现的bug,该策略应修复。Factory不负责验证策略的正确性。 | |
||||
|
|
||||
|
## 附录3:教学逻辑说明 |
||||
|
|
||||
|
| 顺序 | 内容 | 设计理由 | |
||||
|
|------|------|----------| |
||||
|
| 1 | W9回顾+痛点暴露 | 承上启下,从已知问题引出新知识 | |
||||
|
| 2 | 策略模式 | 解决解析逻辑耦合问题,深化多态理解 | |
||||
|
| 3 | 解析器工厂 | 解决策略选择问题,引入工厂模式 | |
||||
|
| 4 | Repository模式 | 解决数据安全问题,实践封装原则 | |
||||
|
| 5 | 架构串联 | 将所有部件统一,形成完整心智模型 | |
||||
|
| 6 | 代码落地 | 实践验证,从“听懂”到“会做” | |
||||
|
| 7 | 架构反思+预告 | 暴露新问题,为W11健壮性工程铺垫 | |
||||
|
|
||||
|
--- |
||||
|
|
||||
|
## 版本说明 |
||||
|
|
||||
|
- **v1(本版)**:基于W9教案模式首次编写,包含策略模式、工厂模式、Repository模式的完整引入 |
||||
@ -0,0 +1,145 @@ |
|||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
|
||||
|
import java.io.BufferedWriter; |
||||
|
import java.io.FileWriter; |
||||
|
import java.io.IOException; |
||||
|
|
||||
|
/** |
||||
|
* 通用爬虫父类 |
||||
|
* 封装通用功能,定义抽象方法让子类实现具体解析逻辑 |
||||
|
*/ |
||||
|
public abstract class BaseCrawler { |
||||
|
// 通用请求头设置
|
||||
|
protected static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"; |
||||
|
protected static final String ACCEPT = "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8"; |
||||
|
protected static final String ACCEPT_LANGUAGE = "zh-CN,zh;q=0.9"; |
||||
|
protected static final int TIMEOUT = 30000; |
||||
|
|
||||
|
// 延时时间(毫秒)
|
||||
|
protected static final int DELAY_MS = 1000; |
||||
|
|
||||
|
// 进度统计
|
||||
|
protected int totalCount = 0; |
||||
|
protected int targetCount = 0; |
||||
|
|
||||
|
/** |
||||
|
* 抽象方法:解析数据 |
||||
|
* 子类必须实现具体的解析逻辑 |
||||
|
*/ |
||||
|
protected abstract void parseData(Document doc, BufferedWriter writer) throws IOException, InterruptedException; |
||||
|
|
||||
|
/** |
||||
|
* 运行爬虫 |
||||
|
* @param url 目标URL |
||||
|
* @param outputFile 输出文件路径 |
||||
|
* @param targetCount 目标数量(用于进度计算) |
||||
|
*/ |
||||
|
public void run(String url, String outputFile, int targetCount) { |
||||
|
this.targetCount = targetCount; |
||||
|
totalCount = 0; |
||||
|
|
||||
|
System.out.println("开始爬取数据..."); |
||||
|
System.out.println("目标:" + targetCount + "个项目\n"); |
||||
|
|
||||
|
try (BufferedWriter writer = new BufferedWriter(new FileWriter(outputFile))) { |
||||
|
// 写入CSV表头
|
||||
|
writeHeader(writer); |
||||
|
|
||||
|
System.out.println("正在访问页面..."); |
||||
|
|
||||
|
// 发送HTTP请求获取页面
|
||||
|
Document doc = fetchDocument(url); |
||||
|
|
||||
|
System.out.println("页面标题:" + doc.title()); |
||||
|
|
||||
|
// 调用子类实现的解析方法
|
||||
|
parseData(doc, writer); |
||||
|
|
||||
|
// 输出结果
|
||||
|
printResult(outputFile); |
||||
|
|
||||
|
} catch (IOException e) { |
||||
|
System.err.println("爬取失败:" + e.getMessage()); |
||||
|
e.printStackTrace(); |
||||
|
} catch (InterruptedException e) { |
||||
|
Thread.currentThread().interrupt(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取页面文档 |
||||
|
* @param url 目标URL |
||||
|
* @return 页面文档 |
||||
|
* @throws IOException 网络异常 |
||||
|
*/ |
||||
|
protected Document fetchDocument(String url) throws IOException { |
||||
|
return Jsoup.connect(url) |
||||
|
.userAgent(USER_AGENT) |
||||
|
.timeout(TIMEOUT) |
||||
|
.header("Accept", ACCEPT) |
||||
|
.header("Accept-Language", ACCEPT_LANGUAGE) |
||||
|
.followRedirects(true) |
||||
|
.get(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 写入CSV表头 |
||||
|
* 子类可以重写此方法以自定义表头 |
||||
|
*/ |
||||
|
protected void writeHeader(BufferedWriter writer) throws IOException { |
||||
|
writer.write("项目名称,类别,地区,简介"); |
||||
|
writer.newLine(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 延时防反爬 |
||||
|
*/ |
||||
|
protected void delay() throws InterruptedException { |
||||
|
Thread.sleep(DELAY_MS); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 清理CSV字段中的特殊字符 |
||||
|
* @param field 字段值 |
||||
|
* @return 清理后的字段值 |
||||
|
*/ |
||||
|
protected String cleanCsvField(String field) { |
||||
|
if (field == null) { |
||||
|
return ""; |
||||
|
} |
||||
|
// 移除换行符和制表符
|
||||
|
field = field.replace("\n", " ").replace("\r", " ").replace("\t", " "); |
||||
|
// 移除引用标记
|
||||
|
field = field.replace("[", "").replace("]", ""); |
||||
|
// 如果包含逗号,用双引号包裹
|
||||
|
if (field.contains(",")) { |
||||
|
field = "\"" + field.replace("\"", "\"\"") + "\""; |
||||
|
} |
||||
|
return field; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 打印进度 |
||||
|
* @param count 当前处理数量 |
||||
|
*/ |
||||
|
protected void printProgress(int count) { |
||||
|
if (count % 100 == 0) { |
||||
|
System.out.println(" 已爬取 " + count + " 个项目..."); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 打印结果 |
||||
|
* @param outputFile 输出文件路径 |
||||
|
*/ |
||||
|
protected void printResult(String outputFile) { |
||||
|
System.out.println("\n========================================"); |
||||
|
System.out.println("全部爬取完成!"); |
||||
|
System.out.println("共爬取 " + totalCount + " 个项目"); |
||||
|
System.out.println("目标:" + targetCount + "个项目"); |
||||
|
System.out.println("完成率:" + String.format("%.2f", (totalCount / (double) targetCount) * 100) + "%"); |
||||
|
System.out.println("========================================"); |
||||
|
System.out.println("数据已保存到:" + outputFile); |
||||
|
} |
||||
|
} |
||||
Binary file not shown.
@ -0,0 +1,105 @@ |
|||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
|
||||
|
import java.io.BufferedWriter; |
||||
|
import java.io.IOException; |
||||
|
|
||||
|
/** |
||||
|
* 中国非物质文化遗产爬虫 |
||||
|
* 爬取 Wikipedia 上的国家级非遗项目列表(1557项) |
||||
|
* 继承自 BaseCrawler,实现具体的解析逻辑 |
||||
|
*/ |
||||
|
public class IntangibleHeritageCrawler extends BaseCrawler { |
||||
|
|
||||
|
// Wikipedia 非遗列表页面
|
||||
|
private static final String WIKIPEDIA_URL = "https://zh.wikipedia.org/wiki/国家级非物质文化遗产代表性项目名录"; |
||||
|
// 输出文件路径
|
||||
|
private static final String OUTPUT_FILE = "intangible_heritage.csv"; |
||||
|
// 目标项目数量
|
||||
|
private static final int TARGET_COUNT = 1557; |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
// 使用多态:父类引用指向子类对象
|
||||
|
BaseCrawler crawler = new IntangibleHeritageCrawler(); |
||||
|
// 调用父类的通用 run 方法,内部会自动执行子类的 parseData 实现
|
||||
|
crawler.run(WIKIPEDIA_URL, OUTPUT_FILE, TARGET_COUNT); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 重写父类的抽象方法 parseData |
||||
|
* 实现维基百科非遗表格的解析逻辑 |
||||
|
*/ |
||||
|
@Override |
||||
|
protected void parseData(Document doc, BufferedWriter writer) throws IOException, InterruptedException { |
||||
|
// Wikipedia 的表格通常有特定的class
|
||||
|
Elements tables = doc.select("table.wikitable, table.sortable"); |
||||
|
System.out.println("找到 " + tables.size() + " 个表格\n"); |
||||
|
|
||||
|
// 遍历所有表格
|
||||
|
for (Element table : tables) { |
||||
|
// 提取表格标题(类别)
|
||||
|
String category = ""; |
||||
|
Element caption = table.selectFirst("caption"); |
||||
|
if (caption != null) { |
||||
|
category = caption.text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 提取表格行
|
||||
|
Elements rows = table.select("tr"); |
||||
|
System.out.println("表格:" + category + " - 共 " + rows.size() + " 行"); |
||||
|
|
||||
|
int tableCount = 0; |
||||
|
for (Element row : rows) { |
||||
|
try { |
||||
|
// 提取单元格
|
||||
|
Elements cells = row.select("td"); |
||||
|
|
||||
|
if (cells.size() >= 2) { |
||||
|
// 第一列通常是项目名称
|
||||
|
String name = cells.get(0).text().trim(); |
||||
|
|
||||
|
// 第二列通常是地区
|
||||
|
String region = cells.get(1).text().trim(); |
||||
|
|
||||
|
// 如果有第三列,可能是简介或批次
|
||||
|
String description = ""; |
||||
|
if (cells.size() >= 3) { |
||||
|
description = cells.get(2).text().trim(); |
||||
|
} |
||||
|
|
||||
|
// 清理数据(使用父类提供的方法)
|
||||
|
name = cleanCsvField(name); |
||||
|
category = cleanCsvField(category); |
||||
|
region = cleanCsvField(region); |
||||
|
description = cleanCsvField(description); |
||||
|
|
||||
|
// 如果项目名称不为空且不是表头,则写入CSV
|
||||
|
if (!name.isEmpty() && |
||||
|
!name.equals("项目名称") && |
||||
|
!name.equals("名称") && |
||||
|
!name.equals("序号") && |
||||
|
name.length() > 1) { |
||||
|
|
||||
|
writer.write(String.format("%s,%s,%s,%s", name, category, region, description)); |
||||
|
writer.newLine(); |
||||
|
tableCount++; |
||||
|
totalCount++; |
||||
|
|
||||
|
// 打印进度(使用父类提供的方法)
|
||||
|
printProgress(totalCount); |
||||
|
} |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.err.println(" 解析行时出错:" + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println(" 该表格爬取完成!共 " + tableCount + " 个项目\n"); |
||||
|
|
||||
|
// 延时,避免请求过快(使用父类提供的方法)
|
||||
|
delay(); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@ -0,0 +1,28 @@ |
|||||
|
/** |
||||
|
* 圆:面积 = π * r² |
||||
|
*/ |
||||
|
public class Circle extends Shape { |
||||
|
|
||||
|
private final double radius; |
||||
|
|
||||
|
public Circle(double radius) { |
||||
|
if (radius <= 0) { |
||||
|
throw new IllegalArgumentException("半径必须为正数"); |
||||
|
} |
||||
|
this.radius = radius; |
||||
|
} |
||||
|
|
||||
|
public double getRadius() { |
||||
|
return radius; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public double getArea() { |
||||
|
return Math.PI * radius * radius; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void draw() { |
||||
|
System.out.println("绘制圆形"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,19 @@ |
|||||
|
/** |
||||
|
* 电脑:使用USB设备 |
||||
|
*/ |
||||
|
public class Computer { |
||||
|
|
||||
|
/** |
||||
|
* 使用USB设备 |
||||
|
* @param usb USB设备实例 |
||||
|
*/ |
||||
|
public void useUSB(USB usb) { |
||||
|
if (usb == null) { |
||||
|
System.out.println("USB设备为空,无法使用。"); |
||||
|
return; |
||||
|
} |
||||
|
usb.open(); |
||||
|
System.out.println("使用USB设备中..."); |
||||
|
usb.close(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,15 @@ |
|||||
|
/** |
||||
|
* 键盘:实现USB接口 |
||||
|
*/ |
||||
|
public class Keyboard implements USB { |
||||
|
|
||||
|
@Override |
||||
|
public void open() { |
||||
|
System.out.println("键盘已连接"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void close() { |
||||
|
System.out.println("键盘已断开"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,15 @@ |
|||||
|
/** |
||||
|
* 鼠标:实现USB接口 |
||||
|
*/ |
||||
|
public class Mouse implements USB { |
||||
|
|
||||
|
@Override |
||||
|
public void open() { |
||||
|
System.out.println("鼠标已连接"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void close() { |
||||
|
System.out.println("鼠标已断开"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,56 @@ |
|||||
|
/**
 * Base class for people, holding the attributes every person has: a name and
 * an identity-card number. The class is concrete and can be instantiated directly.
 */
public class Person {

    private String name;
    private String id;

    /**
     * Creates a person whose name and id are both {@code null}.
     */
    public Person() {
        this(null, null);
    }

    /**
     * Creates a person with the given attributes.
     *
     * @param name person's name
     * @param id   identity-card number
     */
    public Person(String name, String id) {
        this.name = name;
        this.id = id;
    }

    /**
     * @return the person's name
     */
    public String getName() {
        return this.name;
    }

    /**
     * @param name the new name
     */
    public void setName(String name) {
        this.name = name;
    }

    /**
     * @return the identity-card number
     */
    public String getId() {
        return this.id;
    }

    /**
     * @param id the new identity-card number
     */
    public void setId(String id) {
        this.id = id;
    }
}
||||
@ -0,0 +1,34 @@ |
|||||
|
/** |
||||
|
* 矩形:面积 = 宽 * 高 |
||||
|
*/ |
||||
|
public class Rectangle extends Shape { |
||||
|
|
||||
|
private final double width; |
||||
|
private final double height; |
||||
|
|
||||
|
public Rectangle(double width, double height) { |
||||
|
if (width <= 0 || height <= 0) { |
||||
|
throw new IllegalArgumentException("宽和高必须为正数"); |
||||
|
} |
||||
|
this.width = width; |
||||
|
this.height = height; |
||||
|
} |
||||
|
|
||||
|
public double getWidth() { |
||||
|
return width; |
||||
|
} |
||||
|
|
||||
|
public double getHeight() { |
||||
|
return height; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public double getArea() { |
||||
|
return width * height; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void draw() { |
||||
|
System.out.println("绘制矩形"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,17 @@ |
|||||
|
/**
 * Abstract base class for shapes. It is the common polymorphic entry point;
 * each subclass supplies its own area computation.
 */
public abstract class Shape {

    /**
     * Computes the area of the shape.
     *
     * @return the area; the unit (for example square centimetres) is defined by the subclass
     */
    public abstract double getArea();

    /**
     * Draws the shape. This default implementation prints a generic message.
     */
    public void draw() {
        final String message = "绘制图形";
        System.out.println(message);
    }
}
||||
@ -0,0 +1,57 @@ |
|||||
|
/** |
||||
|
* 演示:多态 —— 同一 {@link ShapeUtil#printArea(Shape)} 处理圆、矩形、三角形。 |
||||
|
* 运行:javac *.java 后执行 java ShapeCalculatorDemo |
||||
|
*/ |
||||
|
public class ShapeCalculatorDemo { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
Shape circle = new Circle(3.0); |
||||
|
Shape rectangle = new Rectangle(4.0, 5.0); |
||||
|
|
||||
|
System.out.println("—— 圆 ——"); |
||||
|
ShapeUtil.printArea(circle); |
||||
|
|
||||
|
System.out.println("—— 矩形 ——"); |
||||
|
ShapeUtil.printArea(rectangle); |
||||
|
|
||||
|
System.out.println("—— 多态数组统一处理 ——"); |
||||
|
Shape[] shapes = {circle, rectangle}; |
||||
|
for (Shape s : shapes) { |
||||
|
ShapeUtil.printArea(s); |
||||
|
} |
||||
|
|
||||
|
// 测试drawShape方法
|
||||
|
System.out.println("\n—— 测试绘制图形 ——"); |
||||
|
ShapeUtil.drawShape(circle); |
||||
|
ShapeUtil.drawShape(rectangle); |
||||
|
|
||||
|
// 测试Computer和USB设备
|
||||
|
System.out.println("\n—— 测试Computer和USB设备 ——"); |
||||
|
Computer computer = new Computer(); |
||||
|
Mouse mouse = new Mouse(); |
||||
|
Keyboard keyboard = new Keyboard(); |
||||
|
|
||||
|
System.out.println("使用鼠标:"); |
||||
|
computer.useUSB(mouse); |
||||
|
|
||||
|
System.out.println("\n使用键盘:"); |
||||
|
computer.useUSB(keyboard); |
||||
|
|
||||
|
// 测试学生管理系统
|
||||
|
System.out.println("\n—— 测试学生管理系统 ——"); |
||||
|
StudentManagementSystem sms = new StudentManagementSystem(); |
||||
|
|
||||
|
// 创建学生对象
|
||||
|
Student student = new Student("张三", "110101200001011234", "2024001", "计算机科学与技术"); |
||||
|
// 创建教师对象
|
||||
|
Teacher teacher = new Teacher("李四", "110101198001011234", "T2024001", "Java程序设计"); |
||||
|
|
||||
|
// 添加学生
|
||||
|
System.out.println("添加学生:"); |
||||
|
sms.addPerson(student); |
||||
|
|
||||
|
// 添加教师
|
||||
|
System.out.println("添加教师:"); |
||||
|
sms.addPerson(teacher); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,35 @@ |
|||||
|
/** |
||||
|
* 图形工具类:通过多态统一打印任意 {@link Shape} 的面积。 |
||||
|
*/ |
||||
|
public final class ShapeUtil { |
||||
|
|
||||
|
private ShapeUtil() { |
||||
|
// 工具类禁止实例化
|
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 打印给定图形的面积(保留两位小数,便于实验输出阅读)。 |
||||
|
* |
||||
|
* @param shape 任意 {@link Shape} 子类实例,可为 null(将给出提示) |
||||
|
*/ |
||||
|
public static void printArea(Shape shape) { |
||||
|
if (shape == null) { |
||||
|
System.out.println("图形引用为空,无法计算面积。"); |
||||
|
return; |
||||
|
} |
||||
|
System.out.printf("该图形的面积为:%.2f%n", shape.getArea()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 绘制给定图形。 |
||||
|
* |
||||
|
* @param s 任意 {@link Shape} 子类实例,可为 null(将给出提示) |
||||
|
*/ |
||||
|
public static void drawShape(Shape s) { |
||||
|
if (s == null) { |
||||
|
System.out.println("图形引用为空,无法绘制。"); |
||||
|
return; |
||||
|
} |
||||
|
s.draw(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,59 @@ |
|||||
|
/** |
||||
|
* 学生:继承Person类,添加学生特有属性 |
||||
|
*/ |
||||
|
public class Student extends Person { |
||||
|
|
||||
|
private String studentId; // 学号
|
||||
|
private String major; // 专业
|
||||
|
|
||||
|
/** |
||||
|
* 无参构造方法 |
||||
|
*/ |
||||
|
public Student() { |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 有参构造方法 |
||||
|
* @param name 姓名 |
||||
|
* @param id 身份证号 |
||||
|
* @param studentId 学号 |
||||
|
* @param major 专业 |
||||
|
*/ |
||||
|
public Student(String name, String id, String studentId, String major) { |
||||
|
super(name, id); |
||||
|
this.studentId = studentId; |
||||
|
this.major = major; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取学号 |
||||
|
* @return 学号 |
||||
|
*/ |
||||
|
public String getStudentId() { |
||||
|
return studentId; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 设置学号 |
||||
|
* @param studentId 学号 |
||||
|
*/ |
||||
|
public void setStudentId(String studentId) { |
||||
|
this.studentId = studentId; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取专业 |
||||
|
* @return 专业 |
||||
|
*/ |
||||
|
public String getMajor() { |
||||
|
return major; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 设置专业 |
||||
|
* @param major 专业 |
||||
|
*/ |
||||
|
public void setMajor(String major) { |
||||
|
this.major = major; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,38 @@ |
|||||
|
/** |
||||
|
* 学生管理系统:使用多态重构添加人员的方法 |
||||
|
*/ |
||||
|
public class StudentManagementSystem { |
||||
|
|
||||
|
/** |
||||
|
* 添加人员(使用多态,统一处理学生和教师) |
||||
|
* @param p 人员对象(Student或Teacher) |
||||
|
*/ |
||||
|
public void addPerson(Person p) { |
||||
|
if (p == null) { |
||||
|
System.out.println("人员对象为空,无法添加。"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
// 通用信息
|
||||
|
System.out.println("添加人员信息:"); |
||||
|
System.out.println("姓名:" + p.getName()); |
||||
|
System.out.println("身份证号:" + p.getId()); |
||||
|
|
||||
|
// 使用instanceof判断具体类型,处理特有属性
|
||||
|
if (p instanceof Student) { |
||||
|
Student student = (Student) p; |
||||
|
System.out.println("类型:学生"); |
||||
|
System.out.println("学号:" + student.getStudentId()); |
||||
|
System.out.println("专业:" + student.getMajor()); |
||||
|
} else if (p instanceof Teacher) { |
||||
|
Teacher teacher = (Teacher) p; |
||||
|
System.out.println("类型:教师"); |
||||
|
System.out.println("教师编号:" + teacher.getTeacherId()); |
||||
|
System.out.println("教授科目:" + teacher.getSubject()); |
||||
|
} else { |
||||
|
System.out.println("类型:其他人员"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("添加成功!\n"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,59 @@ |
|||||
|
/** |
||||
|
* 教师:继承Person类,添加教师特有属性 |
||||
|
*/ |
||||
|
public class Teacher extends Person { |
||||
|
|
||||
|
private String teacherId; // 教师编号
|
||||
|
private String subject; // 教授科目
|
||||
|
|
||||
|
/** |
||||
|
* 无参构造方法 |
||||
|
*/ |
||||
|
public Teacher() { |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 有参构造方法 |
||||
|
* @param name 姓名 |
||||
|
* @param id 身份证号 |
||||
|
* @param teacherId 教师编号 |
||||
|
* @param subject 教授科目 |
||||
|
*/ |
||||
|
public Teacher(String name, String id, String teacherId, String subject) { |
||||
|
super(name, id); |
||||
|
this.teacherId = teacherId; |
||||
|
this.subject = subject; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取教师编号 |
||||
|
* @return 教师编号 |
||||
|
*/ |
||||
|
public String getTeacherId() { |
||||
|
return teacherId; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 设置教师编号 |
||||
|
* @param teacherId 教师编号 |
||||
|
*/ |
||||
|
public void setTeacherId(String teacherId) { |
||||
|
this.teacherId = teacherId; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取教授科目 |
||||
|
* @return 教授科目 |
||||
|
*/ |
||||
|
public String getSubject() { |
||||
|
return subject; |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 设置教授科目 |
||||
|
* @param subject 教授科目 |
||||
|
*/ |
||||
|
public void setSubject(String subject) { |
||||
|
this.subject = subject; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,15 @@ |
|||||
|
/**
 * Contract for USB devices: every device can be opened and closed.
 */
public interface USB {

    /**
     * Opens (connects) the device.
     */
    void open();

    /**
     * Closes (disconnects) the device.
     */
    void close();
}
||||
@ -0,0 +1,5 @@ |
|||||
|
/**
 * Abstract animal defining the behaviour every animal has.
 */
public abstract class Animal {

    /**
     * Makes the animal's sound; every concrete subclass must implement it.
     */
    public abstract void makeSound();
}
||||
@ -0,0 +1,8 @@ |
|||||
|
// Cat类,继承Animal抽象类
|
||||
|
public class Cat extends Animal { |
||||
|
// 重写makeSound方法,实现猫的叫声
|
||||
|
@Override |
||||
|
public void makeSound() { |
||||
|
System.out.println("Cat meows: Meow! Meow!"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,14 @@ |
|||||
|
// Dog类,继承Animal抽象类并实现Swimmable接口
|
||||
|
public class Dog extends Animal implements Swimmable { |
||||
|
// 重写makeSound方法,实现狗的叫声
|
||||
|
@Override |
||||
|
public void makeSound() { |
||||
|
System.out.println("Dog barks: Woof! Woof!"); |
||||
|
} |
||||
|
|
||||
|
// 实现swim方法,定义狗的游泳行为
|
||||
|
@Override |
||||
|
public void swim() { |
||||
|
System.out.println("Dog is swimming"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,21 @@ |
|||||
|
// 测试类,用于验证多态和接口的使用
|
||||
|
public class TestAnimal { |
||||
|
public static void main(String[] args) { |
||||
|
// 使用多态创建Animal类型的引用,指向Dog和Cat对象
|
||||
|
Animal dog = new Dog(); |
||||
|
Animal cat = new Cat(); |
||||
|
|
||||
|
// 调用makeSound方法,多态会根据实际对象类型调用相应的方法
|
||||
|
System.out.println("Testing makeSound() method:"); |
||||
|
dog.makeSound(); // 实际调用Dog类的makeSound方法
|
||||
|
cat.makeSound(); // 实际调用Cat类的makeSound方法
|
||||
|
|
||||
|
// 调用Dog对象的swim方法
|
||||
|
System.out.println("\nTesting swim() method:"); |
||||
|
// 需要将Animal类型的引用转换为Swimmable接口类型,然后调用swim方法
|
||||
|
((Swimmable) dog).swim(); |
||||
|
|
||||
|
// 注意:Cat类没有实现Swimmable接口,所以不能调用swim方法
|
||||
|
// ((Swimmable) cat).swim(); // 这行代码会编译错误
|
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,41 @@ |
|||||
|
import java.io.BufferedReader; |
||||
|
import java.io.FileReader; |
||||
|
import java.io.IOException; |
||||
|
import java.io.FileNotFoundException; |
||||
|
|
||||
|
/**
 * Reads integer scores, one per line, from {@code scores.txt} in the working
 * directory and prints how many were valid, their sum and their average.
 * Lines that are not valid integers are reported and skipped.
 */
public class ScoreCalculator {

    /**
     * Runs the score statistics.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String filePath = "scores.txt";
        int total = 0;
        int validCount = 0;

        try (BufferedReader reader = new BufferedReader(new FileReader(filePath))) {
            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
                try {
                    // The total is only touched when parsing succeeded.
                    total += Integer.parseInt(line.trim());
                    validCount++;
                } catch (NumberFormatException e) {
                    System.out.println("警告:跳过无效的数字格式 - \"" + line + "\"");
                }
            }

            if (validCount > 0) {
                // Promote to double so the average keeps its fractional part.
                double average = (double) total / validCount;
                System.out.println("========== 成绩统计结果 ==========");
                System.out.println("有效成绩数量: " + validCount);
                System.out.println("总成绩: " + total);
                System.out.println("平均分: " + String.format("%.2f", average));
            } else {
                System.out.println("未找到有效的成绩数据");
            }

        } catch (FileNotFoundException e) {
            System.out.println("错误:文件不存在 - " + filePath);
            System.out.println("请确保 scores.txt 文件位于程序运行目录下");
        } catch (IOException e) {
            System.out.println("错误:文件读取失败 - " + e.getMessage());
        }
    }
}
||||
@ -0,0 +1,5 @@ |
|||||
|
/**
 * Minimal program that prints a greeting.
 */
public class Test {

    /**
     * Prints "Hello World!" to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        final String greeting = "Hello World!";
        System.out.println(greeting);
    }
}
||||
@ -0,0 +1,9 @@ |
|||||
|
85 |
||||
|
92 |
||||
|
78 |
||||
|
90 |
||||
|
88 |
||||
|
abc |
||||
|
95 |
||||
|
xyz |
||||
|
80 |
||||
Binary file not shown.
@ -0,0 +1,171 @@ |
|||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
/**
 * Task 1: generic pair {@code Pair<K, V>}.
 *
 * <p>Holds a key and a value with getters and setters, an in-place {@link #swap()}
 * and a type-safe {@link #swapped()} that returns a new pair.
 *
 * @param <K> type of the key
 * @param <V> type of the value
 */
class Pair<K, V> {

    private K key;
    private V value;

    /**
     * Creates a pair whose key and value are both {@code null}.
     */
    public Pair() {
    }

    /**
     * Creates a pair with the given key and value.
     *
     * @param key   the key
     * @param value the value
     */
    public Pair(K key, V value) {
        this.key = key;
        this.value = value;
    }

    /**
     * @return the key
     */
    public K getKey() {
        return key;
    }

    /**
     * @param key the new key
     */
    public void setKey(K key) {
        this.key = key;
    }

    /**
     * @return the value
     */
    public V getValue() {
        return value;
    }

    /**
     * @param value the new value
     */
    public void setValue(V value) {
        this.value = value;
    }

    /**
     * Exchanges key and value in place.
     *
     * <p>Warning: unless {@code K} and {@code V} are the same type, the pair no longer
     * matches its declared type arguments afterwards. Because of type erasure the casts
     * below are not checked, so the swap itself succeeds, but a later typed read such as
     * {@code String k = pair.getKey()} can throw {@link ClassCastException}. Use
     * {@link #swapped()} when the type arguments differ.
     */
    @SuppressWarnings("unchecked") // deliberate: the in-place swap cannot be expressed type-safely
    public void swap() {
        K previousKey = this.key;
        this.key = (K) this.value;
        this.value = (V) previousKey;
    }

    /**
     * Returns a new pair with key and value exchanged; this pair is left unchanged.
     *
     * @return a {@code Pair<V, K>} holding this pair's value as key and its key as value
     */
    public Pair<V, K> swapped() {
        return new Pair<>(value, key);
    }

    @Override
    public String toString() {
        return "Pair{key=" + key + ", value=" + value + "}";
    }

    /**
     * Small demo that prints two pairs before and after swapping.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("========== Pair 测试 ==========");

        Pair<String, Integer> pair = new Pair<>("Hello", 100);
        System.out.println("交换前: " + pair);
        pair.swap();
        System.out.println("交换后: " + pair);

        Pair<Double, String> pair2 = new Pair<>(3.14, "PI");
        System.out.println("交换前: " + pair2);
        pair2.swap();
        System.out.println("交换后: " + pair2);
    }
}
||||
|
|
||||
|
/**
 * Task 2: generic in-memory cache {@code Cache<K, V>} backed by a {@link HashMap}.
 *
 * <p>Supports {@code put}, {@code get}, {@code remove}, {@code clear}, {@code size}
 * and {@code containsKey}. {@code null} keys are rejected on insertion and treated
 * as absent on lookup and removal.
 *
 * @param <K> type of the keys
 * @param <V> type of the cached values
 */
class Cache<K, V> {

    private final Map<K, V> cache = new HashMap<>();

    /**
     * Creates an empty cache.
     */
    public Cache() {
    }

    /**
     * Stores a value, replacing any value already stored under the key.
     *
     * @param key   key to store under, must not be {@code null}
     * @param value value to store
     * @throws IllegalArgumentException if {@code key} is {@code null}
     */
    public void put(K key, V value) {
        if (key == null) {
            throw new IllegalArgumentException("key不能为null");
        }
        cache.put(key, value);
    }

    /**
     * Looks up a value.
     *
     * @param key key to look up
     * @return the stored value, or {@code null} if the key is {@code null} or has no entry
     */
    public V get(K key) {
        return key == null ? null : cache.get(key);
    }

    /**
     * Removes an entry.
     *
     * @param key key to remove
     * @return the removed value, or {@code null} if the key is {@code null} or had no entry
     */
    public V remove(K key) {
        return key == null ? null : cache.remove(key);
    }

    /**
     * Removes every entry.
     */
    public void clear() {
        cache.clear();
    }

    /**
     * @return the number of entries
     */
    public int size() {
        return cache.size();
    }

    /**
     * @param key key to test
     * @return {@code true} if the key is non-null and has an entry
     */
    public boolean containsKey(K key) {
        return key != null && cache.containsKey(key);
    }

    @Override
    public String toString() {
        return "Cache{" + cache + "}";
    }

    /**
     * Small demo that walks through every cache operation and prints the results.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println("========== Cache 测试 ==========");
        Cache<String, Integer> scores = new Cache<>();

        scores.put("Java", 90);
        scores.put("Python", 85);
        scores.put("C++", 92);
        System.out.println("添加后缓存: " + scores);
        System.out.println("缓存大小: " + scores.size());

        System.out.println("获取Java成绩: " + scores.get("Java"));
        System.out.println("获取不存在的Key: " + scores.get("Go"));

        scores.put("Java", 95);
        System.out.println("更新Java成绩后: " + scores);

        System.out.println("删除Python: " + scores.remove("Python"));
        System.out.println("删除后缓存: " + scores);

        scores.clear();
        System.out.println("清空后缓存: " + scores);

        // A null key must be rejected with IllegalArgumentException.
        try {
            scores.put(null, 100);
        } catch (IllegalArgumentException rejected) {
            System.out.println("null key测试: " + rejected.getMessage());
        }
    }
}
||||
|
|
||||
|
/** |
||||
|
* ============================================ |
||||
|
* 主类:运行测试 |
||||
|
* ============================================ |
||||
|
*/ |
||||
|
public class GenericHomework { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.println("【任务一】Pair<K, V> 测试"); |
||||
|
System.out.println("========================"); |
||||
|
Pair.main(null); |
||||
|
System.out.println(); |
||||
|
|
||||
|
System.out.println("【任务二】Cache<K, V> 测试"); |
||||
|
System.out.println("========================"); |
||||
|
Cache.main(null); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,113 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
/** |
||||
|
* ============================================================================= |
||||
|
* MVC 三层架构审计报告 |
||||
|
* ============================================================================= |
||||
|
* |
||||
|
* 一、审计目的 |
||||
|
* ---------- |
||||
|
* 检查当前项目的 MVC(Model/View/Controller)三层架构划分是否规范, |
||||
|
* 是否存在跨层直接调用等越权行为。 |
||||
|
* |
||||
|
* |
||||
|
* 二、当前项目结构分析 |
||||
|
* ------------------- |
||||
|
* |
||||
|
* Model 层: |
||||
|
* - com.example.datacollect.model.Article |
||||
|
* |
||||
|
* View 层: |
||||
|
* - com.example.datacollect.view.ConsoleView |
||||
|
* |
||||
|
* Controller 层: |
||||
|
* - com.example.datacollect.controller.CrawlerController |
||||
|
* |
||||
|
* Command 层(可视为 Controller 的扩展): |
||||
|
* - HelpCommand, ListCommand, CrawlCommand, ExitCommand, HistoryCommand |
||||
|
* |
||||
|
* |
||||
|
* 三、发现的问题 |
||||
|
* ------------- |
||||
|
* |
||||
|
* 【问题1】Controller 直接持有 Model 列表引用 ⚠️ |
||||
|
* 位置:CrawlerController.java 第17行 |
||||
|
* 代码:private final List<Article> articles; |
||||
|
* |
||||
|
* 风险: |
||||
|
* - Controller 直接操作 Model 数据,违反单一职责 |
||||
|
* - 多个 Command 都可以直接修改 articles 列表 |
||||
|
* - 数据修改入口分散,难以追踪 |
||||
|
* |
||||
|
* 修改建议: |
||||
|
* - 引入 Service 层(如 ArticleService)专门管理 Article 数据 |
||||
|
* - Controller 只持有 Service 引用,Command 通过 Controller 间接访问 |
||||
|
* |
||||
|
* |
||||
|
* 【问题2】Command 直接持有 View 引用 ⚠️ |
||||
|
* 位置:所有 Command 实现类(如 CrawlCommand.java 第10行) |
||||
|
* 代码:private final ConsoleView view; |
||||
|
* |
||||
|
* 风险: |
||||
|
* - Command 越过 Controller 直接与 View 交互 |
||||
|
* - Command 承担了部分 Controller 职责 |
||||
|
* |
||||
|
* 修改建议: |
||||
|
* - Command 只负责解析命令和调用 Controller |
||||
|
* - View 引用统一由 Controller 管理 |
||||
|
* |
||||
|
* |
||||
|
* 【问题3】List<Article> 共享引用风险 ⚠️ |
||||
|
* 位置:CrawlerController.java 第17行 |
||||
|
* 代码:private final List<Article> articles; |
||||
|
* |
||||
|
* 风险: |
||||
|
* - articles 作为可变共享状态被多个 Command 操作 |
||||
|
* - 多线程环境下可能导致数据不一致 |
||||
|
* |
||||
|
* 修改建议: |
||||
|
* - 使用不可变列表或返回副本 |
||||
|
* - 添加线程安全保护(如 CopyOnWriteArrayList) |
||||
|
* |
||||
|
* |
||||
|
* 【问题4】View 直接遍历 Model 数据 |
||||
|
* 位置:ConsoleView.java display 方法 |
||||
|
* |
||||
|
* 风险: |
||||
|
* - View 层直接访问 Model 数据结构 |
||||
|
* |
||||
|
* 修改建议: |
||||
|
* - Controller 将需要显示的数据封装为 DTO |
||||
|
* |
||||
|
* |
||||
|
* 四、架构修改建议 |
||||
|
* --------------- |
||||
|
* |
||||
|
* 【推荐架构】 |
||||
|
* View -> Controller -> Service -> DAO -> Model |
||||
|
* |
||||
|
* 【具体修改】 |
||||
|
* 1. 新增 ArticleService.java 统一管理 Article 数据 |
||||
|
* 2. 修改 CrawlerController 持有 ArticleService 而非 List<Article> |
||||
|
* 3. Command 只调用 Controller 方法,不直接操作列表 |
||||
|
* |
||||
|
* |
||||
|
* 五、审计清单 |
||||
|
* ----------- |
||||
|
* □ Controller 是否直接持有 Model 数据? -> 是,需要引入 Service |
||||
|
* □ Command 是否直接操作 View? -> 是,需要通过 Controller |
||||
|
* □ 共享 List 是否线程安全? -> 否,需要保护 |
||||
|
* □ View 是否直接访问 Model? -> 部分存在,需要 DTO |
||||
|
* |
||||
|
* |
||||
|
* 六、总结 |
||||
|
* ------- |
||||
|
* 当前项目存在以下越权行为: |
||||
|
* 1. Controller 直接持有 Model 数据 |
||||
|
* 2. Command 直接操作 View 和 Model |
||||
|
* 3. 共享数据缺乏线程安全保护 |
||||
|
* |
||||
|
* 建议逐步引入 Service 层,解耦数据管理和视图渲染。 |
||||
|
* |
||||
|
* ============================================================================= |
||||
|
*/ |
||||
@ -0,0 +1,104 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
|
||||
|
import java.time.LocalDate; |
||||
|
import java.util.Objects; |
||||
|
|
||||
|
/** |
||||
|
* 文章实体类 |
||||
|
* 用于存储文章的相关信息 |
||||
|
*/ |
||||
|
public class Article { |
||||
|
|
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
private String author; |
||||
|
private LocalDate publishDate; |
||||
|
|
||||
|
/** |
||||
|
* 无参构造方法 |
||||
|
*/ |
||||
|
public Article() { |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 全参构造方法 |
||||
|
* @param title 文章标题 |
||||
|
* @param url 文章URL |
||||
|
* @param content 文章内容 |
||||
|
* @param author 作者 |
||||
|
* @param publishDate 发布日期 |
||||
|
*/ |
||||
|
public Article(String title, String url, String content, String author, LocalDate publishDate) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
this.author = author; |
||||
|
this.publishDate = publishDate; |
||||
|
} |
||||
|
|
||||
|
// Getter 和 Setter 方法
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getAuthor() { |
||||
|
return author; |
||||
|
} |
||||
|
|
||||
|
public void setAuthor(String author) { |
||||
|
this.author = author; |
||||
|
} |
||||
|
|
||||
|
public LocalDate getPublishDate() { |
||||
|
return publishDate; |
||||
|
} |
||||
|
|
||||
|
public void setPublishDate(LocalDate publishDate) { |
||||
|
this.publishDate = publishDate; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" |
||||
|
+ "title='" + title + '\'' |
||||
|
+ ", url='" + url + '\'' |
||||
|
+ ", content='" + content + '\'' |
||||
|
+ ", author='" + author + '\'' |
||||
|
+ ", publishDate=" + publishDate |
||||
|
+ '}'; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public boolean equals(Object o) { |
||||
|
if (this == o) return true; |
||||
|
if (o == null || getClass() != o.getClass()) return false; |
||||
|
Article article = (Article) o; |
||||
|
return Objects.equals(url, article.url); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public int hashCode() { |
||||
|
return Objects.hash(url); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,129 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
/** |
||||
|
* 命令别名管理器 |
||||
|
* 支持将长命令映射为短别名,方便用户输入 |
||||
|
*/ |
||||
|
public class CommandAlias { |
||||
|
|
||||
|
private Map<String, String> aliasMap; |
||||
|
|
||||
|
public CommandAlias() { |
||||
|
this.aliasMap = new HashMap<>(); |
||||
|
initDefaultAliases(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 初始化默认别名 |
||||
|
*/ |
||||
|
private void initDefaultAliases() { |
||||
|
aliasMap.put("c", "crawl"); |
||||
|
aliasMap.put("r", "run"); |
||||
|
aliasMap.put("s", "stop"); |
||||
|
aliasMap.put("h", "help"); |
||||
|
aliasMap.put("q", "quit"); |
||||
|
aliasMap.put("l", "list"); |
||||
|
aliasMap.put("a", "add"); |
||||
|
aliasMap.put("d", "delete"); |
||||
|
aliasMap.put("e", "edit"); |
||||
|
aliasMap.put("hist", "history"); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 添加自定义别名 |
||||
|
* @param alias 别名 |
||||
|
* @param command 原命令 |
||||
|
*/ |
||||
|
public void addAlias(String alias, String command) { |
||||
|
if (alias != null && command != null) { |
||||
|
aliasMap.put(alias.toLowerCase(), command.toLowerCase()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 移除别名 |
||||
|
* @param alias 要移除的别名 |
||||
|
*/ |
||||
|
public void removeAlias(String alias) { |
||||
|
aliasMap.remove(alias.toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 将别名转换为原命令 |
||||
|
* @param input 用户输入 |
||||
|
* @return 原命令,如果输入不是别名则返回原输入 |
||||
|
*/ |
||||
|
public String resolveCommand(String input) { |
||||
|
if (input == null) { |
||||
|
return null; |
||||
|
} |
||||
|
String trimmed = input.trim().toLowerCase(); |
||||
|
return aliasMap.getOrDefault(trimmed, input); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 检查输入是否为别名 |
||||
|
* @param input 用户输入 |
||||
|
* @return 是否为别名 |
||||
|
*/ |
||||
|
public boolean isAlias(String input) { |
||||
|
if (input == null) { |
||||
|
return false; |
||||
|
} |
||||
|
return aliasMap.containsKey(input.trim().toLowerCase()); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取所有别名映射 |
||||
|
* @return 别名映射的副本 |
||||
|
*/ |
||||
|
public Map<String, String> getAllAliases() { |
||||
|
return new HashMap<>(aliasMap); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 打印所有别名 |
||||
|
*/ |
||||
|
public void printAliases() { |
||||
|
System.out.println("===== 命令别名列表 ====="); |
||||
|
for (Map.Entry<String, String> entry : aliasMap.entrySet()) { |
||||
|
System.out.println(" " + entry.getKey() + " -> " + entry.getValue()); |
||||
|
} |
||||
|
System.out.println("========================"); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 简单交互测试主方法 |
||||
|
*/ |
||||
|
public static void main(String[] args) { |
||||
|
CommandAlias aliasManager = new CommandAlias(); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
System.out.println("===== 命令别名测试 ====="); |
||||
|
aliasManager.printAliases(); |
||||
|
System.out.println("\n输入命令或别名测试(输入 exit 退出):"); |
||||
|
|
||||
|
while (true) { |
||||
|
System.out.print("\n> "); |
||||
|
String input = scanner.nextLine().trim(); |
||||
|
|
||||
|
if ("exit".equalsIgnoreCase(input)) { |
||||
|
break; |
||||
|
} |
||||
|
|
||||
|
String resolved = aliasManager.resolveCommand(input); |
||||
|
if (!resolved.equals(input)) { |
||||
|
System.out.println("别名 '" + input + "' -> 原命令: " + resolved); |
||||
|
} else { |
||||
|
System.out.println("原命令: " + resolved); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("测试结束"); |
||||
|
scanner.close(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,96 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
/** |
||||
|
* 命令历史记录管理器 |
||||
|
* 用于记录用户输入的所有命令,并提供查看历史的功能 |
||||
|
*/ |
||||
|
public class HistoryCommand implements Command { |
||||
|
|
||||
|
private List<String> commandHistory; |
||||
|
|
||||
|
public HistoryCommand() { |
||||
|
this.commandHistory = new ArrayList<>(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 添加命令到历史记录 |
||||
|
* @param command 用户输入的命令 |
||||
|
*/ |
||||
|
public void addCommand(String command) { |
||||
|
if (command != null && !command.trim().isEmpty()) { |
||||
|
commandHistory.add(command); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 打印所有历史命令 |
||||
|
*/ |
||||
|
public void printHistory() { |
||||
|
if (commandHistory.isEmpty()) { |
||||
|
System.out.println("暂无命令历史记录"); |
||||
|
return; |
||||
|
} |
||||
|
System.out.println("===== 命令历史记录 ====="); |
||||
|
for (int i = 0; i < commandHistory.size(); i++) { |
||||
|
System.out.println((i + 1) + ". " + commandHistory.get(i)); |
||||
|
} |
||||
|
System.out.println("======================="); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 获取命令历史列表 |
||||
|
* @return 命令历史列表的副本 |
||||
|
*/ |
||||
|
public List<String> getCommandHistory() { |
||||
|
return new ArrayList<>(commandHistory); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 清空历史记录 |
||||
|
*/ |
||||
|
public void clearHistory() { |
||||
|
commandHistory.clear(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "history"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
printHistory(); |
||||
|
} |
||||
|
|
||||
|
/** |
||||
|
* 简单交互测试主方法 |
||||
|
*/ |
||||
|
public static void main(String[] args) { |
||||
|
HistoryCommand historyCommand = new HistoryCommand(); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
System.out.println("===== 命令历史测试 ====="); |
||||
|
System.out.println("输入命令(输入 exit 退出,输入 history 查看历史):"); |
||||
|
|
||||
|
while (true) { |
||||
|
System.out.print("\n> "); |
||||
|
String input = scanner.nextLine().trim(); |
||||
|
|
||||
|
if ("exit".equalsIgnoreCase(input)) { |
||||
|
System.out.println("退出程序"); |
||||
|
break; |
||||
|
} else if ("history".equalsIgnoreCase(input)) { |
||||
|
historyCommand.printHistory(); |
||||
|
} else { |
||||
|
historyCommand.addCommand(input); |
||||
|
System.out.println("命令已记录: " + input); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
scanner.close(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,81 @@ |
|||||
|
package com.example.datacollect.util; |
||||
|
|
||||
|
/**
 * UI color constants.
 *
 * <p>How to switch themes: change only the {@link #THEME_MODE} constant.
 * {@code THEME_MODE = false} selects the light theme and
 * {@code THEME_MODE = true} selects the dark theme.
 */
public class UIConstants {

    /** Theme switch: {@code false} = light theme, {@code true} = dark theme. */
    public static final boolean THEME_MODE = true;

    /** Light theme: background color. */
    public static final String LIGHT_BG_COLOR = "#FFFFFF";
    /** Light theme: foreground (text) color. */
    public static final String LIGHT_FG_COLOR = "#000000";
    /** Light theme: button color. */
    public static final String LIGHT_BUTTON_COLOR = "#007BFF";

    /** Dark theme: background color. */
    public static final String DARK_BG_COLOR = "#1E1E1E";
    /** Dark theme: foreground (text) color. */
    public static final String DARK_FG_COLOR = "#E0E0E0";
    /** Dark theme: button color. */
    public static final String DARK_BUTTON_COLOR = "#0D6EFD";

    /**
     * Returns the background color of the active theme.
     *
     * @return the dark background when {@link #THEME_MODE} is true, else the light one
     */
    public static String getBackgroundColor() {
        if (THEME_MODE) {
            return DARK_BG_COLOR;
        }
        return LIGHT_BG_COLOR;
    }

    /**
     * Returns the foreground (text) color of the active theme.
     *
     * @return the dark foreground when {@link #THEME_MODE} is true, else the light one
     */
    public static String getForegroundColor() {
        if (THEME_MODE) {
            return DARK_FG_COLOR;
        }
        return LIGHT_FG_COLOR;
    }

    /**
     * Returns the button color of the active theme.
     *
     * @return the dark button color when {@link #THEME_MODE} is true, else the light one
     */
    public static String getButtonColor() {
        if (THEME_MODE) {
            return DARK_BUTTON_COLOR;
        }
        return LIGHT_BUTTON_COLOR;
    }

    /** Prints the active theme name and its three colors on standard output. */
    public static void printCurrentTheme() {
        String modeLabel = THEME_MODE ? "暗色主题" : "亮色主题";
        System.out.println("===== UI 主题配置 =====");
        System.out.println("当前模式: " + modeLabel);
        System.out.println("背景色: " + getBackgroundColor());
        System.out.println("前景色: " + getForegroundColor());
        System.out.println("按钮色: " + getButtonColor());
        System.out.println("======================");
    }

    /**
     * Prints the current theme configuration.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        printCurrentTheme();
    }
}
||||
|
|
||||
|
|
||||
|
/* |
||||
|
* ============================================================================= |
||||
|
* 使用说明 |
||||
|
* ============================================================================= |
||||
|
* |
||||
|
* 1. 找到 UIConstants.java 文件 |
||||
|
* 路径:com.example.datacollect.util.UIConstants |
||||
|
* |
||||
|
* 2. 修改主题开关(只需改这一行): |
||||
|
* public static final boolean THEME_MODE = true; // true = 暗色, false = 亮色
|
||||
|
* |
||||
|
* 3. 所有 UI 颜色会自动切换: |
||||
|
* - 亮色模式: 白色背景 + 黑色文字 + 蓝色按钮 |
||||
|
* - 暗色模式: 深灰背景 + 浅灰文字 + 亮蓝按钮 |
||||
|
* |
||||
|
* 4. 在其他类中使用: |
||||
|
* String bgColor = UIConstants.getBackgroundColor(); |
||||
|
* String fgColor = UIConstants.getForegroundColor(); |
||||
|
* |
||||
|
* ============================================================================= |
||||
|
*/ |
||||
@ -0,0 +1,134 @@ |
|||||
|
package com.example.datacollect.util; |
||||
|
|
||||
|
import java.net.URI; |
||||
|
import java.net.URISyntaxException; |
||||
|
import java.util.regex.Pattern; |
||||
|
|
||||
|
/**
 * URL format validation utility.
 *
 * <p>Decides whether a string is a well-formed HTTP/HTTPS URL. A URL is accepted
 * when it parses as a {@link URI}, its scheme is {@code http} or {@code https}
 * (case-insensitive), it has a host, and the host matches {@code DOMAIN_PATTERN}.
 * {@link #isValidUrl} and {@link #validateWithMessage} share one set of checks,
 * so they always agree.
 */
public class UrlValidator {

    private static final String HTTP_PROTOCOL = "http";
    private static final String HTTPS_PROTOCOL = "https";

    /**
     * Dot-separated labels of ASCII letters, digits and inner hyphens. Dotted
     * IPv4 addresses match as well; bracketed IPv6 literals do not.
     */
    private static final Pattern DOMAIN_PATTERN = Pattern.compile(
            "^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*$"
    );

    /**
     * Checks whether a URL is valid.
     *
     * @param url the URL string to check; may be {@code null}
     * @return {@code true} if {@code url} is a valid HTTP/HTTPS URL
     */
    public static boolean isValidUrl(String url) {
        return findProblem(url) == null;
    }

    /**
     * Validates a URL and returns a human-readable result.
     *
     * @param url the URL string to check; may be {@code null}
     * @return a message describing the first failed check, or a message containing
     *         the URL when it is valid
     */
    public static String validateWithMessage(String url) {
        String problem = findProblem(url);
        if (problem != null) {
            return problem;
        }
        return "有效 URL:" + url;
    }

    /**
     * Runs every validation check in order.
     *
     * @param url the URL string to check; may be {@code null}
     * @return the message for the first failed check, or {@code null} if all pass
     */
    private static String findProblem(String url) {
        if (url == null || url.trim().isEmpty()) {
            return "无效:URL 不能为空";
        }

        URI uri;
        try {
            uri = new URI(url);
        } catch (URISyntaxException e) {
            return "无效:URL 格式错误 - " + e.getMessage();
        }

        String scheme = uri.getScheme();
        if (scheme == null) {
            return "无效:缺少协议 scheme(如 http://)";
        }
        // equalsIgnoreCase avoids locale-sensitive lower-casing of the scheme.
        if (!HTTP_PROTOCOL.equalsIgnoreCase(scheme) && !HTTPS_PROTOCOL.equalsIgnoreCase(scheme)) {
            return "无效:协议必须是 http 或 https,当前为:" + scheme;
        }

        String host = uri.getHost();
        if (host == null || host.isEmpty()) {
            return "无效:缺少主机名";
        }
        // This check used to exist only in isValidUrl, so the two public methods
        // could disagree (for example on "http://[::1]/").
        if (!DOMAIN_PATTERN.matcher(host).matches()) {
            return "无效:主机名格式不合法:" + host;
        }

        return null;
    }

    /**
     * Returns the scheme (protocol) of a URL.
     *
     * @param url the URL string; may be {@code null}
     * @return the scheme, or {@code null} if {@code url} is {@code null}, cannot
     *         be parsed, or has no scheme
     */
    public static String getProtocol(String url) {
        // new URI(null) throws NullPointerException, so handle null here.
        if (url == null) {
            return null;
        }
        try {
            return new URI(url).getScheme();
        } catch (URISyntaxException e) {
            return null;
        }
    }

    /**
     * Test entry point: validates a fixed list of sample URLs and prints the results.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        String[] testUrls = {
                "https://www.example.com",
                "http://localhost:8080/api",
                "https://github.com/user/repo",
                "ftp://ftp.example.com/file",
                "htp://invalid.protocol",
                "not a url",
                "",
                "https://192.168.1.1:8080",
                "https://sub.domain.example.com/path/to/page",
                "javascript:alert(1)"
        };

        System.out.println("===== URL 格式验证测试 =====\n");

        for (String url : testUrls) {
            String result = validateWithMessage(url);
            System.out.println("测试: " + (url.isEmpty() ? "(空字符串)" : url));
            System.out.println("结果: " + result);
            System.out.println();
        }
    }
}
||||
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,17 @@ |
|||||
|
# DataCollect 教学项目 — 最小可运行版本 |
||||
|
|
||||
|
这是一个最小可用的 Java CLI 演示工程,目标:打印帮助信息以验证运行环境。 |
||||
|
|
||||
|
构建: |
||||
|
```bash |
||||
|
mvn -q package |
||||
|
``` |
||||
|
|
||||
|
运行(示例): |
||||
|
```bash |
||||
|
java -jar target/datacollect-cli-0.1.0-jar-with-dependencies.jar --help |
||||
|
``` |
||||
|
|
||||
|
项目结构(最小): |
||||
|
- `src/main/java/com/example/datacollect/Main.java` — CLI 入口,打印帮助 |
||||
|
- `pom.xml` — Maven 构建配置,生成可执行 jar |
||||
@ -0,0 +1,45 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
<groupId>com.example</groupId> |
||||
|
<artifactId>datacollect-cli</artifactId> |
||||
|
<version>0.1.0</version> |
||||
|
<properties> |
||||
|
<maven.compiler.source>11</maven.compiler.source> |
||||
|
<maven.compiler.target>11</maven.compiler.target> |
||||
|
</properties> |
||||
|
<build> |
||||
|
<plugins> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-compiler-plugin</artifactId> |
||||
|
<version>3.8.1</version> |
||||
|
</plugin> |
||||
|
<plugin> |
||||
|
<groupId>org.apache.maven.plugins</groupId> |
||||
|
<artifactId>maven-assembly-plugin</artifactId> |
||||
|
<version>3.3.0</version> |
||||
|
<configuration> |
||||
|
<archive> |
||||
|
<manifest> |
||||
|
<mainClass>com.example.datacollect.Main</mainClass> |
||||
|
</manifest> |
||||
|
</archive> |
||||
|
<descriptorRefs> |
||||
|
<descriptorRef>jar-with-dependencies</descriptorRef> |
||||
|
</descriptorRefs> |
||||
|
</configuration> |
||||
|
<executions> |
||||
|
<execution> |
||||
|
<id>make-assembly</id> |
||||
|
<phase>package</phase> |
||||
|
<goals> |
||||
|
<goal>single</goal> |
||||
|
</goals> |
||||
|
</execution> |
||||
|
</executions> |
||||
|
</plugin> |
||||
|
</plugins> |
||||
|
</build> |
||||
|
</project> |
||||
@ -0,0 +1,21 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class Main { |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
CrawlerController controller = new CrawlerController(view, articles); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (w9_1)! Type help for commands."); |
||||
|
while (true) { |
||||
|
controller.handle(view.readLine()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,69 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
/** |
||||
|
* ============================================================================= |
||||
|
* List<Article> 共享引用风险小结 |
||||
|
* ============================================================================= |
||||
|
* |
||||
|
* 【问题描述】 |
||||
|
* |
||||
|
* 当多个组件或线程共享同一个 List<Article> 引用时,会带来以下风险: |
||||
|
* |
||||
|
* 1. 数据被意外修改 |
||||
|
* - 多个模块共享同一列表引用,一个模块的修改会影响其他模块 |
||||
|
* - 例如:ListCommand 删除了文章,CrawlCommand 随后遍历时数据已变化 |
||||
|
* - 导致数据不一致、遍历异常(ConcurrentModificationException) |
||||
|
* |
||||
|
* 2. 线程安全问题 |
||||
|
* - 当前 CrawlerController 直接持有 List<Article> |
||||
|
* - ArrayList 不是线程安全的,高并发下会出现问题 |
||||
|
* - 可能导致数据丢失、索引越界、数据损坏 |
||||
|
* |
||||
|
* 3. 作用域混淆 |
||||
|
* - 难以追踪数据在何时、何处被修改 |
||||
|
* - 调试困难,问题难以复现 |
||||
|
* - 代码可读性和可维护性降低 |
||||
|
* |
||||
|
* |
||||
|
* 【当前项目问题定位】 |
||||
|
* |
||||
|
* 位置:CrawlerController.java 第17行 |
||||
|
* 代码:private final List<Article> articles; |
||||
|
* |
||||
|
* 风险:articles 被多个 Command 直接操作,违反单一职责原则 |
||||
|
* |
||||
|
* |
||||
|
* 【解决方案】 |
||||
|
* |
||||
|
* 1. 防御性复制 |
||||
|
* // 传入时复制
|
||||
|
* public void processArticles(List<Article> input) { |
||||
|
* List<Article> safeCopy = new ArrayList<>(input); |
||||
|
* } |
||||
|
* |
||||
|
* // 传出时复制
|
||||
|
* public List<Article> getArticles() { |
||||
|
* return new ArrayList<>(internalList); |
||||
|
* } |
||||
|
* |
||||
|
* 2. 不可变列表 |
||||
|
* List<Article> unmodifiable = Collections.unmodifiableList(articles); |
||||
|
* |
||||
|
* 3. 线程安全列表 |
||||
|
* private final List<Article> articles = new CopyOnWriteArrayList<>(); |
||||
|
* |
||||
|
* 4. 引入 Service 层 |
||||
|
* - 单一组件负责 List 的管理 |
||||
|
* - 其他组件通过 Service 接口访问 |
||||
|
* - 遵循单一职责原则 |
||||
|
* |
||||
|
* |
||||
|
* 【总结】 |
||||
|
* |
||||
|
* 共享可变数据结构是许多复杂 bug 的根源。当前项目中, |
||||
|
* CrawlerController 直接持有 List<Article> 并传递给所有 Command, |
||||
|
* 这种设计存在数据被意外修改和线程安全风险。建议引入 Service 层 |
||||
|
* 统一管理数据访问,使用防御性复制或线程安全列表提高代码健壮性。 |
||||
|
* |
||||
|
* ============================================================================= |
||||
|
*/ |
||||
@ -0,0 +1,9 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article> articles); |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
return; |
||||
|
} |
||||
|
view.printInfo("Stub: would crawl " + args[1]); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,24 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article> articles) { |
||||
|
view.display(articles); |
||||
|
} |
||||
|
} |
||||
Some files were not shown because too many files changed in this diff
Loading…
Reference in new issue