diff --git a/w10/Main.java b/w10/Main.java deleted file mode 100644 index 0f98466..0000000 --- a/w10/Main.java +++ /dev/null @@ -1,21 +0,0 @@ -package com.example.datacollect; - -import com.example.datacollect.controller.CrawlerController; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.StrategyFactory; -import com.example.datacollect.view.ConsoleView; - -public class Main { - - public static void main(String[] args) { - ConsoleView view = new ConsoleView(); - ArticleRepository repository = new ArticleRepository(); - StrategyFactory strategyFactory = new StrategyFactory(); - CrawlerController controller = new CrawlerController(view, repository, strategyFactory); - - view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands."); - while (true) { - controller.handle(view.readLine()); - } - } -} \ No newline at end of file diff --git a/w10/README.md b/w10/README.md deleted file mode 100644 index 0c8bb27..0000000 --- a/w10/README.md +++ /dev/null @@ -1,144 +0,0 @@ -# W10 作业提交:设计模式实战 - -## 目录结构 - -``` -w10/ -└── src/ - └── main/ - └── java/ - └── com/ - └── example/ - └── datacollect/ - ├── Main.java - ├── command/ - │ ├── Command.java - │ ├── CrawlCommand.java - │ ├── AnalyzeCommand.java - │ ├── ListCommand.java - │ ├── HelpCommand.java - │ ├── ExitCommand.java - │ └── HistoryCommand.java - ├── controller/ - │ └── CrawlerController.java - ├── model/ - │ └── Article.java - ├── repository/ - │ └── ArticleRepository.java - ├── strategy/ - │ ├── CrawlStrategy.java - │ ├── StrategyFactory.java - │ ├── HnuNewsStrategy.java - │ ├── BlogStrategy.java - │ ├── NewsStrategy.java - │ └── GenericNewsStrategy.java - └── view/ - └── ConsoleView.java -``` - -## 必做任务完成情况 - -### 1. ArticleRepository 完善 ✅ -- `add()`: 拒绝 null,抛出 IllegalArgumentException -- `addAll()`: 拒绝 null 列表和列表中的 null 元素 -- `getAll()`: 返回 `Collections.unmodifiableList()` 不可变视图 -- `size()`: 返回文章数量 -- `clear()`: 清空所有文章 - -### 2. AnalyzeCommand ✅ -- 复用策略解析但**不存储**到 Repository -- 输出统计信息:文章总数、含作者/日期/内容的数量、使用的策略名称 -- 显示前 3 篇文章标题作为预览 - -### 3. AI 架构审计 ✅ - -#### 类签名汇总 - -```java -// Command 层 -interface Command { void execute(String[], ArticleRepository); } -class CrawlCommand(ConsoleView, StrategyFactory) -class AnalyzeCommand(ConsoleView, StrategyFactory) -class ListCommand(ConsoleView) -class HelpCommand(ConsoleView) -class ExitCommand(ConsoleView) -class HistoryCommand(ConsoleView, List) - -// Controller 层 -class CrawlerController(ConsoleView, ArticleRepository, StrategyFactory) - -// Repository 层 -class ArticleRepository { add(), addAll(), getAll(), size(), clear() } - -// Strategy 层 -interface CrawlStrategy { parse(), supports(), getPriority(), getPattern() } -class StrategyFactory { getStrategy(url), register(), setDefaultStrategy() } -class HnuNewsStrategy implements CrawlStrategy -class BlogStrategy implements CrawlStrategy -class NewsStrategy implements CrawlStrategy -class GenericNewsStrategy implements CrawlStrategy (正则匹配) - -// Model 层 -class Article { title, url, content, author, publishDate } - -// View 层 -class ConsoleView -``` - -#### 架构审计结果 - -| 检查项 | 结果 | 说明 | -|--------|------|------| -| **策略解耦** | ✅ 优秀 | 策略接口与实现完全分离 | -| **Repository 封装** | ✅ 优秀 | 使用不可变视图 + null 防御 | -| **开闭原则** | ✅ 达标 | 新增网站只需加策略类 + 注册一行 | -| **依赖倒置** | ✅ 良好 | Command/Strategy 依赖抽象接口 | -| **单一职责** | ✅ 达标 | 每个类职责清晰 | -| **循环依赖** | ✅ 无 | 依赖链单向 | - -## 选做任务完成情况 - -### 正则策略匹配 ✅ -- `GenericNewsStrategy` 使用正则表达式 `.*\.(news|press|article)s?\..*` 匹配新闻类网站 - -### 默认策略 ✅ -- `StrategyFactory` 内置 `DefaultStrategy`,当没有匹配策略时返回空列表 - -### 策略优先级 ✅ -- `CrawlStrategy` 接口新增 `getPriority()` 默认方法 -- `GenericNewsStrategy` 设置优先级为 5(高于默认优先级 1) -- `StrategyFactory.getStrategy()` 遍历所有策略,选择优先级最高的匹配策略 - -### 思考题答案 - -**Q: 两个策略都 supports 同一 URL 时怎么办?** - -**A:** 采用**优先级机制**解决: - -1. 每个策略实现可以通过 `getPriority()` 返回优先级值 -2. `StrategyFactory.getStrategy()` 遍历所有策略时,记录最高优先级 -3. 如果多个策略都支持同一 URL,选择优先级最高的那个 -4. 如果优先级相同,选择最先注册的策略(遍历顺序决定) - -这种设计的优势: -- 允许通用策略(如 `GenericNewsStrategy`)和专用策略(如 `HnuNewsStrategy`)共存 -- 专用策略可设置更高优先级,确保精确匹配优先 -- 通用策略作为兜底,提高系统兼容性 - -## 命令功能对比 - -| 命令 | 功能 | 是否存储 | -|------|------|----------| -| `crawl ` | 爬取并存储文章 | ✅ 是 | -| `analyze ` | 分析文章统计(不存储) | ❌ 否 | -| `list` | 列出已存储文章 | - | -| `history` | 显示命令历史 | - | -| `help` | 显示帮助 | - | -| `exit` | 退出程序 | - | - -## 设计模式应用 - -1. **策略模式**:`CrawlStrategy` 接口定义标准,各策略独立实现 -2. **工厂模式**:`StrategyFactory` 根据 URL 自动选择策略 -3. **Repository 模式**:数据访问封装,防御式编程 -4. **命令模式**:所有 Command 统一签名,易于扩展 \ No newline at end of file diff --git a/w10/command/AnalyzeCommand.java b/w10/command/AnalyzeCommand.java deleted file mode 100644 index 913e4a9..0000000 --- a/w10/command/AnalyzeCommand.java +++ /dev/null @@ -1,87 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.model.Article; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.CrawlStrategy; -import com.example.datacollect.strategy.StrategyFactory; -import com.example.datacollect.view.ConsoleView; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; - -import java.util.List; - -public class AnalyzeCommand implements Command { - private final ConsoleView view; - private final StrategyFactory strategyFactory; - - public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { - this.view = view; - this.strategyFactory = strategyFactory; - } - - @Override - public String getName() { - return "analyze"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - if (args.length < 2) { - view.printError("Usage: analyze "); - return; - } - String url = args[1]; - - CrawlStrategy strategy = strategyFactory.getStrategy(url); - if (strategy == null) { - view.printError("No strategy found for: " + url); - return; - } - - try { - view.printInfo("Analyzing: " + url); - Document doc = Jsoup.connect(url).get(); - List
parsed = strategy.parse(url, doc); - - view.printInfo("=== Analysis Report ==="); - view.printInfo("Total articles found: " + parsed.size()); - - int titlesWithAuthor = 0; - int titlesWithDate = 0; - int titlesWithContent = 0; - - for (Article article : parsed) { - if (article.getAuthor() != null && !article.getAuthor().isEmpty()) { - titlesWithAuthor++; - } - if (article.getPublishDate() != null && !article.getPublishDate().isEmpty()) { - titlesWithDate++; - } - if (article.getContent() != null && !article.getContent().isEmpty()) { - titlesWithContent++; - } - } - - view.printInfo("Articles with author: " + titlesWithAuthor); - view.printInfo("Articles with publish date: " + titlesWithDate); - view.printInfo("Articles with content: " + titlesWithContent); - view.printInfo("Strategy used: " + strategy.getClass().getSimpleName()); - - if (parsed.size() > 0) { - view.printInfo("\nSample article titles:"); - int limit = Math.min(3, parsed.size()); - for (int i = 0; i < limit; i++) { - view.printInfo("- " + parsed.get(i).getTitle()); - } - if (parsed.size() > 3) { - view.printInfo("... and " + (parsed.size() - 3) + " more"); - } - } - - view.printSuccess("Analysis completed (not stored)"); - - } catch (Exception e) { - view.printError("Failed to analyze: " + e.getMessage()); - } - } -} \ No newline at end of file diff --git a/w10/command/Command.java b/w10/command/Command.java deleted file mode 100644 index e3e2030..0000000 --- a/w10/command/Command.java +++ /dev/null @@ -1,8 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; - -public interface Command { - String getName(); - void execute(String[] args, ArticleRepository repository); -} \ No newline at end of file diff --git a/w10/command/CrawlCommand.java b/w10/command/CrawlCommand.java deleted file mode 100644 index 6f9e1a8..0000000 --- a/w10/command/CrawlCommand.java +++ /dev/null @@ -1,50 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.CrawlStrategy; -import com.example.datacollect.strategy.StrategyFactory; -import com.example.datacollect.view.ConsoleView; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; - -public class CrawlCommand implements Command { - private final ConsoleView view; - private final StrategyFactory strategyFactory; - - public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { - this.view = view; - this.strategyFactory = strategyFactory; - } - - @Override - public String getName() { - return "crawl"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - if (args.length < 2) { - view.printError("Usage: crawl "); - return; - } - String url = args[1]; - - CrawlStrategy strategy = strategyFactory.getStrategy(url); - if (strategy == null) { - view.printError("No strategy found for: " + url); - return; - } - - try { - view.printInfo("Crawling: " + url); - Document doc = Jsoup.connect(url).get(); - var articles = strategy.parse(url, doc); - for (var article : articles) { - repository.add(article); - } - view.printSuccess("Crawled " + articles.size() + " articles."); - } catch (Exception e) { - view.printError("Failed to crawl: " + e.getMessage()); - } - } -} \ No newline at end of file diff --git a/w10/command/ExitCommand.java b/w10/command/ExitCommand.java deleted file mode 100644 index 15c2f00..0000000 --- a/w10/command/ExitCommand.java +++ /dev/null @@ -1,23 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.view.ConsoleView; - -public class ExitCommand implements Command { - private final ConsoleView view; - - public ExitCommand(ConsoleView view) { - this.view = view; - } - - @Override - public String getName() { - return "exit"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - view.printSuccess("Bye!"); - System.exit(0); - } -} \ No newline at end of file diff --git a/w10/command/HelpCommand.java b/w10/command/HelpCommand.java deleted file mode 100644 index ec3ff87..0000000 --- a/w10/command/HelpCommand.java +++ /dev/null @@ -1,28 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.view.ConsoleView; - -public class HelpCommand implements Command { - private final ConsoleView view; - - public HelpCommand(ConsoleView view) { - this.view = view; - } - - @Override - public String getName() { - return "help"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - view.printInfo("Commands:"); - view.printInfo(" crawl - Crawl articles from URL and store"); - view.printInfo(" analyze - Analyze URL without storing"); - view.printInfo(" list - List all stored articles"); - view.printInfo(" history - Show command history"); - view.printInfo(" help - Show this help"); - view.printInfo(" exit - Exit the program"); - } -} \ No newline at end of file diff --git a/w10/command/HistoryCommand.java b/w10/command/HistoryCommand.java deleted file mode 100644 index 7baf7bd..0000000 --- a/w10/command/HistoryCommand.java +++ /dev/null @@ -1,33 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.view.ConsoleView; - -import java.util.List; - -public class HistoryCommand implements Command { - private final ConsoleView view; - private final List commandHistory; - - public HistoryCommand(ConsoleView view, List commandHistory) { - this.view = view; - this.commandHistory = commandHistory; - } - - @Override - public String getName() { - return "history"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - if (commandHistory.isEmpty()) { - view.printInfo("No command history."); - return; - } - view.printInfo("Command history:"); - for (int i = 0; i < commandHistory.size(); i++) { - view.printInfo((i + 1) + ". " + commandHistory.get(i)); - } - } -} \ No newline at end of file diff --git a/w10/command/ListCommand.java b/w10/command/ListCommand.java deleted file mode 100644 index 29b3fc0..0000000 --- a/w10/command/ListCommand.java +++ /dev/null @@ -1,22 +0,0 @@ -package com.example.datacollect.command; - -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.view.ConsoleView; - -public class ListCommand implements Command { - private final ConsoleView view; - - public ListCommand(ConsoleView view) { - this.view = view; - } - - @Override - public String getName() { - return "list"; - } - - @Override - public void execute(String[] args, ArticleRepository repository) { - view.display(repository.getAll()); - } -} \ No newline at end of file diff --git a/w10/controller/CrawlerController.java b/w10/controller/CrawlerController.java deleted file mode 100644 index e373dd8..0000000 --- a/w10/controller/CrawlerController.java +++ /dev/null @@ -1,56 +0,0 @@ -package com.example.datacollect.controller; - -import com.example.datacollect.command.AnalyzeCommand; -import com.example.datacollect.command.Command; -import com.example.datacollect.command.CrawlCommand; -import com.example.datacollect.command.ExitCommand; -import com.example.datacollect.command.HelpCommand; -import com.example.datacollect.command.HistoryCommand; -import com.example.datacollect.command.ListCommand; -import com.example.datacollect.repository.ArticleRepository; -import com.example.datacollect.strategy.StrategyFactory; -import com.example.datacollect.view.ConsoleView; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; - -public class CrawlerController { - private final Map commands = new HashMap<>(); - private final ConsoleView view; - private final ArticleRepository repository; - private final List commandHistory = new ArrayList<>(); - - public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) { - this.view = view; - this.repository = repository; - register(new HelpCommand(view)); - register(new ListCommand(view)); - register(new CrawlCommand(view, strategyFactory)); - register(new AnalyzeCommand(view, strategyFactory)); - register(new ExitCommand(view)); - register(new HistoryCommand(view, commandHistory)); - } - - private void register(Command command) { - commands.put(command.getName(), command); - } - - public void handle(String input) { - String text = input == null ? "" : input.trim(); - if (text.isEmpty()) { - return; - } - - commandHistory.add(text); - - String[] args = text.split("\\s+"); - String cmdName = args[0].toLowerCase(); - Command command = commands.get(cmdName); - if (command == null) { - view.printError("Unknown command: " + cmdName); - return; - } - command.execute(args, repository); - } -} \ No newline at end of file diff --git a/w10/model/Article.java b/w10/model/Article.java deleted file mode 100644 index b36034b..0000000 --- a/w10/model/Article.java +++ /dev/null @@ -1,73 +0,0 @@ -package com.example.datacollect.model; - -public class Article { - private String title; - private String url; - private String content; - private String author; - private String publishDate; - - public Article(String title, String url, String content) { - this.title = title; - this.url = url; - this.content = content; - } - - public Article(String title, String url, String content, String author, String publishDate) { - this.title = title; - this.url = url; - this.content = content; - this.author = author; - this.publishDate = publishDate; - } - - public String getTitle() { - return title; - } - - public void setTitle(String title) { - this.title = title; - } - - public String getUrl() { - return url; - } - - public void setUrl(String url) { - this.url = url; - } - - public String getContent() { - return content; - } - - public void setContent(String content) { - this.content = content; - } - - public String getAuthor() { - return author; - } - - public void setAuthor(String author) { - this.author = author; - } - - public String getPublishDate() { - return publishDate; - } - - public void setPublishDate(String publishDate) { - this.publishDate = publishDate; - } - - @Override - public String toString() { - return "Article{" - + "title='" + title + '\'' - + ", url='" + url + '\'' - + ", author='" + author + '\'' - + ", publishDate='" + publishDate + '\'' - + '}'; - } -} \ No newline at end of file diff --git a/w10/repository/ArticleRepository.java b/w10/repository/ArticleRepository.java deleted file mode 100644 index 930221a..0000000 --- a/w10/repository/ArticleRepository.java +++ /dev/null @@ -1,41 +0,0 @@ -package com.example.datacollect.repository; - -import com.example.datacollect.model.Article; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; - -public class ArticleRepository { - private final List
articles = new ArrayList<>(); - - public void add(Article article) { - if (article == null) { - throw new IllegalArgumentException("Article cannot be null"); - } - articles.add(article); - } - - public void addAll(List
newArticles) { - if (newArticles == null) { - throw new IllegalArgumentException("Article list cannot be null"); - } - for (Article article : newArticles) { - if (article == null) { - throw new IllegalArgumentException("Article in list cannot be null"); - } - articles.add(article); - } - } - - public List
getAll() { - return Collections.unmodifiableList(articles); - } - - public int size() { - return articles.size(); - } - - public void clear() { - articles.clear(); - } -} \ No newline at end of file diff --git a/w10/strategy/BlogStrategy.java b/w10/strategy/BlogStrategy.java deleted file mode 100644 index 9ad5e10..0000000 --- a/w10/strategy/BlogStrategy.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class BlogStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("blog.example.com"); - } - - @Override - public List
parse(String url, Document doc) { - List
articles = new ArrayList<>(); - Elements titles = doc.select(".post-title"); - for (Element e : titles) { - articles.add(new Article(e.text(), url, "")); - } - return articles; - } -} \ No newline at end of file diff --git a/w10/strategy/CrawlStrategy.java b/w10/strategy/CrawlStrategy.java deleted file mode 100644 index 0ccdb2e..0000000 --- a/w10/strategy/CrawlStrategy.java +++ /dev/null @@ -1,19 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import java.util.List; -import java.util.regex.Pattern; - -public interface CrawlStrategy { - List
parse(String url, Document doc); - boolean supports(String url); - - default int getPriority() { - return 1; - } - - default Pattern getPattern() { - return null; - } -} \ No newline at end of file diff --git a/w10/strategy/GenericNewsStrategy.java b/w10/strategy/GenericNewsStrategy.java deleted file mode 100644 index c893280..0000000 --- a/w10/strategy/GenericNewsStrategy.java +++ /dev/null @@ -1,57 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; - -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -public class GenericNewsStrategy implements CrawlStrategy { - private static final Pattern PATTERN = Pattern.compile(".*\\.(news|press|article)s?\\..*"); - private static final int PRIORITY = 5; - - @Override - public boolean supports(String url) { - return PATTERN.matcher(url).find(); - } - - @Override - public List
parse(String url, Document doc) { - List
articles = new ArrayList<>(); - - Elements items = doc.select("article, .news-item, .article-item, [class*='news'], [class*='article']"); - - for (Element item : items) { - String title = item.selectFirst("h1, h2, h3, .title, [class*='title']") != null - ? item.selectFirst("h1, h2, h3, .title, [class*='title']").text().trim() - : ""; - - String articleUrl = item.selectFirst("a[href]") != null - ? item.selectFirst("a[href]").attr("abs:href") - : url; - - String content = item.selectFirst("p, .content, [class*='content']") != null - ? item.selectFirst("p, .content, [class*='content']").text().trim() - : ""; - - if (!title.isEmpty()) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } - - @Override - public int getPriority() { - return PRIORITY; - } - - @Override - public Pattern getPattern() { - return PATTERN; - } -} \ No newline at end of file diff --git a/w10/strategy/HnuNewsStrategy.java b/w10/strategy/HnuNewsStrategy.java deleted file mode 100644 index 1204e4d..0000000 --- a/w10/strategy/HnuNewsStrategy.java +++ /dev/null @@ -1,49 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class HnuNewsStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("news.hnu.edu.cn"); - } - - @Override - public List
parse(String url, Document doc) { - List
articles = new ArrayList<>(); - Elements listItems = doc.select("ul.list11 li"); - - for (Element li : listItems) { - Element link = li.selectFirst("a"); - if (link == null) continue; - - String articleUrl = link.attr("href"); - if (!articleUrl.startsWith("http")) { - articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); - } - - String title = ""; - Element titleEl = link.selectFirst("h4.l2.h4s2"); - if (titleEl != null) { - title = titleEl.text().trim(); - } - - String content = ""; - Element contentEl = link.selectFirst("p.l3.ps3"); - if (contentEl != null) { - content = contentEl.text().trim(); - } - - if (!title.isEmpty()) { - articles.add(new Article(title, articleUrl, content)); - } - } - - return articles; - } -} \ No newline at end of file diff --git a/w10/strategy/NewsStrategy.java b/w10/strategy/NewsStrategy.java deleted file mode 100644 index 7bfb888..0000000 --- a/w10/strategy/NewsStrategy.java +++ /dev/null @@ -1,25 +0,0 @@ -package com.example.datacollect.strategy; - -import com.example.datacollect.model.Article; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; -import java.util.ArrayList; -import java.util.List; - -public class NewsStrategy implements CrawlStrategy { - @Override - public boolean supports(String url) { - return url.contains("news.example.com"); - } - - @Override - public List
parse(String url, Document doc) { - List
articles = new ArrayList<>(); - Elements items = doc.select(".article-headline"); - for (Element e : items) { - articles.add(new Article(e.text(), url, "")); - } - return articles; - } -} \ No newline at end of file diff --git a/w10/strategy/StrategyFactory.java b/w10/strategy/StrategyFactory.java deleted file mode 100644 index e4789f5..0000000 --- a/w10/strategy/StrategyFactory.java +++ /dev/null @@ -1,107 +0,0 @@ -package com.example.datacollect.strategy; - -import java.util.ArrayList; -import java.util.List; -import java.util.regex.Pattern; - -public class StrategyFactory { - private final List strategies = new ArrayList<>(); - private CrawlStrategy defaultStrategy; - - public StrategyFactory() { - strategies.add(new HnuNewsStrategy()); - strategies.add(new BlogStrategy()); - strategies.add(new NewsStrategy()); - strategies.add(new GenericNewsStrategy()); - defaultStrategy = new DefaultStrategy(); - } - - public CrawlStrategy getStrategy(String url) { - CrawlStrategy matched = null; - int highestPriority = Integer.MIN_VALUE; - - for (CrawlStrategy s : strategies) { - boolean supports = false; - - Pattern pattern = s.getPattern(); - if (pattern != null) { - supports = pattern.matcher(url).find(); - } else { - supports = s.supports(url); - } - - if (supports) { - int priority = s.getPriority(); - if (priority > highestPriority) { - highestPriority = priority; - matched = s; - } - } - } - - if (matched != null) { - return matched; - } - - return defaultStrategy; - } - - public void register(CrawlStrategy strategy) { - strategies.add(strategy); - } - - public void register(CrawlStrategy strategy, int priority) { - strategies.add(new PrioritizedStrategy(strategy, priority)); - } - - public void setDefaultStrategy(CrawlStrategy defaultStrategy) { - this.defaultStrategy = defaultStrategy; - } - - private static class PrioritizedStrategy implements CrawlStrategy { - private final CrawlStrategy delegate; - private final int priority; - - public PrioritizedStrategy(CrawlStrategy delegate, int priority) { - this.delegate = delegate; - this.priority = priority; - } - - @Override - public List
parse(String url, Document doc) { - return delegate.parse(url, doc); - } - - @Override - public boolean supports(String url) { - return delegate.supports(url); - } - - @Override - public int getPriority() { - return priority; - } - - @Override - public Pattern getPattern() { - return delegate.getPattern(); - } - } - - private static class DefaultStrategy implements CrawlStrategy { - @Override - public List
parse(String url, Document doc) { - return List.of(); - } - - @Override - public boolean supports(String url) { - return false; - } - - @Override - public int getPriority() { - return Integer.MIN_VALUE; - } - } -} \ No newline at end of file diff --git a/w10/view/ConsoleView.java b/w10/view/ConsoleView.java deleted file mode 100644 index 987b617..0000000 --- a/w10/view/ConsoleView.java +++ /dev/null @@ -1,42 +0,0 @@ -package com.example.datacollect.view; - -import com.example.datacollect.model.Article; -import java.util.List; -import java.util.Scanner; - -public class ConsoleView { - private static final String ANSI_RESET = "\u001B[0m"; - private static final String ANSI_GREEN = "\u001B[32m"; - private static final String ANSI_RED = "\u001B[31m"; - private static final String ANSI_BLUE = "\u001B[34m"; - - private final Scanner scanner = new Scanner(System.in); - - public String readLine() { - System.out.print("> "); - return scanner.nextLine(); - } - - public void printSuccess(String msg) { - System.out.println(ANSI_GREEN + msg + ANSI_RESET); - } - - public void printError(String msg) { - System.out.println(ANSI_RED + msg + ANSI_RESET); - } - - public void printInfo(String msg) { - System.out.println(ANSI_BLUE + msg + ANSI_RESET); - } - - public void display(List
articles) { - if (articles.isEmpty()) { - printInfo("暂无文章,请先执行 crawl。"); - return; - } - for (int i = 0; i < articles.size(); i++) { - Article a = articles.get(i); - System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); - } - } -} \ No newline at end of file