Browse Source

feat(w9):W9-202506010204-孟鑫垚

main
Mengxinyao 2 weeks ago
parent
commit
f0e2453006
  1. BIN
      w9/.DS_Store
  2. 7
      w9/command/Command.java
  3. 41
      w9/command/CrawlCommand.java
  4. 31
      w9/command/ExitCommand.java
  5. 26
      w9/command/HelpCommand.java
  6. 26
      w9/command/ListCommand.java
  7. 26
      w9/command/LoadCommand.java
  8. 26
      w9/command/SaveCommand.java
  9. 73
      w9/controller/CrawlerController.java
  10. 104
      w9/model/Article.java
  11. 101
      w9/view/ConsoleView.java

BIN
w9/.DS_Store

Binary file not shown.

7
w9/command/Command.java

@ -0,0 +1,7 @@
package com.crawler.command;
/**
 * An executable command identified by a name and described by a short text.
 */
public interface Command {

    /**
     * Returns the keyword that identifies this command.
     *
     * @return the command name
     */
    String getCommandName();

    /**
     * Returns a short, human-readable summary of what the command does.
     *
     * @return the command description
     */
    String getDescription();

    /**
     * Runs the command.
     *
     * @param args the arguments supplied to the command
     * @throws Exception if the command cannot complete
     */
    void execute(String[] args) throws Exception;
}

41
w9/command/CrawlCommand.java

@ -0,0 +1,41 @@
package com.crawler.command;
import com.crawler.controller.CrawlerController;
import com.crawler.view.ConsoleView;
/**
 * Command that asks the {@link CrawlerController} to crawl a URL.
 *
 * <p>Usage: {@code crawl <url> [strategy]}. When no strategy argument is
 * given, {@code "jsoup"} is used.
 */
public class CrawlCommand implements Command {

    /** Strategy name used when the caller supplies only a URL. */
    private static final String DEFAULT_STRATEGY = "jsoup";

    private final CrawlerController controller;
    private final ConsoleView view;

    /**
     * @param controller controller that performs the crawl
     * @param view       view used to report usage and failure messages
     */
    public CrawlCommand(CrawlerController controller, ConsoleView view) {
        this.controller = controller;
        this.view = view;
    }

    /**
     * Crawls {@code args[0]} using the strategy in {@code args[1]} if present.
     * Any exception raised by the controller is reported through the view
     * instead of being propagated.
     *
     * @param args the URL, optionally followed by a strategy name
     */
    @Override
    public void execute(String[] args) {
        // A missing argument array is treated like an empty one rather than crashing.
        if (args == null || args.length < 1) {
            view.displayError("Usage: crawl <url> [strategy]");
            return;
        }
        String url = args[0];
        String strategy = args.length > 1 ? args[1] : DEFAULT_STRATEGY;
        try {
            controller.crawl(url, strategy);
        } catch (Exception e) {
            view.displayError("Crawl failed: " + describe(e));
        }
    }

    /**
     * Returns the exception message, or the exception's simple class name when
     * it carries no message, so the user never sees "Crawl failed: null".
     */
    private static String describe(Exception e) {
        String message = e.getMessage();
        if (message == null || message.trim().isEmpty()) {
            return e.getClass().getSimpleName();
        }
        return message;
    }

    @Override
    public String getCommandName() {
        return "crawl";
    }

    @Override
    public String getDescription() {
        return "Crawl a website";
    }
}

31
w9/command/ExitCommand.java

@ -0,0 +1,31 @@
package com.crawler.command;
import com.crawler.view.ConsoleView;
/**
 * Command that shows the goodbye message and then runs an optional exit
 * callback.
 */
public class ExitCommand implements Command {

    private final ConsoleView view;
    // Assigned once in the constructor; final for consistency with the other commands' fields.
    private final Runnable exitCallback;

    /**
     * @param view         view used to display the goodbye message
     * @param exitCallback action to run after the message; may be {@code null}
     */
    public ExitCommand(ConsoleView view, Runnable exitCallback) {
        this.view = view;
        this.exitCallback = exitCallback;
    }

    /**
     * Displays the goodbye message, then runs the exit callback if one was
     * supplied.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        view.displayGoodbye();
        if (exitCallback != null) {
            exitCallback.run();
        }
    }

    @Override
    public String getCommandName() {
        return "exit";
    }

    @Override
    public String getDescription() {
        return "Exit the application";
    }
}

26
w9/command/HelpCommand.java

@ -0,0 +1,26 @@
package com.crawler.command;
import com.crawler.view.ConsoleView;
/**
 * Command that prints the help text through the {@link ConsoleView}.
 */
public class HelpCommand implements Command {

    private static final String NAME = "help";
    private static final String DESCRIPTION = "Show help message";

    private final ConsoleView view;

    /**
     * @param view view that renders the help text
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getCommandName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Delegates to {@link ConsoleView#displayHelp()}.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        view.displayHelp();
    }
}

26
w9/command/ListCommand.java

@ -0,0 +1,26 @@
package com.crawler.command;
import com.crawler.controller.CrawlerController;
/**
 * Command that asks the {@link CrawlerController} to list its articles.
 */
public class ListCommand implements Command {

    private static final String NAME = "list";
    private static final String DESCRIPTION = "List all crawled articles";

    private final CrawlerController controller;

    /**
     * @param controller controller that performs the listing
     */
    public ListCommand(CrawlerController controller) {
        this.controller = controller;
    }

    @Override
    public String getCommandName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Delegates to {@link CrawlerController#listArticles()}.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        controller.listArticles();
    }
}

26
w9/command/LoadCommand.java

@ -0,0 +1,26 @@
package com.crawler.command;
import com.crawler.controller.CrawlerController;
/**
 * Command that asks the {@link CrawlerController} to load articles.
 */
public class LoadCommand implements Command {

    private static final String NAME = "load";
    private static final String DESCRIPTION = "Load articles from data file";

    private final CrawlerController controller;

    /**
     * @param controller controller that performs the load
     */
    public LoadCommand(CrawlerController controller) {
        this.controller = controller;
    }

    @Override
    public String getCommandName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Delegates to {@link CrawlerController#loadData()}.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        controller.loadData();
    }
}

26
w9/command/SaveCommand.java

@ -0,0 +1,26 @@
package com.crawler.command;
import com.crawler.controller.CrawlerController;
/**
 * Command that asks the {@link CrawlerController} to save its articles.
 */
public class SaveCommand implements Command {

    private static final String NAME = "save";
    private static final String DESCRIPTION = "Save articles to data file";

    private final CrawlerController controller;

    /**
     * @param controller controller that performs the save
     */
    public SaveCommand(CrawlerController controller) {
        this.controller = controller;
    }

    @Override
    public String getCommandName() {
        return NAME;
    }

    @Override
    public String getDescription() {
        return DESCRIPTION;
    }

    /**
     * Delegates to {@link CrawlerController#saveData()}.
     *
     * @param args ignored
     */
    @Override
    public void execute(String[] args) {
        controller.saveData();
    }
}

73
w9/controller/CrawlerController.java

@ -0,0 +1,73 @@
package com.crawler.controller;
import java.util.List;
import com.crawler.factory.StrategyFactory;
import com.crawler.model.Article;
import com.crawler.repository.ArticleRepository;
import com.crawler.strategy.CrawlStrategy;
import com.crawler.util.DataPersistence;
import com.crawler.view.ConsoleView;
/**
 * Coordinates crawling, listing, saving and loading of {@link Article}s
 * between the repository, the persistence helper and the console view.
 */
public class CrawlerController {

    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";

    private final ArticleRepository repository;
    private final ConsoleView view;

    /**
     * Creates the controller and immediately pulls any previously saved
     * articles into the repository.
     *
     * @param repository store for crawled articles
     * @param view       view used for user-facing messages
     */
    public CrawlerController(ArticleRepository repository, ConsoleView view) {
        this.repository = repository;
        this.view = view;
        loadSavedData();
    }

    /** Adds persisted articles to the repository and reports how many were loaded. */
    private void loadSavedData() {
        List<Article> savedArticles = DataPersistence.loadArticles();
        if (!savedArticles.isEmpty()) {
            repository.saveAll(savedArticles);
            view.displayInfo("Loaded " + savedArticles.size() + " saved articles");
        }
    }

    /**
     * Crawls the given URL with the named strategy, stores every resulting
     * article and then persists the repository.
     *
     * <p>The URL is trimmed, and {@code https://} is prepended when it does not
     * already start with an http or https scheme (compared case-insensitively).
     * A URL the repository already knows is skipped with a warning.
     *
     * @param url          the address to crawl
     * @param strategyName name passed to {@link StrategyFactory#getStrategy}
     * @throws IllegalArgumentException if {@code url} is {@code null} or blank
     * @throws Exception                if the strategy lookup or the crawl fails
     */
    public void crawl(String url, String strategyName) throws Exception {
        if (url == null || url.trim().isEmpty()) {
            throw new IllegalArgumentException("URL cannot be empty");
        }
        String target = normalizeUrl(url);
        if (repository.existsByUrl(target)) {
            view.displayWarning("URL already crawled: " + target);
            return;
        }
        view.displayInfo("Crawling: " + target);
        view.displayInfo("Using strategy: " + strategyName);
        CrawlStrategy strategy = StrategyFactory.getStrategy(strategyName);
        List<Article> articles = strategy.crawl(target);
        for (Article article : articles) {
            repository.save(article);
            view.displaySuccess("Crawled: " + article.getTitle());
        }
        saveData();
    }

    /**
     * Trims surrounding whitespace and makes sure the URL carries a scheme.
     * Without the trim, the duplicate check and the crawl would see a different
     * string from the one that passed the blank check.
     */
    private static String normalizeUrl(String rawUrl) {
        String trimmed = rawUrl.trim();
        if (startsWithIgnoreCase(trimmed, HTTP_PREFIX) || startsWithIgnoreCase(trimmed, HTTPS_PREFIX)) {
            return trimmed;
        }
        return HTTPS_PREFIX + trimmed;
    }

    /** Case-insensitive {@code startsWith}; URL schemes are not case-sensitive. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Shows every article in the repository through the view. */
    public void listArticles() {
        List<Article> articles = repository.findAll();
        view.displayArticleList(articles);
    }

    /** Hands every article in the repository to the persistence helper. */
    public void saveData() {
        List<Article> articles = repository.findAll();
        DataPersistence.saveArticles(articles);
    }

    /**
     * Replaces the repository contents with the persisted articles.
     */
    public void loadData() {
        // Read first: if loading throws, the in-memory articles are still intact.
        List<Article> savedArticles = DataPersistence.loadArticles();
        repository.deleteAll();
        repository.saveAll(savedArticles);
    }
}

104
w9/model/Article.java

@ -0,0 +1,104 @@
package com.crawler.model;
import java.io.Serializable;
import java.time.LocalDateTime;
public class Article implements Serializable {
private static final long serialVersionUID = 1L;
private String id;
private String title;
private String url;
private String content;
private String author;
private LocalDateTime publishDate;
private LocalDateTime crawlDate;
private String source;
public Article() {
this.crawlDate = LocalDateTime.now();
}
public Article(String title, String url, String content) {
this.title = title;
this.url = url;
this.content = content;
this.crawlDate = LocalDateTime.now();
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public LocalDateTime getPublishDate() {
return publishDate;
}
public void setPublishDate(LocalDateTime publishDate) {
this.publishDate = publishDate;
}
public LocalDateTime getCrawlDate() {
return crawlDate;
}
public void setCrawlDate(LocalDateTime crawlDate) {
this.crawlDate = crawlDate;
}
public String getSource() {
return source;
}
public void setSource(String source) {
this.source = source;
}
@Override
public String toString() {
return "Article{" +
"id='" + id + '\'' +
", title='" + title + '\'' +
", url='" + url + '\'' +
", author='" + author + '\'' +
", publishDate=" + publishDate +
", crawlDate=" + crawlDate +
", source='" + source + '\'' +
'}';
}
}

101
w9/view/ConsoleView.java

@ -0,0 +1,101 @@
package com.crawler.view;
import java.time.format.DateTimeFormatter;
import java.util.List;
import java.util.Scanner;
import com.crawler.model.Article;
import com.crawler.util.ColorUtil;
/**
 * Console front end: prints colored messages and article details to standard
 * output and reads command lines from standard input.
 */
public class ConsoleView {

    private static final Scanner scanner = new Scanner(System.in);
    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");

    /** Maximum number of characters (code points) printed per content line. */
    private static final int CONTENT_WIDTH = 80;

    /** Prints the welcome banner. */
    public void displayWelcome() {
        System.out.println(ColorUtil.cyan("========================================"));
        System.out.println(ColorUtil.cyan(" Welcome to My Crawler "));
        System.out.println(ColorUtil.cyan("========================================"));
        System.out.println();
    }

    /** Prints the list of available commands and crawl strategies. */
    public void displayHelp() {
        System.out.println(ColorUtil.yellow("Available commands:"));
        System.out.println(ColorUtil.green(" crawl <url> [strategy] - Crawl a website"));
        System.out.println(ColorUtil.green(" list - List all crawled articles"));
        System.out.println(ColorUtil.green(" save - Save articles to data file"));
        System.out.println(ColorUtil.green(" load - Load articles from data file"));
        System.out.println(ColorUtil.green(" help - Show this help message"));
        System.out.println(ColorUtil.green(" exit - Exit the application"));
        System.out.println();
        System.out.println(ColorUtil.yellow("Available strategies:"));
        System.out.println(ColorUtil.cyan(" blog - Blog crawling strategy"));
        System.out.println(ColorUtil.cyan(" news - News crawling strategy"));
        System.out.println(ColorUtil.cyan(" jsoup - Generic JSoup strategy (default)"));
        System.out.println();
    }

    /**
     * Prints a header with the article count followed by each article's
     * details, numbered from 1.
     *
     * @param articles the articles to show; {@code null} is treated as empty
     */
    public void displayArticleList(List<Article> articles) {
        if (articles == null || articles.isEmpty()) {
            System.out.println(ColorUtil.yellow("No articles found."));
            return;
        }
        System.out.println(ColorUtil.cyan("=== Crawled Articles (" + articles.size() + ") ==="));
        System.out.println();
        for (int i = 0; i < articles.size(); i++) {
            displayArticleDetail(articles.get(i), i + 1);
        }
    }

    /**
     * Prints one article: title, id, URL, the optional fields that are set,
     * and the content wrapped to {@value #CONTENT_WIDTH} characters per line.
     *
     * @param article the article to show
     * @param index   the number printed in front of the title
     */
    public void displayArticleDetail(Article article, int index) {
        System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")));
        System.out.println(ColorUtil.bold(ColorUtil.yellow("[" + index + "] " + article.getTitle())));
        System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")));
        System.out.println(ColorUtil.cyan(" ID: ") + article.getId());
        System.out.println(ColorUtil.cyan(" URL: ") + article.getUrl());
        if (article.getAuthor() != null) {
            System.out.println(ColorUtil.cyan(" Author: ") + article.getAuthor());
        }
        if (article.getSource() != null) {
            System.out.println(ColorUtil.cyan(" Source: ") + article.getSource());
        }
        if (article.getPublishDate() != null) {
            System.out.println(ColorUtil.cyan(" Published: ") + article.getPublishDate().format(DATE_FORMATTER));
        }
        // The crawl date can be cleared through Article#setCrawlDate, so guard it
        // the same way as the publish date instead of risking a NullPointerException.
        if (article.getCrawlDate() != null) {
            System.out.println(ColorUtil.cyan(" Crawled: ") + article.getCrawlDate().format(DATE_FORMATTER));
        }
        System.out.println(ColorUtil.cyan(" Content: "));
        if (article.getContent() != null) {
            printWrapped(article.getContent());
        }
        System.out.println();
    }

    /**
     * Prints the content one line at a time, breaking every line that is
     * longer than {@link #CONTENT_WIDTH} code points into several output lines.
     *
     * <p>Existing line breaks are honored first. A single regex split on
     * {@code .{80}} stops wrapping at the first line break, because {@code .}
     * does not match line terminators.
     */
    private static void printWrapped(String content) {
        for (String line : content.split("\\R")) {
            if (line.isEmpty()) {
                System.out.println(" ");
                continue;
            }
            int start = 0;
            while (start < line.length()) {
                int end = start;
                // Advance by code points so a surrogate pair is never cut in half.
                for (int count = 0; count < CONTENT_WIDTH && end < line.length(); count++) {
                    end += Character.charCount(line.codePointAt(end));
                }
                System.out.println(" " + line.substring(start, end));
                start = end;
            }
        }
    }

    /** Prints a success message in green. */
    public void displaySuccess(String message) {
        System.out.println(ColorUtil.green("✓ " + message));
    }

    /** Prints an error message in red. */
    public void displayError(String message) {
        System.out.println(ColorUtil.red("✗ " + message));
    }

    /** Prints an informational message in blue. */
    public void displayInfo(String message) {
        System.out.println(ColorUtil.blue("ℹ " + message));
    }

    /** Prints a warning message in yellow. */
    public void displayWarning(String message) {
        System.out.println(ColorUtil.yellow("⚠ " + message));
    }

    /**
     * Prints the prompt and reads the next line from standard input.
     *
     * @return the line entered, with leading and trailing whitespace removed
     * @throws java.util.NoSuchElementException if standard input has no more lines
     */
    public String readInput() {
        System.out.print(ColorUtil.purple("> "));
        return scanner.nextLine().trim();
    }

    /** Prints the goodbye message. */
    public void displayGoodbye() {
        System.out.println(ColorUtil.cyan("Goodbye! Thank you for using My Crawler."));
    }
}
Loading…
Cancel
Save