diff --git a/w9/.DS_Store b/w9/.DS_Store
new file mode 100644
index 0000000..7fbd0b3
Binary files /dev/null and b/w9/.DS_Store differ
diff --git a/w9/command/Command.java b/w9/command/Command.java
new file mode 100644
index 0000000..17d3c1d
--- /dev/null
+++ b/w9/command/Command.java
@@ -0,0 +1,20 @@
+package com.crawler.command;
+
+/**
+ * A console command exposed by the crawler application.
+ */
+public interface Command {
+    /**
+     * Runs the command.
+     *
+     * @param args command arguments; commands that take none ignore them
+     * @throws Exception if the command cannot complete
+     */
+    void execute(String[] args) throws Exception;
+
+    /** Returns the command's name, for example {@code "crawl"}. */
+    String getCommandName();
+
+    /** Returns a one-line description of what the command does. */
+    String getDescription();
+}
diff --git a/w9/command/CrawlCommand.java b/w9/command/CrawlCommand.java
new file mode 100644
index 0000000..24b1739
--- /dev/null
+++ b/w9/command/CrawlCommand.java
@@ -0,0 +1,56 @@
+package com.crawler.command;
+
+import com.crawler.controller.CrawlerController;
+import com.crawler.view.ConsoleView;
+
+/**
+ * Command {@code crawl}: hands a URL and a strategy name to the controller.
+ */
+public class CrawlCommand implements Command {
+    /** Strategy name used when the caller does not supply one. */
+    private static final String DEFAULT_STRATEGY = "jsoup";
+
+    private final CrawlerController controller;
+    private final ConsoleView view;
+
+    public CrawlCommand(CrawlerController controller, ConsoleView view) {
+        this.controller = controller;
+        this.view = view;
+    }
+
+    /**
+     * Crawls {@code args[0]} with the strategy named in {@code args[1]} (default {@code jsoup}).
+     * Failures are reported through the view instead of being thrown.
+     *
+     * @param args the URL, optionally followed by a strategy name
+     */
+    @Override
+    public void execute(String[] args) {
+        if (args == null || args.length < 1) {
+            view.displayError("Usage: crawl <url> [strategy]");
+            return;
+        }
+
+        String url = args[0];
+        String strategy = args.length > 1 ? args[1] : DEFAULT_STRATEGY;
+
+        try {
+            controller.crawl(url, strategy);
+        } catch (Exception e) {
+            // getMessage() is null for many exceptions (e.g. NullPointerException),
+            // so fall back to the exception type rather than printing "null".
+            String reason = e.getMessage() != null ? e.getMessage() : e.getClass().getSimpleName();
+            view.displayError("Crawl failed: " + reason);
+        }
+    }
+
+    @Override
+    public String getCommandName() {
+        return "crawl";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Crawl a website";
+    }
+}
diff --git a/w9/command/ExitCommand.java b/w9/command/ExitCommand.java
new file mode 100644
index 0000000..760c7af
--- /dev/null
+++ b/w9/command/ExitCommand.java
@@ -0,0 +1,39 @@
+package com.crawler.command;
+
+import com.crawler.view.ConsoleView;
+
+/**
+ * Command {@code exit}: prints the goodbye message, then runs the exit callback if one was given.
+ */
+public class ExitCommand implements Command {
+    private final ConsoleView view;
+    // Assigned once in the constructor, so it is final like the other collaborators.
+    private final Runnable exitCallback;
+
+    /**
+     * @param view         view used to print the goodbye message
+     * @param exitCallback action run after the message; {@code null} means no action
+     */
+    public ExitCommand(ConsoleView view, Runnable exitCallback) {
+        this.view = view;
+        this.exitCallback = exitCallback;
+    }
+
+    @Override
+    public void execute(String[] args) {
+        view.displayGoodbye();
+        if (exitCallback != null) {
+            exitCallback.run();
+        }
+    }
+
+    @Override
+    public String getCommandName() {
+        return "exit";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Exit the application";
+    }
+}
diff --git a/w9/command/HelpCommand.java b/w9/command/HelpCommand.java
new file mode 100644
index 0000000..9e624d6
--- /dev/null
+++ b/w9/command/HelpCommand.java
@@ -0,0 +1,29 @@
+package com.crawler.command;
+
+import com.crawler.view.ConsoleView;
+
+/**
+ * Command {@code help}: asks the view to print the help text.
+ */
+public class HelpCommand implements Command {
+    private final ConsoleView view;
+
+    public HelpCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public void execute(String[] args) {
+        view.displayHelp();
+    }
+
+    @Override
+    public String getCommandName() {
+        return "help";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Show help message";
+    }
+}
diff --git a/w9/command/ListCommand.java b/w9/command/ListCommand.java
new file mode 100644
index 0000000..5f26db5
--- /dev/null
+++ b/w9/command/ListCommand.java
@@ -0,0 +1,26 @@
+package com.crawler.command;
+
+import com.crawler.controller.CrawlerController;
+
+public class ListCommand implements Command { // command "list"
+    private final CrawlerController controller;
+
+    public ListCommand(CrawlerController controller) {
+        this.controller = controller;
+    }
+
+    @Override
+    public void execute(String[] args) { // args are not used
+        controller.listArticles();
+    }
+
+    @Override
+    public String getCommandName() {
+        return "list";
+    }
+
+    @Override
+    public String getDescription() {
+        return "List all crawled articles";
+    }
+}
diff --git a/w9/command/LoadCommand.java b/w9/command/LoadCommand.java
new file mode 100644
index 0000000..d90160c
--- /dev/null
+++ b/w9/command/LoadCommand.java
@@ -0,0 +1,29 @@
+package com.crawler.command;
+
+import com.crawler.controller.CrawlerController;
+
+/**
+ * Command {@code load}: asks the controller to load its articles; arguments are not used.
+ */
+public class LoadCommand implements Command {
+    private final CrawlerController controller;
+
+    public LoadCommand(CrawlerController controller) {
+        this.controller = controller;
+    }
+
+    @Override
+    public void execute(String[] args) {
+        controller.loadData();
+    }
+
+    @Override
+    public String getCommandName() {
+        return "load";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Load articles from data file";
+    }
+}
diff --git a/w9/command/SaveCommand.java b/w9/command/SaveCommand.java
new file mode 100644
index 0000000..6f14760
--- /dev/null
+++ b/w9/command/SaveCommand.java
@@ -0,0 +1,29 @@
+package com.crawler.command;
+
+import com.crawler.controller.CrawlerController;
+
+/**
+ * Command {@code save}: asks the controller to save its articles; arguments are not used.
+ */
+public class SaveCommand implements Command {
+    private final CrawlerController controller;
+
+    public SaveCommand(CrawlerController controller) {
+        this.controller = controller;
+    }
+
+    @Override
+    public void execute(String[] args) {
+        controller.saveData();
+    }
+
+    @Override
+    public String getCommandName() {
+        return "save";
+    }
+
+    @Override
+    public String getDescription() {
+        return "Save articles to data file";
+    }
+}
diff --git a/w9/controller/CrawlerController.java b/w9/controller/CrawlerController.java
new file mode 100644
index 0000000..52aa817
--- /dev/null
+++ 
b/w9/controller/CrawlerController.java
@@ -0,0 +1,103 @@
+package com.crawler.controller;
+
+import java.util.List;
+
+import com.crawler.factory.StrategyFactory;
+import com.crawler.model.Article;
+import com.crawler.repository.ArticleRepository;
+import com.crawler.strategy.CrawlStrategy;
+import com.crawler.util.DataPersistence;
+import com.crawler.view.ConsoleView;
+
+/**
+ * Coordinates crawling, listing and persistence between the article repository,
+ * the crawl strategies and the console view.
+ */
+public class CrawlerController {
+    private final ArticleRepository repository;
+    private final ConsoleView view;
+
+    /**
+     * Creates the controller and immediately imports any previously saved articles.
+     */
+    public CrawlerController(ArticleRepository repository, ConsoleView view) {
+        this.repository = repository;
+        this.view = view;
+        loadSavedData();
+    }
+
+    /** Copies persisted articles into the repository and reports how many were found. */
+    private void loadSavedData() {
+        List<Article> savedArticles = DataPersistence.loadArticles();
+        if (!savedArticles.isEmpty()) {
+            repository.saveAll(savedArticles);
+            view.displayInfo("Loaded " + savedArticles.size() + " saved articles");
+        }
+    }
+
+    /**
+     * Crawls {@code url} with the named strategy, stores every resulting article
+     * and then persists the repository.
+     *
+     * <p>A URL without an {@code http://} or {@code https://} prefix gets {@code https://}
+     * prepended. A URL the repository already knows is skipped with a warning.
+     *
+     * @param url          address to crawl; surrounding whitespace is ignored
+     * @param strategyName name passed to {@link StrategyFactory#getStrategy}
+     * @throws IllegalArgumentException if {@code url} is null or blank
+     * @throws Exception                propagated from the strategy lookup or the crawl
+     */
+    public void crawl(String url, String strategyName) throws Exception {
+        if (url == null || url.trim().isEmpty()) {
+            throw new IllegalArgumentException("URL cannot be empty");
+        }
+
+        // Use the same trimmed value that was validated: otherwise " example.com"
+        // passes the blank check but becomes the malformed "https:// example.com".
+        url = url.trim();
+        if (!url.startsWith("http://") && !url.startsWith("https://")) {
+            url = "https://" + url;
+        }
+
+        if (repository.existsByUrl(url)) {
+            view.displayWarning("URL already crawled: " + url);
+            return;
+        }
+
+        view.displayInfo("Crawling: " + url);
+        view.displayInfo("Using strategy: " + strategyName);
+
+        CrawlStrategy strategy = StrategyFactory.getStrategy(strategyName);
+        List<Article> articles = strategy.crawl(url);
+
+        for (Article article : articles) {
+            repository.save(article);
+            view.displaySuccess("Crawled: " + article.getTitle());
+        }
+
+        saveData();
+    }
+
+    /** Shows every article currently held by the repository. */
+    public void listArticles() {
+        List<Article> articles = repository.findAll();
+        view.displayArticleList(articles);
+    }
+
+    /** Hands the repository's current contents to {@link DataPersistence} for saving. */
+    public void saveData() {
+        List<Article> articles = repository.findAll();
+        DataPersistence.saveArticles(articles);
+    }
+
+    /**
+     * Replaces the repository's contents with the persisted articles.
+     */
+    public void loadData() {
+        // Read first, clear second: if reading throws, the articles already in
+        // memory are still intact instead of having been deleted up front.
+        List<Article> savedArticles = DataPersistence.loadArticles();
+        repository.deleteAll();
+        repository.saveAll(savedArticles);
+    }
+}
diff --git a/w9/model/Article.java b/w9/model/Article.java
new file mode 100644
index 0000000..cb5599e
--- /dev/null
+++ b/w9/model/Article.java
@@ -0,0 +1,110 @@
+package com.crawler.model;
+
+import java.io.Serializable;
+import java.time.LocalDateTime;
+
+/**
+ * A crawled article: a mutable, serializable bean whose crawl date defaults to construction time.
+ */
+public class Article implements Serializable {
+    private static final long serialVersionUID = 1L;
+    private String id;
+    private String title;
+    private String url;
+    private String content;
+    private String author;
+    private LocalDateTime publishDate;
+    private LocalDateTime crawlDate;
+    private String source;
+
+    /** Creates an empty article whose crawl date is the current time. */
+    public Article() {
+        this.crawlDate = LocalDateTime.now();
+    }
+
+    /** Creates an article with the given title, URL and content, crawled now. */
+    public Article(String title, String url, String content) {
+        this(); // reuse the crawl-date initialisation instead of duplicating it
+        this.title = title;
+        this.url = url;
+        this.content = content;
+    }
+
+    public String getId() {
+        return id;
+    }
+
+    public void setId(String id) {
+        this.id = id;
+    }
+
+    public String getTitle() {
+        return title;
+    }
+
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    public String getUrl() {
+        return url;
+    }
+
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    public String getContent() {
+        return content;
+    }
+
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    public String getAuthor() {
+        return author;
+    }
+
+    public void setAuthor(String author) {
+        this.author = author;
+    }
+
+    public LocalDateTime getPublishDate() {
+        return publishDate;
+    }
+
+    public void setPublishDate(LocalDateTime publishDate) {
+        this.publishDate = publishDate;
+    }
+
+    public LocalDateTime getCrawlDate() {
+        return crawlDate;
+    }
+
+    public void setCrawlDate(LocalDateTime crawlDate) {
+        this.crawlDate = crawlDate;
+    }
+
+    public String getSource() {
+        return source;
+    }
+
+    public void setSource(String source) {
+        this.source = source;
+    }
+
+    /** Summarises the article's metadata; the content field is not included. */
+    @Override
+    public String toString() {
+        return "Article{" +
+                "id='" + id + '\'' +
+                ", title='" + title + '\'' +
+                ", url='" + url + '\'' +
+                ", author='" + author + '\'' +
+                ", publishDate=" + publishDate +
+                ", crawlDate=" + crawlDate +
+                ", source='" + source + '\'' +
+                '}';
+    }
+}
diff --git a/w9/view/ConsoleView.java b/w9/view/ConsoleView.java
new file mode 100644
index 0000000..71d86e9
--- /dev/null
+++ b/w9/view/ConsoleView.java
@@ -0,0 +1,134 @@
+package com.crawler.view;
+
+import java.time.format.DateTimeFormatter;
+import java.util.List;
+import java.util.Scanner;
+import java.util.regex.Pattern;
+
+import com.crawler.model.Article;
+import com.crawler.util.ColorUtil;
+
+/**
+ * Console front end: prints coloured messages and reads command lines from standard input.
+ */
+public class ConsoleView {
+    private static final Scanner scanner = new Scanner(System.in);
+    private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss");
+    /** Split point after every 80 characters; compiled once rather than per article. */
+    private static final Pattern WRAP_POINT = Pattern.compile("(?<=\\G.{80})");
+
+    public void displayWelcome() {
+        System.out.println(ColorUtil.cyan("========================================"));
+        System.out.println(ColorUtil.cyan(" Welcome to My Crawler "));
+        System.out.println(ColorUtil.cyan("========================================"));
+        System.out.println();
+    }
+
+    /** Prints the list of commands followed by the list of crawl strategies. */
+    public void displayHelp() {
+        System.out.println(ColorUtil.yellow("Available commands:"));
+        System.out.println(ColorUtil.green(" crawl <url> [strategy] - Crawl a website"));
+        System.out.println(ColorUtil.green(" list - List all crawled articles"));
+        System.out.println(ColorUtil.green(" save - Save articles to data file"));
+        System.out.println(ColorUtil.green(" load - Load articles from data file"));
+        System.out.println(ColorUtil.green(" help - Show this help message"));
+        System.out.println(ColorUtil.green(" exit - Exit the application"));
+        System.out.println();
+        System.out.println(ColorUtil.yellow("Available strategies:"));
+        System.out.println(ColorUtil.cyan(" blog - Blog crawling strategy"));
+        System.out.println(ColorUtil.cyan(" news - News crawling strategy"));
+        System.out.println(ColorUtil.cyan(" jsoup - Generic JSoup strategy (default)"));
+        System.out.println();
+    }
+
+    /**
+     * Prints every article in order, numbered from 1, or a notice when there is none.
+     *
+     * @param articles articles to print; {@code null} is treated like an empty list
+     */
+    public void displayArticleList(List<Article> articles) {
+        if (articles == null || articles.isEmpty()) {
+            System.out.println(ColorUtil.yellow("No articles found."));
+            return;
+        }
+
+        System.out.println(ColorUtil.cyan("=== Crawled Articles (" + articles.size() + ") ==="));
+        System.out.println();
+
+        for (int i = 0; i < articles.size(); i++) {
+            displayArticleDetail(articles.get(i), i + 1);
+        }
+    }
+
+    /**
+     * Prints one article: a title banner, its metadata, then the content in 80-character rows.
+     * Author, source and both dates are printed only when they are set.
+     *
+     * @param article article to print
+     * @param index   number shown in front of the title
+     */
+    public void displayArticleDetail(Article article, int index) {
+        System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")));
+        System.out.println(ColorUtil.bold(ColorUtil.yellow("[" + index + "] " + article.getTitle())));
+        System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━")));
+        System.out.println(ColorUtil.cyan(" ID: ") + article.getId());
+        System.out.println(ColorUtil.cyan(" URL: ") + article.getUrl());
+        if (article.getAuthor() != null) {
+            System.out.println(ColorUtil.cyan(" Author: ") + article.getAuthor());
+        }
+        if (article.getSource() != null) {
+            System.out.println(ColorUtil.cyan(" Source: ") + article.getSource());
+        }
+        if (article.getPublishDate() != null) {
+            System.out.println(ColorUtil.cyan(" Published: ") + article.getPublishDate().format(DATE_FORMATTER));
+        }
+        // The crawl date can be null as well (setCrawlDate accepts it), so it gets
+        // the same guard as the publish date instead of an unconditional format().
+        if (article.getCrawlDate() != null) {
+            System.out.println(ColorUtil.cyan(" Crawled: ") + article.getCrawlDate().format(DATE_FORMATTER));
+        }
+        System.out.println(ColorUtil.cyan(" Content: "));
+        if (article.getContent() != null) {
+            String[] lines = WRAP_POINT.split(article.getContent());
+            for (String line : lines) {
+                System.out.println(" " + line);
+            }
+        }
+        System.out.println();
+    }
+
+    public void displaySuccess(String message) {
+        System.out.println(ColorUtil.green("✓ " + message));
+    }
+
+    public void displayError(String message) {
+        System.out.println(ColorUtil.red("✗ " + message));
+    }
+
+    public void displayInfo(String message) {
+        System.out.println(ColorUtil.blue("ℹ " + message));
+    }
+
+    public void displayWarning(String message) {
+        System.out.println(ColorUtil.yellow("⚠ " + message));
+    }
+
+    /**
+     * Prompts with {@code "> "} and reads one line from standard input.
+     *
+     * @return the trimmed line, or {@code "exit"} once standard input has ended
+     */
+    public String readInput() {
+        System.out.print(ColorUtil.purple("> "));
+        // nextLine() throws NoSuchElementException once standard input is closed
+        // (Ctrl-D, exhausted pipe); report that as an explicit exit request instead.
+        if (!scanner.hasNextLine()) {
+            return "exit";
+        }
+        return scanner.nextLine().trim();
+    }
+
+    public void displayGoodbye() {
+        System.out.println(ColorUtil.cyan("Goodbye! Thank you for using My Crawler."));
+    }
+}