11 changed files with 461 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,7 @@ |
|||
package com.crawler.command; |
|||
|
|||
/**
 * Contract for a single named command of the crawler application.
 *
 * <p>Each implementation exposes a name, a short description and an
 * {@link #execute(String[])} entry point that receives the command's arguments.
 */
public interface Command {

    /**
     * Returns the name of this command.
     *
     * @return the command name
     */
    String getCommandName();

    /**
     * Returns a short, human-readable description of this command.
     *
     * @return the description text
     */
    String getDescription();

    /**
     * Runs the command.
     *
     * @param args the arguments supplied to the command
     * @throws Exception if the command cannot complete
     */
    void execute(String[] args) throws Exception;
}
|||
@ -0,0 +1,41 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.controller.CrawlerController; |
|||
import com.crawler.view.ConsoleView; |
|||
|
|||
public class CrawlCommand implements Command { |
|||
private final CrawlerController controller; |
|||
private final ConsoleView view; |
|||
|
|||
public CrawlCommand(CrawlerController controller, ConsoleView view) { |
|||
this.controller = controller; |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
if (args.length < 1) { |
|||
view.displayError("Usage: crawl <url> [strategy]"); |
|||
return; |
|||
} |
|||
|
|||
String url = args[0]; |
|||
String strategy = args.length > 1 ? args[1] : "jsoup"; |
|||
|
|||
try { |
|||
controller.crawl(url, strategy); |
|||
} catch (Exception e) { |
|||
view.displayError("Crawl failed: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "crawl"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "Crawl a website"; |
|||
} |
|||
} |
|||
@ -0,0 +1,31 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.view.ConsoleView; |
|||
|
|||
public class ExitCommand implements Command { |
|||
private final ConsoleView view; |
|||
private Runnable exitCallback; |
|||
|
|||
public ExitCommand(ConsoleView view, Runnable exitCallback) { |
|||
this.view = view; |
|||
this.exitCallback = exitCallback; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
view.displayGoodbye(); |
|||
if (exitCallback != null) { |
|||
exitCallback.run(); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "exit"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "Exit the application"; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.view.ConsoleView; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private final ConsoleView view; |
|||
|
|||
public HelpCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
view.displayHelp(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "Show help message"; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.controller.CrawlerController; |
|||
|
|||
public class ListCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public ListCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
controller.listArticles(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "list"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "List all crawled articles"; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.controller.CrawlerController; |
|||
|
|||
public class LoadCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public LoadCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
controller.loadData(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "load"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "Load articles from data file"; |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.crawler.command; |
|||
|
|||
import com.crawler.controller.CrawlerController; |
|||
|
|||
public class SaveCommand implements Command { |
|||
private final CrawlerController controller; |
|||
|
|||
public SaveCommand(CrawlerController controller) { |
|||
this.controller = controller; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
controller.saveData(); |
|||
} |
|||
|
|||
@Override |
|||
public String getCommandName() { |
|||
return "save"; |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "Save articles to data file"; |
|||
} |
|||
} |
|||
@ -0,0 +1,73 @@ |
|||
package com.crawler.controller; |
|||
|
|||
import java.util.List; |
|||
|
|||
import com.crawler.factory.StrategyFactory; |
|||
import com.crawler.model.Article; |
|||
import com.crawler.repository.ArticleRepository; |
|||
import com.crawler.strategy.CrawlStrategy; |
|||
import com.crawler.util.DataPersistence; |
|||
import com.crawler.view.ConsoleView; |
|||
|
|||
public class CrawlerController { |
|||
private final ArticleRepository repository; |
|||
private final ConsoleView view; |
|||
|
|||
public CrawlerController(ArticleRepository repository, ConsoleView view) { |
|||
this.repository = repository; |
|||
this.view = view; |
|||
loadSavedData(); |
|||
} |
|||
|
|||
private void loadSavedData() { |
|||
List<Article> savedArticles = DataPersistence.loadArticles(); |
|||
if (!savedArticles.isEmpty()) { |
|||
repository.saveAll(savedArticles); |
|||
view.displayInfo("Loaded " + savedArticles.size() + " saved articles"); |
|||
} |
|||
} |
|||
|
|||
public void crawl(String url, String strategyName) throws Exception { |
|||
if (url == null || url.trim().isEmpty()) { |
|||
throw new IllegalArgumentException("URL cannot be empty"); |
|||
} |
|||
|
|||
if (!url.startsWith("http://") && !url.startsWith("https://")) { |
|||
url = "https://" + url; |
|||
} |
|||
|
|||
if (repository.existsByUrl(url)) { |
|||
view.displayWarning("URL already crawled: " + url); |
|||
return; |
|||
} |
|||
|
|||
view.displayInfo("Crawling: " + url); |
|||
view.displayInfo("Using strategy: " + strategyName); |
|||
|
|||
CrawlStrategy strategy = StrategyFactory.getStrategy(strategyName); |
|||
List<Article> articles = strategy.crawl(url); |
|||
|
|||
for (Article article : articles) { |
|||
repository.save(article); |
|||
view.displaySuccess("Crawled: " + article.getTitle()); |
|||
} |
|||
|
|||
saveData(); |
|||
} |
|||
|
|||
public void listArticles() { |
|||
List<Article> articles = repository.findAll(); |
|||
view.displayArticleList(articles); |
|||
} |
|||
|
|||
public void saveData() { |
|||
List<Article> articles = repository.findAll(); |
|||
DataPersistence.saveArticles(articles); |
|||
} |
|||
|
|||
public void loadData() { |
|||
repository.deleteAll(); |
|||
List<Article> savedArticles = DataPersistence.loadArticles(); |
|||
repository.saveAll(savedArticles); |
|||
} |
|||
} |
|||
@ -0,0 +1,104 @@ |
|||
package com.crawler.model; |
|||
|
|||
import java.io.Serializable; |
|||
import java.time.LocalDateTime; |
|||
|
|||
public class Article implements Serializable { |
|||
private static final long serialVersionUID = 1L; |
|||
private String id; |
|||
private String title; |
|||
private String url; |
|||
private String content; |
|||
private String author; |
|||
private LocalDateTime publishDate; |
|||
private LocalDateTime crawlDate; |
|||
private String source; |
|||
|
|||
public Article() { |
|||
this.crawlDate = LocalDateTime.now(); |
|||
} |
|||
|
|||
public Article(String title, String url, String content) { |
|||
this.title = title; |
|||
this.url = url; |
|||
this.content = content; |
|||
this.crawlDate = LocalDateTime.now(); |
|||
} |
|||
|
|||
public String getId() { |
|||
return id; |
|||
} |
|||
|
|||
public void setId(String id) { |
|||
this.id = id; |
|||
} |
|||
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public void setTitle(String title) { |
|||
this.title = title; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
|
|||
public void setUrl(String url) { |
|||
this.url = url; |
|||
} |
|||
|
|||
public String getContent() { |
|||
return content; |
|||
} |
|||
|
|||
public void setContent(String content) { |
|||
this.content = content; |
|||
} |
|||
|
|||
public String getAuthor() { |
|||
return author; |
|||
} |
|||
|
|||
public void setAuthor(String author) { |
|||
this.author = author; |
|||
} |
|||
|
|||
public LocalDateTime getPublishDate() { |
|||
return publishDate; |
|||
} |
|||
|
|||
public void setPublishDate(LocalDateTime publishDate) { |
|||
this.publishDate = publishDate; |
|||
} |
|||
|
|||
public LocalDateTime getCrawlDate() { |
|||
return crawlDate; |
|||
} |
|||
|
|||
public void setCrawlDate(LocalDateTime crawlDate) { |
|||
this.crawlDate = crawlDate; |
|||
} |
|||
|
|||
public String getSource() { |
|||
return source; |
|||
} |
|||
|
|||
public void setSource(String source) { |
|||
this.source = source; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "Article{" + |
|||
"id='" + id + '\'' + |
|||
", title='" + title + '\'' + |
|||
", url='" + url + '\'' + |
|||
", author='" + author + '\'' + |
|||
", publishDate=" + publishDate + |
|||
", crawlDate=" + crawlDate + |
|||
", source='" + source + '\'' + |
|||
'}'; |
|||
} |
|||
} |
|||
@ -0,0 +1,101 @@ |
|||
package com.crawler.view; |
|||
|
|||
import java.time.format.DateTimeFormatter; |
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
import com.crawler.model.Article; |
|||
import com.crawler.util.ColorUtil; |
|||
|
|||
public class ConsoleView { |
|||
private static final Scanner scanner = new Scanner(System.in); |
|||
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); |
|||
|
|||
public void displayWelcome() { |
|||
System.out.println(ColorUtil.cyan("========================================")); |
|||
System.out.println(ColorUtil.cyan(" Welcome to My Crawler ")); |
|||
System.out.println(ColorUtil.cyan("========================================")); |
|||
System.out.println(); |
|||
} |
|||
|
|||
public void displayHelp() { |
|||
System.out.println(ColorUtil.yellow("Available commands:")); |
|||
System.out.println(ColorUtil.green(" crawl <url> [strategy] - Crawl a website")); |
|||
System.out.println(ColorUtil.green(" list - List all crawled articles")); |
|||
System.out.println(ColorUtil.green(" save - Save articles to data file")); |
|||
System.out.println(ColorUtil.green(" load - Load articles from data file")); |
|||
System.out.println(ColorUtil.green(" help - Show this help message")); |
|||
System.out.println(ColorUtil.green(" exit - Exit the application")); |
|||
System.out.println(); |
|||
System.out.println(ColorUtil.yellow("Available strategies:")); |
|||
System.out.println(ColorUtil.cyan(" blog - Blog crawling strategy")); |
|||
System.out.println(ColorUtil.cyan(" news - News crawling strategy")); |
|||
System.out.println(ColorUtil.cyan(" jsoup - Generic JSoup strategy (default)")); |
|||
System.out.println(); |
|||
} |
|||
|
|||
public void displayArticleList(List<Article> articles) { |
|||
if (articles.isEmpty()) { |
|||
System.out.println(ColorUtil.yellow("No articles found.")); |
|||
return; |
|||
} |
|||
|
|||
System.out.println(ColorUtil.cyan("=== Crawled Articles (" + articles.size() + ") ===")); |
|||
System.out.println(); |
|||
|
|||
for (int i = 0; i < articles.size(); i++) { |
|||
displayArticleDetail(articles.get(i), i + 1); |
|||
} |
|||
} |
|||
|
|||
public void displayArticleDetail(Article article, int index) { |
|||
System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"))); |
|||
System.out.println(ColorUtil.bold(ColorUtil.yellow("[" + index + "] " + article.getTitle()))); |
|||
System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"))); |
|||
System.out.println(ColorUtil.cyan(" ID: ") + article.getId()); |
|||
System.out.println(ColorUtil.cyan(" URL: ") + article.getUrl()); |
|||
if (article.getAuthor() != null) { |
|||
System.out.println(ColorUtil.cyan(" Author: ") + article.getAuthor()); |
|||
} |
|||
if (article.getSource() != null) { |
|||
System.out.println(ColorUtil.cyan(" Source: ") + article.getSource()); |
|||
} |
|||
if (article.getPublishDate() != null) { |
|||
System.out.println(ColorUtil.cyan(" Published: ") + article.getPublishDate().format(DATE_FORMATTER)); |
|||
} |
|||
System.out.println(ColorUtil.cyan(" Crawled: ") + article.getCrawlDate().format(DATE_FORMATTER)); |
|||
System.out.println(ColorUtil.cyan(" Content: ")); |
|||
if (article.getContent() != null) { |
|||
String[] lines = article.getContent().split("(?<=\\G.{80})"); |
|||
for (String line : lines) { |
|||
System.out.println(" " + line); |
|||
} |
|||
} |
|||
System.out.println(); |
|||
} |
|||
|
|||
public void displaySuccess(String message) { |
|||
System.out.println(ColorUtil.green("✓ " + message)); |
|||
} |
|||
|
|||
public void displayError(String message) { |
|||
System.out.println(ColorUtil.red("✗ " + message)); |
|||
} |
|||
|
|||
public void displayInfo(String message) { |
|||
System.out.println(ColorUtil.blue("ℹ " + message)); |
|||
} |
|||
|
|||
public void displayWarning(String message) { |
|||
System.out.println(ColorUtil.yellow("⚠ " + message)); |
|||
} |
|||
|
|||
public String readInput() { |
|||
System.out.print(ColorUtil.purple("> ")); |
|||
return scanner.nextLine().trim(); |
|||
} |
|||
|
|||
public void displayGoodbye() { |
|||
System.out.println(ColorUtil.cyan("Goodbye! Thank you for using My Crawler.")); |
|||
} |
|||
} |
|||
Loading…
Reference in new issue