11 changed files with 461 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,7 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
/**
 * A named console command.
 *
 * <p>Implementations expose a name and a short description, and perform their
 * work in {@link #execute(String[])}.
 */
public interface Command {

    /**
     * Returns the name of this command (for example {@code "crawl"} or {@code "exit"}).
     *
     * @return the command name
     */
    String getCommandName();

    /**
     * Returns a short, human-readable summary of what the command does.
     *
     * @return the command description
     */
    String getDescription();

    /**
     * Runs the command.
     *
     * @param args the arguments supplied to the command
     * @throws Exception if the command cannot complete
     */
    void execute(String[] args) throws Exception;
}
||||
@ -0,0 +1,41 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public CrawlCommand(CrawlerController controller, ConsoleView view) { |
||||
|
this.controller = controller; |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
if (args.length < 1) { |
||||
|
view.displayError("Usage: crawl <url> [strategy]"); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String url = args[0]; |
||||
|
String strategy = args.length > 1 ? args[1] : "jsoup"; |
||||
|
|
||||
|
try { |
||||
|
controller.crawl(url, strategy); |
||||
|
} catch (Exception e) { |
||||
|
view.displayError("Crawl failed: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "Crawl a website"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,31 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.view.ConsoleView; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
private Runnable exitCallback; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view, Runnable exitCallback) { |
||||
|
this.view = view; |
||||
|
this.exitCallback = exitCallback; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
view.displayGoodbye(); |
||||
|
if (exitCallback != null) { |
||||
|
exitCallback.run(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "Exit the application"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.view.ConsoleView; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
view.displayHelp(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "Show help message"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public ListCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
controller.listArticles(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "List all crawled articles"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
|
||||
|
public class LoadCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public LoadCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
controller.loadData(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "load"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "Load articles from data file"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.crawler.command; |
||||
|
|
||||
|
import com.crawler.controller.CrawlerController; |
||||
|
|
||||
|
public class SaveCommand implements Command { |
||||
|
private final CrawlerController controller; |
||||
|
|
||||
|
public SaveCommand(CrawlerController controller) { |
||||
|
this.controller = controller; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
controller.saveData(); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getCommandName() { |
||||
|
return "save"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getDescription() { |
||||
|
return "Save articles to data file"; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,73 @@ |
|||||
|
package com.crawler.controller; |
||||
|
|
||||
|
import java.util.List; |
||||
|
|
||||
|
import com.crawler.factory.StrategyFactory; |
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.repository.ArticleRepository; |
||||
|
import com.crawler.strategy.CrawlStrategy; |
||||
|
import com.crawler.util.DataPersistence; |
||||
|
import com.crawler.view.ConsoleView; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private final ArticleRepository repository; |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public CrawlerController(ArticleRepository repository, ConsoleView view) { |
||||
|
this.repository = repository; |
||||
|
this.view = view; |
||||
|
loadSavedData(); |
||||
|
} |
||||
|
|
||||
|
private void loadSavedData() { |
||||
|
List<Article> savedArticles = DataPersistence.loadArticles(); |
||||
|
if (!savedArticles.isEmpty()) { |
||||
|
repository.saveAll(savedArticles); |
||||
|
view.displayInfo("Loaded " + savedArticles.size() + " saved articles"); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void crawl(String url, String strategyName) throws Exception { |
||||
|
if (url == null || url.trim().isEmpty()) { |
||||
|
throw new IllegalArgumentException("URL cannot be empty"); |
||||
|
} |
||||
|
|
||||
|
if (!url.startsWith("http://") && !url.startsWith("https://")) { |
||||
|
url = "https://" + url; |
||||
|
} |
||||
|
|
||||
|
if (repository.existsByUrl(url)) { |
||||
|
view.displayWarning("URL already crawled: " + url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
view.displayInfo("Crawling: " + url); |
||||
|
view.displayInfo("Using strategy: " + strategyName); |
||||
|
|
||||
|
CrawlStrategy strategy = StrategyFactory.getStrategy(strategyName); |
||||
|
List<Article> articles = strategy.crawl(url); |
||||
|
|
||||
|
for (Article article : articles) { |
||||
|
repository.save(article); |
||||
|
view.displaySuccess("Crawled: " + article.getTitle()); |
||||
|
} |
||||
|
|
||||
|
saveData(); |
||||
|
} |
||||
|
|
||||
|
public void listArticles() { |
||||
|
List<Article> articles = repository.findAll(); |
||||
|
view.displayArticleList(articles); |
||||
|
} |
||||
|
|
||||
|
public void saveData() { |
||||
|
List<Article> articles = repository.findAll(); |
||||
|
DataPersistence.saveArticles(articles); |
||||
|
} |
||||
|
|
||||
|
public void loadData() { |
||||
|
repository.deleteAll(); |
||||
|
List<Article> savedArticles = DataPersistence.loadArticles(); |
||||
|
repository.saveAll(savedArticles); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,104 @@ |
|||||
|
package com.crawler.model; |
||||
|
|
||||
|
import java.io.Serializable; |
||||
|
import java.time.LocalDateTime; |
||||
|
|
||||
|
public class Article implements Serializable { |
||||
|
private static final long serialVersionUID = 1L; |
||||
|
private String id; |
||||
|
private String title; |
||||
|
private String url; |
||||
|
private String content; |
||||
|
private String author; |
||||
|
private LocalDateTime publishDate; |
||||
|
private LocalDateTime crawlDate; |
||||
|
private String source; |
||||
|
|
||||
|
public Article() { |
||||
|
this.crawlDate = LocalDateTime.now(); |
||||
|
} |
||||
|
|
||||
|
public Article(String title, String url, String content) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
this.content = content; |
||||
|
this.crawlDate = LocalDateTime.now(); |
||||
|
} |
||||
|
|
||||
|
public String getId() { |
||||
|
return id; |
||||
|
} |
||||
|
|
||||
|
public void setId(String id) { |
||||
|
this.id = id; |
||||
|
} |
||||
|
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public void setTitle(String title) { |
||||
|
this.title = title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
public void setUrl(String url) { |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
public String getContent() { |
||||
|
return content; |
||||
|
} |
||||
|
|
||||
|
public void setContent(String content) { |
||||
|
this.content = content; |
||||
|
} |
||||
|
|
||||
|
public String getAuthor() { |
||||
|
return author; |
||||
|
} |
||||
|
|
||||
|
public void setAuthor(String author) { |
||||
|
this.author = author; |
||||
|
} |
||||
|
|
||||
|
public LocalDateTime getPublishDate() { |
||||
|
return publishDate; |
||||
|
} |
||||
|
|
||||
|
public void setPublishDate(LocalDateTime publishDate) { |
||||
|
this.publishDate = publishDate; |
||||
|
} |
||||
|
|
||||
|
public LocalDateTime getCrawlDate() { |
||||
|
return crawlDate; |
||||
|
} |
||||
|
|
||||
|
public void setCrawlDate(LocalDateTime crawlDate) { |
||||
|
this.crawlDate = crawlDate; |
||||
|
} |
||||
|
|
||||
|
public String getSource() { |
||||
|
return source; |
||||
|
} |
||||
|
|
||||
|
public void setSource(String source) { |
||||
|
this.source = source; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "Article{" + |
||||
|
"id='" + id + '\'' + |
||||
|
", title='" + title + '\'' + |
||||
|
", url='" + url + '\'' + |
||||
|
", author='" + author + '\'' + |
||||
|
", publishDate=" + publishDate + |
||||
|
", crawlDate=" + crawlDate + |
||||
|
", source='" + source + '\'' + |
||||
|
'}'; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,101 @@ |
|||||
|
package com.crawler.view; |
||||
|
|
||||
|
import java.time.format.DateTimeFormatter; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
import com.crawler.model.Article; |
||||
|
import com.crawler.util.ColorUtil; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final Scanner scanner = new Scanner(System.in); |
||||
|
private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"); |
||||
|
|
||||
|
public void displayWelcome() { |
||||
|
System.out.println(ColorUtil.cyan("========================================")); |
||||
|
System.out.println(ColorUtil.cyan(" Welcome to My Crawler ")); |
||||
|
System.out.println(ColorUtil.cyan("========================================")); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
public void displayHelp() { |
||||
|
System.out.println(ColorUtil.yellow("Available commands:")); |
||||
|
System.out.println(ColorUtil.green(" crawl <url> [strategy] - Crawl a website")); |
||||
|
System.out.println(ColorUtil.green(" list - List all crawled articles")); |
||||
|
System.out.println(ColorUtil.green(" save - Save articles to data file")); |
||||
|
System.out.println(ColorUtil.green(" load - Load articles from data file")); |
||||
|
System.out.println(ColorUtil.green(" help - Show this help message")); |
||||
|
System.out.println(ColorUtil.green(" exit - Exit the application")); |
||||
|
System.out.println(); |
||||
|
System.out.println(ColorUtil.yellow("Available strategies:")); |
||||
|
System.out.println(ColorUtil.cyan(" blog - Blog crawling strategy")); |
||||
|
System.out.println(ColorUtil.cyan(" news - News crawling strategy")); |
||||
|
System.out.println(ColorUtil.cyan(" jsoup - Generic JSoup strategy (default)")); |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
public void displayArticleList(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
System.out.println(ColorUtil.yellow("No articles found.")); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
System.out.println(ColorUtil.cyan("=== Crawled Articles (" + articles.size() + ") ===")); |
||||
|
System.out.println(); |
||||
|
|
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
displayArticleDetail(articles.get(i), i + 1); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void displayArticleDetail(Article article, int index) { |
||||
|
System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"))); |
||||
|
System.out.println(ColorUtil.bold(ColorUtil.yellow("[" + index + "] " + article.getTitle()))); |
||||
|
System.out.println(ColorUtil.bold(ColorUtil.green("━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━"))); |
||||
|
System.out.println(ColorUtil.cyan(" ID: ") + article.getId()); |
||||
|
System.out.println(ColorUtil.cyan(" URL: ") + article.getUrl()); |
||||
|
if (article.getAuthor() != null) { |
||||
|
System.out.println(ColorUtil.cyan(" Author: ") + article.getAuthor()); |
||||
|
} |
||||
|
if (article.getSource() != null) { |
||||
|
System.out.println(ColorUtil.cyan(" Source: ") + article.getSource()); |
||||
|
} |
||||
|
if (article.getPublishDate() != null) { |
||||
|
System.out.println(ColorUtil.cyan(" Published: ") + article.getPublishDate().format(DATE_FORMATTER)); |
||||
|
} |
||||
|
System.out.println(ColorUtil.cyan(" Crawled: ") + article.getCrawlDate().format(DATE_FORMATTER)); |
||||
|
System.out.println(ColorUtil.cyan(" Content: ")); |
||||
|
if (article.getContent() != null) { |
||||
|
String[] lines = article.getContent().split("(?<=\\G.{80})"); |
||||
|
for (String line : lines) { |
||||
|
System.out.println(" " + line); |
||||
|
} |
||||
|
} |
||||
|
System.out.println(); |
||||
|
} |
||||
|
|
||||
|
public void displaySuccess(String message) { |
||||
|
System.out.println(ColorUtil.green("✓ " + message)); |
||||
|
} |
||||
|
|
||||
|
public void displayError(String message) { |
||||
|
System.out.println(ColorUtil.red("✗ " + message)); |
||||
|
} |
||||
|
|
||||
|
public void displayInfo(String message) { |
||||
|
System.out.println(ColorUtil.blue("ℹ " + message)); |
||||
|
} |
||||
|
|
||||
|
public void displayWarning(String message) { |
||||
|
System.out.println(ColorUtil.yellow("⚠ " + message)); |
||||
|
} |
||||
|
|
||||
|
public String readInput() { |
||||
|
System.out.print(ColorUtil.purple("> ")); |
||||
|
return scanner.nextLine().trim(); |
||||
|
} |
||||
|
|
||||
|
public void displayGoodbye() { |
||||
|
System.out.println(ColorUtil.cyan("Goodbye! Thank you for using My Crawler.")); |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue