diff --git a/W11/Article.java b/W11/Article.java
new file mode 100644
index 0000000..78b7b34
--- /dev/null
+++ b/W11/Article.java
@@ -0,0 +1,64 @@
+package com.example.datacollect.model;
+
+/**
+ * A mutable holder for one article: its title, URL and content.
+ *
+ * <p>No validation happens here; {@link #toString()} reports only the title and URL.
+ */
+public class Article {
+    private String title;
+    private String url;
+    private String content;
+
+    /**
+     * Creates an article; the arguments are stored as given, without null checks.
+     *
+     * @param title   the article title
+     * @param url     the article URL
+     * @param content the article content
+     */
+    public Article(String title, String url, String content) {
+        this.title = title;
+        this.url = url;
+        this.content = content;
+    }
+
+    /** @return the article title */
+    public String getTitle() {
+        return title;
+    }
+
+    /** @param title the new article title */
+    public void setTitle(String title) {
+        this.title = title;
+    }
+
+    /** @return the article URL */
+    public String getUrl() {
+        return url;
+    }
+
+    /** @param url the new article URL */
+    public void setUrl(String url) {
+        this.url = url;
+    }
+
+    /** @return the article content */
+    public String getContent() {
+        return content;
+    }
+
+    /** @param content the new article content */
+    public void setContent(String content) {
+        this.content = content;
+    }
+
+    /** Returns a short description holding the title and URL; the content is left out. */
+    @Override
+    public String toString() {
+        return "Article{" +
+                "title='" + title + '\'' +
+                ", url='" + url + '\'' +
+                '}';
+    }
+}
diff --git a/W11/ArticleRepository.java b/W11/ArticleRepository.java
new file mode 100644
index 0000000..e9a97db
--- /dev/null
+++ b/W11/ArticleRepository.java
@@ -0,0 +1,114 @@
+package com.example.datacollect.repository;
+
+import com.example.datacollect.model.Article;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ * In-memory, insertion-ordered store of {@link Article}s backed by an {@link ArrayList}.
+ *
+ * <p>Only non-null articles whose title and URL are non-null and non-blank are accepted.
+ */
+public class ArticleRepository {
+    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
+    private final List<Article> articles = new ArrayList<>();
+
+    /**
+     * Appends one article after validating it.
+     *
+     * @param article the article to store
+     * @throws IllegalArgumentException if the article is null, or its title or URL is null or blank
+     */
+    public void add(Article article) {
+        validate(article);
+        articles.add(article);
+        logger.debug("Added article: {}", article.getTitle());
+    }
+
+    /**
+     * Appends every article in the list, or none of them if any entry is invalid.
+     *
+     * @param articlesToAdd the articles to store, in order
+     * @throws IllegalArgumentException if the list is null or any entry fails validation
+     */
+    public void addAll(List<Article> articlesToAdd) {
+        if (articlesToAdd == null) {
+            logger.error("Attempted to add null list of articles");
+            throw new IllegalArgumentException("Article list cannot be null");
+        }
+        // Work on a snapshot: the argument may be the live view returned by getAll(),
+        // and iterating that view while appending to the backing list would fail.
+        List<Article> snapshot = new ArrayList<>(articlesToAdd);
+        // Validate everything first so one bad entry cannot leave the batch half-applied.
+        for (Article article : snapshot) {
+            validate(article);
+        }
+        for (Article article : snapshot) {
+            articles.add(article);
+            logger.debug("Added article: {}", article.getTitle());
+        }
+        logger.info("Added {} articles to repository", snapshot.size());
+    }
+
+    /**
+     * Returns a read-only view of the stored articles.
+     *
+     * <p>The view wraps the backing list, so it reflects later additions and {@link #clear()}.
+     *
+     * @return an unmodifiable list view, never null
+     */
+    public List<Article> getAll() {
+        logger.debug("Retrieving all articles, count: {}", articles.size());
+        return Collections.unmodifiableList(articles);
+    }
+
+    /** @return the number of stored articles */
+    public int size() {
+        return articles.size();
+    }
+
+    /** Removes every stored article. */
+    public void clear() {
+        logger.info("Clearing repository, removed {} articles", articles.size());
+        articles.clear();
+    }
+
+    /** @return {@code true} if no articles are stored */
+    public boolean isEmpty() {
+        return articles.isEmpty();
+    }
+
+    /**
+     * Returns the article at the given position.
+     *
+     * @param index zero-based position
+     * @return the article at {@code index}
+     * @throws IndexOutOfBoundsException if {@code index} is negative or not less than {@link #size()}
+     */
+    public Article get(int index) {
+        if (index < 0 || index >= articles.size()) {
+            logger.error("Attempted to access article at invalid index: {}", index);
+            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + articles.size());
+        }
+        return articles.get(index);
+    }
+
+    /** Rejects null articles and articles whose title or URL is null or blank. */
+    private void validate(Article article) {
+        if (article == null) {
+            logger.error("Attempted to add null article");
+            throw new IllegalArgumentException("Article cannot be null");
+        }
+        if (article.getTitle() == null || article.getTitle().trim().isEmpty()) {
+            logger.warn("Attempted to add article with empty title");
+            throw new IllegalArgumentException("Article title cannot be null or empty");
+        }
+        if (article.getUrl() == null || article.getUrl().trim().isEmpty()) {
+            logger.warn("Attempted to add article with empty URL");
+            throw new IllegalArgumentException("Article URL cannot be null or empty");
+        }
+    }
+}
diff --git a/W11/BlogStrategy.java b/W11/BlogStrategy.java
new file mode 100644
index 0000000..9033aac
--- /dev/null
+++ b/W11/BlogStrategy.java
@@ -0,0 +1,60 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * {@link CrawlStrategy} for URLs containing {@code blog.example.com}: every element matching
+ * the selector {@code .post-title} with non-blank text becomes one {@link Article}.
+ */
+public class BlogStrategy implements CrawlStrategy {
+    private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class);
+
+    /**
+     * Reports whether this strategy applies to the given URL.
+     *
+     * @param url the URL to test; {@code null} is treated as unsupported
+     * @return {@code true} if the URL is non-null and contains {@code blog.example.com}
+     */
+    @Override
+    public boolean supports(String url) {
+        return url != null && url.contains("blog.example.com");
+    }
+
+    /**
+     * Builds one article per {@code .post-title} element that has non-blank text.
+     *
+     * <p>Each article takes the element text as its title, {@code url} as its URL, and an
+     * empty string as its content.
+     *
+     * @param url the page URL, copied onto every returned article
+     * @param doc the parsed page to search
+     * @return the extracted articles, possibly empty, never null
+     * @throws ParseException declared by the interface; never thrown directly by this method
+     */
+    @Override
+    public List<Article> parse(String url, Document doc) throws ParseException {
+        logger.debug("Parsing blog content from: {}", url);
+        List<Article> articles = new ArrayList<>();
+        Elements titles = doc.select(".post-title");
+        for (Element e : titles) {
+            String title = e.text();
+            if (title.trim().isEmpty()) {
+                // ArticleRepository rejects blank titles, so drop them here instead of
+                // handing back an article that can never be stored.
+                logger.debug("Skipping .post-title element with blank text");
+                continue;
+            }
+            articles.add(new Article(title, url, ""));
+        }
+        logger.debug("Parsed {} articles from blog", articles.size());
+        return articles;
+    }
+}
diff --git a/W11/CrawlStrategy.java b/W11/CrawlStrategy.java
new file mode 100644
index 0000000..7f6248b
--- /dev/null
+++ b/W11/CrawlStrategy.java
@@ -0,0 +1,27 @@
+package com.example.datacollect.strategy;
+
+import com.example.datacollect.exception.ParseException;
+import com.example.datacollect.model.Article;
+import org.jsoup.nodes.Document;
+import java.util.List;
+
+/** Turns a parsed page into {@link Article}s for the URLs it declares support for. */
+public interface CrawlStrategy {
+    /**
+     * Extracts articles from a parsed page.
+     *
+     * @param url the page URL
+     * @param doc the parsed page
+     * @return the extracted articles
+     * @throws ParseException presumably when the page cannot be parsed; confirm with implementations
+     */
+    List<Article> parse(String url, Document doc) throws ParseException;
+
+    /**
+     * Reports whether this strategy applies to the given URL.
+     *
+     * @param url the URL to test
+     * @return {@code true} if this strategy can handle {@code url}
+     */
+    boolean supports(String url);
+}
diff --git a/W11/Main.java b/W11/Main.java
new file mode 100644
index 0000000..c340d0c
--- /dev/null
+++ b/W11/Main.java
@@ -0,0 +1,33 @@
+package com.example.datacollect;
+
+import com.example.datacollect.controller.CrawlerController;
+import com.example.datacollect.repository.ArticleRepository;
+import com.example.datacollect.strategy.StrategyFactory;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** Entry point that wires the console view, repository and strategies into a controller. */
+public class Main {
+    private static final Logger logger = LoggerFactory.getLogger(Main.class);
+
+    /**
+     * Builds the collaborators, prints the welcome message, then hands every value read
+     * from the view to the controller.
+     *
+     * @param args command-line arguments (unused)
+     */
+    public static void main(String[] args) {
+        logger.info("Starting CLI Crawler application");
+        ConsoleView view = new ConsoleView();
+        ArticleRepository repository = new ArticleRepository();
+        StrategyFactory strategyFactory = new StrategyFactory();
+        CrawlerController controller = new CrawlerController(view, repository, strategyFactory);
+
+        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");
+        // No exit condition here: the loop ends only if an exception escapes or the process is stopped elsewhere.
+        while (true) {
+            controller.handle(view.readLine());
+        }
+    }
+}