diff --git a/W11/BlogStrategy.java b/W11/BlogStrategy.java
deleted file mode 100644
index 9033aac..0000000
--- a/W11/BlogStrategy.java
+++ /dev/null
@@ -1,32 +0,0 @@
-package com.example.datacollect.strategy;
-
-import com.example.datacollect.exception.ParseException;
-import com.example.datacollect.model.Article;
-import org.jsoup.nodes.Document;
-import org.jsoup.nodes.Element;
-import org.jsoup.select.Elements;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import java.util.ArrayList;
-import java.util.List;
-
-public class BlogStrategy implements CrawlStrategy {
-    private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class);
-
-    @Override
-    public boolean supports(String url) {
-        return url.contains("blog.example.com");
-    }
-
-    @Override
-    public List<Article> parse(String url, Document doc) throws ParseException {
-        logger.debug("Parsing blog content from: {}", url);
-        List<Article> articles = new ArrayList<>();
-        Elements titles = doc.select(".post-title");
-        for (Element e : titles) {
-            articles.add(new Article(e.text(), url, ""));
-        }
-        logger.debug("Parsed {} articles from blog", articles.size());
-        return articles;
-    }
-}