Browse Source

上传文件至 'W11'

main
zhouzihao 1 month ago
parent
commit
df65ecdf80
  1. 45
      W11/Article.java
  2. 67
      W11/ArticleRepository.java
  3. 32
      W11/BlogStrategy.java
  4. 11
      W11/CrawlStrategy.java
  5. 25
      W11/Main.java

45
W11/Article.java

@ -0,0 +1,45 @@
package com.example.datacollect.model;
public class Article {
private String title;
private String url;
private String content;
public Article(String title, String url, String content) {
this.title = title;
this.url = url;
this.content = content;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
@Override
public String toString() {
return "Article{"
+ "title='" + title + '\''
+ ", url='" + url + '\''
+ '}';
}
}

67
W11/ArticleRepository.java

@ -0,0 +1,67 @@
package com.example.datacollect.repository;
import com.example.datacollect.model.Article;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * In-memory store of {@link Article} objects kept in insertion order.
 *
 * <p>Backed by a plain {@link ArrayList}; this class performs no
 * synchronization of its own.
 */
public class ArticleRepository {

    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);

    private final List<Article> articles = new ArrayList<>();

    /**
     * Appends a single article after validating it.
     *
     * @param article the article to store
     * @throws IllegalArgumentException if {@code article} is {@code null}, or
     *         its title or URL is {@code null} or empty after trimming
     */
    public void add(Article article) {
        validate(article);
        articles.add(article);
        logger.debug("Added article: {}", article.getTitle());
    }

    /**
     * Appends every article in the given list, all or nothing: each element
     * is validated before the first one is stored, so an invalid element
     * leaves the repository unchanged.
     *
     * @param articlesToAdd the articles to store
     * @throws IllegalArgumentException if the list is {@code null} or any
     *         element fails the checks performed by {@link #add(Article)}
     */
    public void addAll(List<Article> articlesToAdd) {
        if (articlesToAdd == null) {
            logger.error("Attempted to add null list of articles");
            throw new IllegalArgumentException("Article list cannot be null");
        }
        // Copy first: the argument may be the live view handed out by
        // getAll(), and appending while iterating that view would fail with
        // ConcurrentModificationException.
        List<Article> snapshot = new ArrayList<>(articlesToAdd);
        // Validate everything up front so a bad element cannot leave a
        // partially-applied batch behind.
        for (Article candidate : snapshot) {
            validate(candidate);
        }
        for (Article candidate : snapshot) {
            add(candidate);
        }
        logger.info("Added {} articles to repository", snapshot.size());
    }

    /**
     * Returns a read-only view of the stored articles. The view is backed by
     * the internal list, so later changes to the repository show through it.
     *
     * @return an unmodifiable list view of all articles
     */
    public List<Article> getAll() {
        logger.debug("Retrieving all articles, count: {}", articles.size());
        return Collections.unmodifiableList(articles);
    }

    /** @return the number of stored articles */
    public int size() {
        return articles.size();
    }

    /** Removes every stored article. */
    public void clear() {
        logger.info("Clearing repository, removed {} articles", articles.size());
        articles.clear();
    }

    /** @return {@code true} when no articles are stored */
    public boolean isEmpty() {
        return articles.isEmpty();
    }

    /**
     * Returns the article at the given position.
     *
     * @param index zero-based position
     * @return the article stored at {@code index}
     * @throws IndexOutOfBoundsException if {@code index} is negative or not
     *         less than {@link #size()}
     */
    public Article get(int index) {
        if (index < 0 || index >= articles.size()) {
            logger.error("Attempted to access article at invalid index: {}", index);
            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + articles.size());
        }
        return articles.get(index);
    }

    /** Logs and throws if the article is null or has a blank title or URL. */
    private static void validate(Article article) {
        if (article == null) {
            logger.error("Attempted to add null article");
            throw new IllegalArgumentException("Article cannot be null");
        }
        if (article.getTitle() == null || article.getTitle().trim().isEmpty()) {
            logger.warn("Attempted to add article with empty title");
            throw new IllegalArgumentException("Article title cannot be null or empty");
        }
        if (article.getUrl() == null || article.getUrl().trim().isEmpty()) {
            logger.warn("Attempted to add article with empty URL");
            throw new IllegalArgumentException("Article URL cannot be null or empty");
        }
    }
}

32
W11/BlogStrategy.java

@ -0,0 +1,32 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} for pages whose URL contains
 * {@code blog.example.com}; it turns each {@code .post-title} element into an
 * {@link Article}.
 */
public class BlogStrategy implements CrawlStrategy {

    private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class);

    /**
     * Reports whether the URL belongs to the blog this strategy understands.
     *
     * @param url the URL to test; may be {@code null}
     * @return {@code true} if {@code url} is non-null and contains
     *         {@code blog.example.com}
     */
    @Override
    public boolean supports(String url) {
        // A null URL is simply "not supported" rather than a NullPointerException.
        return url != null && url.contains("blog.example.com");
    }

    /**
     * Builds one article per {@code .post-title} element that has visible
     * text. Each article gets the element text as title, the given URL, and
     * an empty content string.
     *
     * @param url the page URL, copied into every article
     * @param doc the parsed page
     * @return the extracted articles, possibly empty
     * @throws ParseException declared by the interface; this implementation
     *         does not throw it itself
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        logger.debug("Parsing blog content from: {}", url);
        List<Article> articles = new ArrayList<>();
        Elements titles = doc.select(".post-title");
        for (Element heading : titles) {
            String title = heading.text();
            // ArticleRepository.add rejects titles that are empty after
            // trimming, so one blank heading would fail the whole batch.
            if (title.trim().isEmpty()) {
                logger.debug("Skipping .post-title element with empty text on: {}", url);
                continue;
            }
            articles.add(new Article(title, url, ""));
        }
        logger.debug("Parsed {} articles from blog", articles.size());
        return articles;
    }
}

11
W11/CrawlStrategy.java

@ -0,0 +1,11 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import java.util.List;
/**
 * Contract for a parser that handles one kind of page: it declares which URLs
 * it accepts and converts a loaded document into articles.
 */
public interface CrawlStrategy {

    /**
     * Tells whether this strategy accepts the given URL.
     *
     * @param url the URL to test
     * @return {@code true} if this strategy handles {@code url}
     */
    boolean supports(String url);

    /**
     * Extracts articles from a document.
     *
     * @param url the URL associated with the document
     * @param doc the document to read from
     * @return the articles found
     * @throws ParseException if the implementation cannot parse the document
     */
    List<Article> parse(String url, Document doc) throws ParseException;
}

25
W11/Main.java

@ -0,0 +1,25 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
import com.example.datacollect.repository.ArticleRepository;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Command-line entry point: wires a view, a repository and a strategy factory
 * into a controller and then hands console input to that controller.
 */
public class Main {

    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    /**
     * Creates the application objects, prints the welcome banner, and then
     * passes each line read from the view to the controller. The loop has no
     * exit condition of its own.
     *
     * @param args command-line arguments; not read
     */
    public static void main(String[] args) {
        logger.info("Starting CLI Crawler application");

        final ConsoleView view = new ConsoleView();
        // Arguments are evaluated left to right, so the repository is still
        // constructed before the strategy factory.
        final CrawlerController controller =
                new CrawlerController(view, new ArticleRepository(), new StrategyFactory());

        view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands.");

        for (;;) {
            controller.handle(view.readLine());
        }
    }
}
Loading…
Cancel
Save