From 8e229ed347b05fe7ef40b6726c6e890e58926c21 Mon Sep 17 00:00:00 2001 From: zhuyanshuo <3663541984@qq.com> Date: Tue, 19 May 2026 12:10:35 +0800 Subject: [PATCH] =?UTF-8?q?=E7=88=AC=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- w11/pom.xml | 43 +++++++++ .../java/com/example/datacollect/Main.java | 23 +++++ .../example/datacollect/command/Command.java | 6 ++ .../datacollect/command/CrawlCommand.java | 43 +++++++++ .../datacollect/command/ExitCommand.java | 22 +++++ .../datacollect/command/HelpCommand.java | 21 +++++ .../datacollect/command/ListCommand.java | 24 +++++ .../controller/CrawlerController.java | 50 +++++++++++ .../exception/CrawlerException.java | 11 +++ .../exception/NetworkException.java | 11 +++ .../datacollect/exception/ParseException.java | 11 +++ .../example/datacollect/model/Article.java | 25 ++++++ .../repository/ArticleRepository.java | 54 +++++++++++ .../datacollect/service/CrawlerService.java | 85 ++++++++++++++++++ .../datacollect/strategy/BlogStrategy.java | 30 +++++++ .../datacollect/strategy/CrawlStrategy.java | 10 +++ .../datacollect/strategy/NewsStrategy.java | 30 +++++++ .../datacollect/strategy/StrategyFactory.java | 29 ++++++ .../datacollect/strategy/WeiboStrategy.java | 38 ++++++++ .../example/datacollect/view/ConsoleView.java | 42 +++++++++ w11/src/main/resources/logback.xml | 24 +++++ .../com/example/datacollect/Main.class | Bin 0 -> 1597 bytes .../example/datacollect/command/Command.class | Bin 0 -> 211 bytes .../datacollect/command/CrawlCommand.class | Bin 0 -> 2526 bytes .../datacollect/command/ExitCommand.class | Bin 0 -> 908 bytes .../datacollect/command/HelpCommand.class | Bin 0 -> 893 bytes .../datacollect/command/ListCommand.class | Bin 0 -> 1095 bytes .../controller/CrawlerController.class | Bin 0 -> 3091 bytes .../exception/CrawlerException.class | Bin 0 -> 589 bytes .../exception/NetworkException.class | Bin 0 -> 613 bytes .../exception/ParseException.class | Bin 0 -> 607 bytes .../example/datacollect/model/Article.class | Bin 0 -> 765 bytes .../repository/ArticleRepository.class | Bin 0 -> 3143 bytes .../datacollect/service/CrawlerService.class | Bin 0 -> 5511 bytes .../datacollect/strategy/BlogStrategy.class | Bin 0 -> 2361 bytes .../datacollect/strategy/CrawlStrategy.class | Bin 0 -> 424 bytes .../datacollect/strategy/NewsStrategy.class | Bin 0 -> 2370 bytes .../strategy/StrategyFactory.class | Bin 0 -> 2392 bytes .../datacollect/strategy/WeiboStrategy.class | Bin 0 -> 2655 bytes .../datacollect/view/ConsoleView.class | Bin 0 -> 2619 bytes w11/target/classes/logback.xml | 24 +++++ .../compile/default-compile/createdFiles.lst | 19 ++++ .../compile/default-compile/inputFiles.lst | 19 ++++ 43 files changed, 694 insertions(+) create mode 100644 w11/pom.xml create mode 100644 w11/src/main/java/com/example/datacollect/Main.java create mode 100644 w11/src/main/java/com/example/datacollect/command/Command.java create mode 100644 w11/src/main/java/com/example/datacollect/command/CrawlCommand.java create mode 100644 w11/src/main/java/com/example/datacollect/command/ExitCommand.java create mode 100644 w11/src/main/java/com/example/datacollect/command/HelpCommand.java create mode 100644 w11/src/main/java/com/example/datacollect/command/ListCommand.java create mode 100644 w11/src/main/java/com/example/datacollect/controller/CrawlerController.java create mode 100644 w11/src/main/java/com/example/datacollect/exception/CrawlerException.java create mode 100644 w11/src/main/java/com/example/datacollect/exception/NetworkException.java create mode 100644 w11/src/main/java/com/example/datacollect/exception/ParseException.java create mode 100644 w11/src/main/java/com/example/datacollect/model/Article.java create mode 100644 w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java create mode 100644 w11/src/main/java/com/example/datacollect/service/CrawlerService.java create mode 100644 w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java create mode 100644 w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java create mode 100644 w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java create mode 100644 w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java create mode 100644 w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java create mode 100644 w11/src/main/java/com/example/datacollect/view/ConsoleView.java create mode 100644 w11/src/main/resources/logback.xml create mode 100644 w11/target/classes/com/example/datacollect/Main.class create mode 100644 w11/target/classes/com/example/datacollect/command/Command.class create mode 100644 w11/target/classes/com/example/datacollect/command/CrawlCommand.class create mode 100644 w11/target/classes/com/example/datacollect/command/ExitCommand.class create mode 100644 w11/target/classes/com/example/datacollect/command/HelpCommand.class create mode 100644 w11/target/classes/com/example/datacollect/command/ListCommand.class create mode 100644 w11/target/classes/com/example/datacollect/controller/CrawlerController.class create mode 100644 w11/target/classes/com/example/datacollect/exception/CrawlerException.class create mode 100644 w11/target/classes/com/example/datacollect/exception/NetworkException.class create mode 100644 w11/target/classes/com/example/datacollect/exception/ParseException.class create mode 100644 w11/target/classes/com/example/datacollect/model/Article.class create mode 100644 w11/target/classes/com/example/datacollect/repository/ArticleRepository.class create mode 100644 w11/target/classes/com/example/datacollect/service/CrawlerService.class create mode 100644 w11/target/classes/com/example/datacollect/strategy/BlogStrategy.class create mode 100644 w11/target/classes/com/example/datacollect/strategy/CrawlStrategy.class create mode 100644 w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class create mode 100644 w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class create mode 100644 w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class create mode 100644 w11/target/classes/com/example/datacollect/view/ConsoleView.class create mode 100644 w11/target/classes/logback.xml create mode 100644 w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst create mode 100644 w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst diff --git a/w11/pom.xml b/w11/pom.xml new file mode 100644 index 0000000..79b3cec --- /dev/null +++ b/w11/pom.xml @@ -0,0 +1,43 @@ + + + 4.0.0 + + com.example + datacollect + 1.0-SNAPSHOT + + + 11 + 11 + UTF-8 + + + + + org.jsoup + jsoup + 1.15.3 + + + ch.qos.logback + logback-classic + 1.4.8 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 11 + 11 + + + + + \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/Main.java b/w11/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..b4dc6f3 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/Main.java @@ -0,0 +1,23 @@ +package com.example.datacollect; + +import com.example.datacollect.controller.CrawlerController; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; + +public class Main { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + StrategyFactory strategyFactory = new StrategyFactory(); + CrawlerService service = new CrawlerService(repository, strategyFactory); + CrawlerController controller = new CrawlerController(view, service); + + view.printSuccess("Welcome to CLI Crawler (w11)! Type help for commands."); + while (true) { + controller.handle(view.readLine()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/Command.java b/w11/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..2ef4a45 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,6 @@ +package com.example.datacollect.command; + +public interface Command { + String getName(); + void execute(String[] args); +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..cc9c927 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,43 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final ConsoleView view; + private final CrawlerService service; + + public CrawlCommand(ConsoleView view, CrawlerService service) { + this.view = view; + this.service = service; + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args) { + if (args.length < 2) { + view.printError("Usage: crawl "); + return; + } + String url = args[1]; + + try { + view.printInfo("Crawling: " + url); + List
articles = service.crawl(url); + view.printSuccess("Crawled " + articles.size() + " articles."); + } catch (IllegalArgumentException e) { + view.printError(e.getMessage()); + } catch (RuntimeException e) { + view.printError(e.getMessage()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java new file mode 100644 index 0000000..1d2aa11 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -0,0 +1,22 @@ +package com.example.datacollect.command; + +import com.example.datacollect.view.ConsoleView; + +public class ExitCommand implements Command { + private final ConsoleView view; + + public ExitCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args) { + view.printSuccess("Bye!"); + System.exit(0); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..80a80c2 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,21 @@ +package com.example.datacollect.command; + +import com.example.datacollect.view.ConsoleView; + +public class HelpCommand implements Command { + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args) { + view.printInfo("Commands: crawl , list, help, exit"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/src/main/java/com/example/datacollect/command/ListCommand.java new file mode 100644 index 0000000..1bcc30a --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/ListCommand.java @@ -0,0 +1,24 @@ +package com.example.datacollect.command; + +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; + +public class ListCommand implements Command { + private final ConsoleView view; + private final CrawlerService service; + + public ListCommand(ConsoleView view, CrawlerService service) { + this.view = view; + this.service = service; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args) { + view.display(service.getAllArticles()); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..dee257d --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -0,0 +1,50 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.Command; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ExitCommand; +import com.example.datacollect.command.HelpCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>(); + private final ConsoleView view; + + public CrawlerController(ConsoleView view, CrawlerService service) { + this.view = view; + register(new HelpCommand(view)); + register(new ListCommand(view, service)); + register(new CrawlCommand(view, service)); + register(new ExitCommand(view)); + } + + private void register(Command command) { + commands.put(command.getName(), command); + } + + public void handle(String input) { + String text = input == null ? "" : input.trim(); + if (text.isEmpty()) { + return; + } + + String[] args = text.split("\\s+"); + String cmdName = args[0].toLowerCase(); + logger.info("Received command: {}", cmdName); + + Command command = commands.get(cmdName); + if (command == null) { + logger.warn("Unknown command: {}", cmdName); + view.printError("Unknown command: " + cmdName); + return; + } + command.execute(args); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..d9c9c2e --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends RuntimeException { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..0fb8e5e --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message) { + super(message); + } + + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..205665a --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/model/Article.java b/w11/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..b98ff71 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,25 @@ +package com.example.datacollect.model; + +public class Article { + private final String title; + private final String url; + private final String content; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public String getTitle() { + return title; + } + + public String getUrl() { + return url; + } + + public String getContent() { + return content; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..36d2ebd --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -0,0 +1,54 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ArticleRepository { + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private final List
articles = new ArrayList<>(); + + public void add(Article article) { + if (article == null) { + logger.warn("Attempt to add null article"); + throw new IllegalArgumentException("Article cannot be null"); + } + if (article.getTitle() == null || article.getTitle().trim().isEmpty()) { + logger.warn("Attempt to add article with empty title"); + throw new IllegalArgumentException("Article title cannot be empty"); + } + articles.add(article); + logger.debug("Added article: {}", article.getTitle()); + } + + public List
getAll() { + return new ArrayList<>(articles); + } + + public void clear() { + int count = articles.size(); + articles.clear(); + logger.info("Cleared {} articles", count); + } + + public int size() { + return articles.size(); + } + + public Article get(int index) { + if (index < 0 || index >= articles.size()) { + logger.warn("Invalid index: {} (size: {})", index, articles.size()); + throw new IndexOutOfBoundsException("Index out of bounds: " + index); + } + return articles.get(index); + } + + public boolean contains(Article article) { + if (article == null) { + return false; + } + return articles.contains(article); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/service/CrawlerService.java b/w11/src/main/java/com/example/datacollect/service/CrawlerService.java new file mode 100644 index 0000000..63121f5 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/service/CrawlerService.java @@ -0,0 +1,85 @@ +package com.example.datacollect.service; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerService { + private static final Logger logger = LoggerFactory.getLogger(CrawlerService.class); + private static final int MAX_RETRY = 3; + private static final long RETRY_DELAY_MS = 1000; + + private final ArticleRepository repository; + private final StrategyFactory strategyFactory; + + public CrawlerService(ArticleRepository repository, StrategyFactory strategyFactory) { + this.repository = repository; + this.strategyFactory = strategyFactory; + } + + public List
crawl(String url) { + CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + logger.warn("No strategy found for URL: {}", url); + throw new CrawlerException("No strategy found for: " + url); + } + + int retryCount = 0; + Exception lastException = null; + + while (retryCount < MAX_RETRY) { + try { + logger.info("Crawling URL: {} (attempt {}/{})", url, retryCount + 1, MAX_RETRY); + Document doc = Jsoup.connect(url).get(); + List
articles = strategy.parse(url, doc); + articles.forEach(repository::add); + logger.info("Successfully crawled {} articles from {}", articles.size(), url); + return articles; + } catch (ParseException e) { + logger.error("Parse error: {}", e.getMessage()); + throw e; + } catch (Exception e) { + lastException = e; + retryCount++; + logger.warn("Network error (attempt {}): {}, retrying...", retryCount, e.getMessage()); + if (retryCount < MAX_RETRY) { + try { + Thread.sleep(RETRY_DELAY_MS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new CrawlerException("Crawl interrupted", ie); + } + } + } + } + + logger.error("Failed to crawl {} after {} attempts", url, MAX_RETRY); + throw new NetworkException("Failed to crawl after " + MAX_RETRY + " attempts", lastException); + } + + public List
getAllArticles() { + List
articles = repository.getAll(); + logger.debug("Retrieved {} articles from repository", articles.size()); + return articles; + } + + public int getArticleCount() { + int count = repository.size(); + logger.debug("Article count: {}", count); + return count; + } + + public void clearArticles() { + repository.clear(); + logger.info("Cleared all articles"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..97b1078 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java @@ -0,0 +1,30 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BlogStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class); + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>(); + Elements links = doc.select("article a, .post a, .entry a"); + for (Element link : links) { + String title = link.text(); + String href = link.attr("abs:href"); + if (!title.isEmpty() && !href.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + logger.debug("Parsed {} articles from blog", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..8905336 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,10 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.List; +import org.jsoup.nodes.Document; + +public interface CrawlStrategy { + List
parse(String url, Document doc) throws ParseException; +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..16710e2 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java @@ -0,0 +1,30 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NewsStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(NewsStrategy.class); + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>(); + Elements links = doc.select(".news-item a, .headline a, h2 a, h3 a"); + for (Element link : links) { + String title = link.text(); + String href = link.attr("abs:href"); + if (!title.isEmpty() && !href.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + logger.debug("Parsed {} articles from news", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..cf8dc94 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,29 @@ +package com.example.datacollect.strategy; + +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class StrategyFactory { + private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); + private final Map strategies = new HashMap<>(); + + public StrategyFactory() { + strategies.put("example.com", new BlogStrategy()); + strategies.put("news.ycombinator.com", new NewsStrategy()); + strategies.put("weibo.com", new WeiboStrategy()); + logger.info("Initialized {} strategies", strategies.size()); + } + + public CrawlStrategy getStrategy(String url) { + for (Map.Entry entry : strategies.entrySet()) { + if (url.contains(entry.getKey())) { + logger.debug("Found strategy for url: {}", url); + return entry.getValue(); + } + } + logger.debug("No strategy found for url: {}", url); + return null; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java new file mode 100644 index 0000000..f76f64d --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java @@ -0,0 +1,38 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class WeiboStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(WeiboStrategy.class); + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>(); + + Elements cards = doc.select(".card-wrap, .WB_cardwrap, .feed-item"); + for (Element card : cards) { + Element titleElement = card.select(".txt, .WB_text, .content").first(); + Element linkElement = card.select("a[href*='/status/']").first(); + + if (titleElement != null) { + String title = titleElement.text().trim(); + String href = linkElement != null ? linkElement.attr("abs:href") : url; + + if (!title.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + } + + logger.debug("Parsed {} articles from weibo", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..987b617 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,42 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + return scanner.nextLine(); + } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/resources/logback.xml b/w11/src/main/resources/logback.xml new file mode 100644 index 0000000..0ca4943 --- /dev/null +++ b/w11/src/main/resources/logback.xml @@ -0,0 +1,24 @@ + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + ${LOG_PATH}/crawler.log + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/classes/com/example/datacollect/Main.class b/w11/target/classes/com/example/datacollect/Main.class new file mode 100644 index 0000000000000000000000000000000000000000..46c9dd62058f1c0fb03d546cef176c9f19334e7f GIT binary patch literal 1597 zcmaJ>+fEZv6kVq+Oc`5R%S8~ZgLr8vbSMHUMU*rlF)5f>QbUXnhv~5lOlO+u0P;mX z`$7|&_yK-^zv7#?Pg`t4na;zx?zQ%seL27W{`d*t1>UL%AgCY|!4QTS;>Y}q>lU}G z`uoFUVR#I~o2G4gTMWTex~w3~kTRT_F23?w-4c3*d)#m=i){5XQ=IDs$8I>5DAQVn zBFXPCtolJ*QFj`q=eQU8w(FUOB|iK=QK3rsD8sTJyy3ar6V(g7)LrawLz1b8O6XmN ztS?l!XQm;g&jE6>*HoV2m@Iq>6svL9+9{C_lA%8Q94IQ8FCcb7hd-Yq=EV z?um*COi9Jl44?fK-^|SbmA3|lG=?$5u=Y_{6cwR)j#enX)4Ji)Qs=o`dQRKFs0-~# zSat1-<7#AB<94Nym6h-yf+QX?MCz_-d!;I!dEySIL(I%ti1B^9*5E z@Ji9N1;cnM-ES=;@Faq#l6?4x{8>Uliec6-d%N7U8A1i8LX4$tWMlV1Q#K7cf)X{Xc7XdYTi^NXJQP2I(?PuUZ_Vm_iKmw9=8l8phj# zwmMF+j2zj9@d_(=P9o#9UnQ#mq0(5VH$O<&Bwmt5sLil}JpKD7p=Q9Y$aGl2YXyn| SRY6oiT%rB~QEkx}z?*;1lEDN3 literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/command/Command.class b/w11/target/classes/com/example/datacollect/command/Command.class new file mode 100644 index 0000000000000000000000000000000000000000..560e01c132d7fdce6ce575d0239c7a09309adc40 GIT binary patch literal 211 zcmZXOJqp4=5QX2ypVeq#Azs0xaZ3wJ!Ai6U*jTNbAtdZ35;l^vS$F^sB~DPVup0R0 z?Q$-#_tnUS=B>s3p^Q78cR@Ux-fvVNx}_u`~0{cYMFYGbtR!F%2K+IF1tn+5hXJK&s~3Zg|i4 zJzwBRwzLmnIpj^HkXuX>?2|f9;k3Z9s<|l{qh*FmcDSyh4NW%;1TMCdZB?f{&FDC* z-0Y2P&$??~3goE$c6%CHrGCmk91sBap+MT<+^3bAkc7K;M_ipv#hC#>D=)v^{OP& zFDzLLZO={N8gA&U;+srDMTT>fWm*MBvTe+1m=ZX#uk~@=4ehFI(l&`3DSV0<4YzdM z#vSI$^D9Q+tX^8& zSct8B8XgE->Xbgx)^Pd2d~EC*gEOus@KhugTp8t2eZAnX@Zquc5D`^dbk?V>Yz zz}xALFjWbfE{xYZQ=A2oJkk+uI;rsXIb!?vKOj}N4VyDVp-M8d@TjguG zc@|G7w<&(t^THtX&Dxv{*S*ys33B#DoSa-!8IeSV2t)+Anh}{@=qCnyV5hKwO%0A} z^D0r_$HI2EyiIAuKE=asuXTQIZkGmQZ2lpi45oFox&Cdzw)3&gk$dvoIA z=ZuZn;$+cuoU$ECf~?QFuJof8mK;Mh_I3+B3R(5?H7bka;udcv-bm4>= z2z(vV>OqZGzI9v^yzjU%AApdm_@&NL?)2)let!2w%pc zVuIYth2^n#-M^qe{~NUQ;cXmwJ(jpSr%vNL_z>8}86Nt_w{dP>Q7`|AO9DH%tgf!S z!s(d0jgKk&3WK}Hf>K2jBbdf5j(xSxW0EFS^izx%RNsS>jPMlWIzx_g1o}Mk7$t9x zQWrSWFEWfAB`?wSES~aKp)^CuJQ4c}kBQf9VpPTgcaNgolL*7*Jq(wzh$V(Sf(Q6I zO3hR1P|(CWmdXEZTHQ_bgZS>rE&B!1Np3 nRe2RrWHI=C)M%1+2rIC-4sf?lj@9V92A8xH&mMk6faCuH50s9- literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/command/ExitCommand.class b/w11/target/classes/com/example/datacollect/command/ExitCommand.class new file mode 100644 index 0000000000000000000000000000000000000000..f6129fd8207690e609b0b81b8fd5c5cc843c3671 GIT binary patch literal 908 zcma)5$!^n76g@9a4bDyt&{`&p{z*9UnP=KbRXkrZ|f$E_=mToBHvHNCtsQgr* z^el*j^tpg$J6;84RCE|7uD}#%`$^=gxr~lN<&I=3{Ui)o<f>T}Q*jCYl2FWx)i@qe&2_gPHHE z=~Q6T?w?INNEsV<9nZjZ+|Y5;goRswUC7{MnyQE_>{pJb8bS-UxuEX1ev{b7SneD2kNZsamwf1=jaOPW0mg9 zh^P(Qxp)^$P;=$aQYJL*gNq3{Q8^j2b^SkK0b`KNCcfGWGUnBD&7Wi!k1Z}!0Rp%A z*0P1-N@~>CaF^#jT6>HaD2>)vh*tYE^pCU(xX;sKE{IS>g^xirpt;D#18mb`U5+Y< zwu6UD%nqesOslPbpjd#Fa+j5Zn2YcdMBov2>B;E4W_Cu$YJGxU+xUX3+BaPLmr|W{ X8d&Fgo>8*?P_kgdAs+vOynFjM-B2-bpnZuFizY zSt!p5z0ta?Eba}Hc%CTf^LbE1gVRo!q=qJ8yP%rCae`EwD`#(!s{OvB!ny7{8LaQf zWvETqXc=f**g}WkUbh~hK1;(`zmGp9ge`Y;aT`aP1Jgm*H*klAtYc(<-ghCCue4wH!_Y>vWM9xkG>Oh1aTVvjva0-=17hM8SBEBYjybCI<*{^_~&9R>trD)pg= zn6>S8uQtg5xs<^|Goa%hUj^hsMLOlut^b4x=25ap19=c;%A1#pzshbDPk4$-Al&BH z&L)XxvCQWU+~xHiTL&C3@ox5hfOY#g!{ZU7z<{opw5qp2W)j#XgtItwm2_m zC1!gJr{LV>oj6vuZ+_-&MOMvSjhw_k32gf{OQqp(RXN*a3cU@3bijdk3N*7c}QPss@Ms%XVj*KDcU$s(_ zN?r5=`cYNy*q|bWH+N?4+&SmWnQQ;~`|CG=H}Fj4kT+1Uu!SN+?VQiKr+6^-J`T@? zA2Sr+%0R~N81hbYP{KA!222Y(uozl?IPt_4pIj*6jd;xcP$@d)QGUXMk=K*cxSP%y z3Uetg8QfmhDZTG?!(bXJF`#)z(^V`~QDZQsBAQEIFucwLSd@6(h+irZ_0tobZP>)F zNQ;R*+%eFwu#dY8?JOmmffy=d5uYe^62;P2V#;7U&0gZ!ES4&{ZpXxZJTUOk!XrFp zaI!eoZ%bDk$?1jS-x%u7I%;!Z;DF&U3yt!Lc!fw&U>ET)2n^EvfwwIC77qcKs}H*7PQ{cslf;=R<$Yb)e!{i=9M4VVMN6N-H$ZpDZfP(49{qFravSatL{%Qw{-)d&PSHkzqDgTFB?79Tj2DzS zNRQzqUeSsG5*n7wG?Au1gtMSIrcv+Z?cMJ*q|n7AwVS|M>GlGSp$V5VTHyz(&S_Zp mn#5hBe2e-m?)_J&{{=vUl#o2m_OubY@tr-R!>a z>&JfTXWI(Jp3~EBp3~pdM0->(>xPpklR$pn}GtA_$=FW_0`6v{Gs!;<~ zL9L1{s1rDpv2#g%Nz3I;JvpU$TE;d_T1;ka%X6gcBnKRAk?(=^9f1gs`w8!55%YGSaONK{rj)>vVUbP14;ZaHTv_ zNnFZ0=IYLZk!CcpQmFClw7tlV(_CG)(S>fb7q{w(GCg;0&Hu@Pdl7 z7^8n1kJ3NGnzlEmT12!EwD`D8%6S#zxInj=KY_i)POj;d9*tl^VB2|X&axM+jTA4b zxGedF7d6LVFXwGSKI%h;i^DY?PjGPvpW_j)g*tOUe!LEmP5*Y zbd@X4jZG&I&9e%Dk6q3~3q9c34)`3~P{BfsSI_0aJ6`RJ=6*JTqowEzO&Eh@TFk;7@-J<^Iy3lpQFA z%HdS@D=2j=#heV-Q?y!_HY|N~J~yd5=d?+aJ1oSEW{zu)A=ib?us35cJBJ?tQT}cr zno^i)0{hFFC^PnWxWHc^UiR=t7ui`eY!>NIrD6FqB~}_SIS2G5R(n`;vh;7;CMlNq zxv5~F)sFEton!RX}P_*}skvTwc=*y9gNf0`QBf<31vgQ5*uX}#2bEH1BtVgNSm z$Hd)}w(Yt3eDO#vRDV&7?mKwyT3+8oy*OT*42gn?0nIeW3{PhvsHZH8Q&5(??kf0( zy-?!QW>gh?C(!y}RpvbK3%fXBD>!dL*dfO(rI`FSg1{@3E8uX=uNlf=?(yd4)8*=C z$|1f_cCSEm{|RMc1=Y6@?M~c6J=YBri4`<0qlNO$iA2u|9t#%j{$fwC=#W~SlyCZA z9-r-q@Yz7Pt)$UNsR?apMlZJWifX|SiOy4_Er7=Y7P%r3pC)fKN=vv#>J{?qra<>J zwWXV}?%xqjtfGt8{3`Z^aN!=Z{LTb+61UYyXbMiOBgF73uG5~tYnTq~ibmS4;;Grh z9YkV>eu0u$#^I5kWu$&W_-0S+>17-r?F}7V#$aFgR}9C}E#Xy+h9HSZ@mvM|M!c`G zrScA1rNWDH_oA;*-x9u!E5I_en=O^StH@SB@(n>o>M&~g+{JXYGful1r@ds~i4Jt( z0J?D;2}Y}j7hf-~Vjnc@=cGR1`*ti~z3$sAUdJ2sX$!BRHz`%n(g@zd4Ptd*5N~rY zOp9?p29P2J8Q1?_QAJbqY^}sM08eSy0Kjc~&ohM7lRK*O} z-v3*580!oW^e Uwr`l7!>AMoILt~YPqdox zVI-)`#rD7E=?XJb`f+8RLPw1RLp6|z=uYFI(0$8fs0AwI(SU1d^)gv76PYqx1%I!; z-m1mm#v)DmID`M%ouL@=X-bdY#w8ibJvG&#xRZ7~Z=HW(i8z7%L3?r_A{>epQf<;U z?=+fou-EK;hvLFr%$&QV>kAjE*ds-L?4w2u%PK8d_sG))+(z?!4tE_^qM|Y^?08}O Tfw@Z*cMxsUtnl#aIDqzCq~ct zPzfq?vCVM)_dNI9#A0olTvv|-L)n*+xSNGTVfvQHQ1NxZ)qopm^(vW5CNieI{r@&< zGq|CMV?IvNzxHOx2YeROV{PMt48@+FnLyk~J6!eq8ZFDiG3?*jlL7JIP%M$^khXcJ z(OiH%r|&xym+pM(+$CLKx=_X*De_|<6=GOcVa2*dmM-8nnr92RKVbzbD#F5!mbNdL RyMj?EsyIkTtdTE+`UiXgjV1s9 literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/model/Article.class b/w11/target/classes/com/example/datacollect/model/Article.class new file mode 100644 index 0000000000000000000000000000000000000000..c46aa021e13b649f23fd075b0564ead3b56f01ea GIT binary patch literal 765 zcma)3%SyvQ6g|_Xjg8UPw!UAu5Zn03QbZ9%a8-2C+Wj;Rb;>l6I2Auj!6LZu1N;_Ta7tg%^poD+NH4h@C2<7*qJE8oGSj#{OhMbn##B$@0zo}yx zc25oCyo;it%MM~?>ID~NQ`QU3u_wtcpuQAorP8nvpLbuZARcrNeQRE;^+{TK*q2%)SNlr5w#DTbyMv6PK z>d5$m9r7SU|C1#f$Y;vR#-JpUF%&J3si@FhrApDXMX~C=gL(Ty*xtbqbKc<)rB~`_ zut@h3AwbL)3UnOI6L*E!mZ3ZvLrK`jhd?Y6w<`7inY;ZpQ4${d2g^^ oC`ni4lV&lYcGH0oHN*Nu@Z5iJWg>WEBDnM)ygU)Sndsa40iY^|ssI20 literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/repository/ArticleRepository.class b/w11/target/classes/com/example/datacollect/repository/ArticleRepository.class new file mode 100644 index 0000000000000000000000000000000000000000..732e8649fd96c7f30af3f1decf2745d36d3cea3a GIT binary patch literal 3143 zcmb7GTUQfT6#h=QOb8=FP|(K9fHy8`s-;*AwIWqaMQMd%(cUJ>5C$eQ>Er}KYw!2F zZ6Dgzr@s5ptB9`E_NiT~KdP&H&kSL>=t8oXoH^NN?{9y5e|zTN|L*<);26G*A%ZFe z)hcRGE6_Nt&+17_xAV!7%(Rh}0=507ZORh@RUMt<3Zeqdo>5tvR&u~~_4#45Bx6ue zRfl@03O1=|z-ED-tW!uDb9$j@8OfY3^{iuA$AMAU^E4`DDqO?3qpudwZ!UwhAN$q%;aesYyrEb2-f} zTb33$uSYAkDcG*!5hN(kY7YX{S9RAG*xKP^@HiQju4(5}%)?HB_GPltlvrN32HbqP zVA%5XT-GQ`)3Kx2EzlAqsbzKBcBGauJSZ{jKpXR(h+z-*D%huDKMn}&yB}`_CucC{ z9%V#S-jHLavGtRvfEH?{X8f~%BR3)hgBr;sDNUYP7`DEUbQbr zaa7>IDi#9*wX3F_(wN?PO)`imdIWYZrRr^k67~%1(TigWo>0+;;{pvUi%T)7N~;+6 zyD-&BSo#H;268zgw^YO9+VvY;#}g`^!byReoRKN#3Gq553$e50a-4hi6c=DnV0&=J z>*_(jPpN)XY6g8;#Tg6{UnTPzu}w-y^LARrGpw2#>R{cCVwkis$VN+jP*( zX{Mbs=9mwygCX*&laEYre_s~p+Ylw|k)+^7fjwc!6ltU^MWjj}@w0fKd%F8NV z!K(tT1%1XCbnL7yFHxJ&sw8z=lA^b+OfL{r3|G*`jAm8jzzkDsuQ3h^$98!YQ`|z) ztYb^vw29+s%qXx_6hNwNU9IOcdY0#nKplDZhXe^N%*vppmrAM5wX=lIjGB2{mt~ib z_4y=ueZnKDe|SS`O`S+l;RZ>-0-J|T+qh6JWDIvq&#-)Hf@O7UTz5^r4>qgilu3;B z-Z#L5#wbPW>bUR86@!F&oP;09n$)wwQv#btX{67b*NXw*TB3HqvM8OUX;fi};H1`U zJ`8m=S?V@w36bW+NFp0!owVsm<6+n!xkUT3R`Aq|jXGsFYn(B;PD_(~`Ehlal|i%h zl;g;fboJtSLryulQWPJOd75RZIc2FilUl~p%W+L?uEobOe1cCEe8z(LTww3Az)XAA znK6>SGJFkHn)kbK!v=nd^!$&|po!2}qn7|$ z#ra(W0*<#^M3mO_Dnr-TXy*i~XazKOEkJbLgEFyzy5AA|*|T=($}d1b9jeK5iB_s_ z4~a6}k=`*I_*x`iYe_#YYt&CVaSxIl??oztx~>TXQqrT*GxTV@i4hN>pZpp)6qFC)$sAftffU5u!p3 z9Qy>nPQTMLkFM-16HwC5eirexK)=5_E5NCpOWeWvMU48X`4Qrj6tJ6!*C?Sj`m~4A zXb+;rD7hPWgW?E&^ak;Iyn8BICEl%7$oGYi$0|k4;myFqO^VJ%97_Cv#ze)0y7Nnksx{B5@Zl0SnN*>LnJDiQq5nVdDIX`Dt~c30>%>Pah72(i*B%X9#b3 zX>oN3O49)XHifqXc;f_);Wbf@)l}VLDAXyF5pbSE?$2J=C5evwiBW1$0)+Pv=_m9c%QymQXkR(2l#^SVzl}aH}MU& F{|`Ta57z(y literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/service/CrawlerService.class b/w11/target/classes/com/example/datacollect/service/CrawlerService.class new file mode 100644 index 0000000000000000000000000000000000000000..6e8c8b70e8800c1d3315b050d51cdec078edd75f GIT binary patch literal 5511 zcmcIo33wD|8Gir0X0uEPgew8kE-j@=HqBC?1+s{y0SdT*nm}l*2$S7OGGw!p&dzca z@vKL^s8L1-y+-Jyi&z zNL(rMN4a(XHJ^LXUP>+Mbkdb-P~$LY6*aW(?c3 z_6b}bue80mD%x(@Mk1wm752rX4O3OrV46Tp#@6c26Q_wRbWkHyck9f8|i^q(|T<7J(Sddy`CQs%&b zZVA-JO=}>UN%b$=6^%P%u^{FPG;A`%dF#UcW;UJVrxo7X6>kggKQIO7;d}*Q6$@|y zb39&#fNzgxr3Ef7V|a-xE+S%0Jqi{IEU6SGeQ!b^u?;hw&;Hu{KrJ!1NX5ly6qqxt z4e1?bI-%Ly4117o8Cy%+8G%J*tY|7RBNx#SE=040ZA4&Jxh*|GTp|#5!paonqY-Y@ zY+D~5u^E}@{sT=RM6p=Gr7D(SslfDN>Ncc_EE#rXx0cH4oBIW3H*P33CAT6bBYc^P z%dwoXG}8U1Kw}w`t|{@N5X=x(qD?_e#VWjjh?d#el`*p;(T(zx+7o6v?UZ738JSHE zB-{-QqFl(()1nvRMGD$gT!9XOnMJ9^MJ#FJR$zW(sgSgp)HBgFW+FSRr)>&ai**XF zRIwf#Cad^dQMpOa!&tySqFEVTV09VWOH3c9H5cTpZKR@cBV)5hUyQhdP8FN5IiDqG z+6^ReGu6_s%^H@z$xLs`rc&d8l#lns6x2fCB`Uhm%}Ub}61)Oi8!Hyv4a537}qf&r1K3Dx0=LlpTNp;2`UytE`Afk zC+Q$L32PSV;3|oP0vQCsda7~8!n$RdmZYi>5*SorU>AD@!P!aYv;kT>lNc=;lcNA> zeoDo#WHrAdLjg-~MsZ<@wrd3B+Edy5Mr4)lUzc7nepeGDvTiS?E*nj_1@D za%m_8Ac&iY`0=_lN72%7j;=Ey+=v@OxCQT4a8SjqxJ_VLr6x34k+RGQt#(NfwRu<- zijWQUy#g0?5ebIAo4!w|rlM%+`TJFTz`HI^R@@^VJ0GXa0w^aj%^p)x8Nfx zJ}R^3=aELUg1Cdo?vSrkq@_}Yx|NbYE(R?kMTf~jDz9~E0r(0VjOq~N!LQC~%mLb3A27UIR z!G5r~vaYhHQ^v#ZP$+R<8owOblrp>1l1QORGa)dyavKu}=9!pO;Z|A^X^bRYsC;16 z^XI1MSmvam9)ml>C8b-ASGaCg$&y03aon4=B>GEcb6(Qod?qH1VA*a@!93uTLOUep z2`3`YE~w?{DQ=c#Yu`vdV=KpnW>v-14HF2d{G}kani{LSdF>eL)JB}_D|l!ODZ6eF z>D|UaTC=kjCnTE$|0(d-xC-7?pPs%I7Z&-E3Hff}H zn?t(f0(N7P5^o6wqPw*;eYu6MN9(n8GNor0#!Yi5I})3S?vjx)8@CXOWtNvauTK=w zLE`Q;@dpKeRPiVLS>S>RF2%LszNAKbwf_8-b;@w*w+H_J?oI0Zm`OLH zrJ)%!?ZmD0V3wOLghW8yz&nZM^dLQ>;BRbB6Jk;>dkUT!b8s4?Hp0swjLa=HVnq!tOSV;uOD8fTKn@Z&Hzm3Oe3LI_o%Nf1ie;fm&81katf2Pe}?{D*6?-g=t zq=C!jv^9#157M&mOk_`D&mqidqb;vGj@L;SYTr;FIDt2vIrQurx$i9zY3C^3=A(l* z_SU|$-gg4;I>XQ{#?9oFg!4Si6Zly?gokl}VU|JPjRt<6$AF)YYKFRwd0&KuXk>sQ zVS<~9EamJ%Jy==f#$G7nj2v4VP5QPWDC zLmM7J43A?qo=NjD zXk+NwNWo7|x(m!dV4f=w|4*ouazw+Lf|u*#+HI@7P+UM7DIS ztDFA$>Ts@^)UxLoRxy*xH681A>b$87M>iA0IrK>ARd61C42e@M8DzuL9q!o62(1Z^iwcq$Vu)J)a!(_62-l2+ zw;1}G$<^x*s)*qdt|+*QYZSygAxMCOj1*PD^KN>>(bt97+X_Z8Mqb?WsI03ef_Dth z7*t%B@Q#9aMVTQL)DNN}4R^j&_No#l7_Mk`N!53Gsch%D5Bz&%51rLcdp`~zk46Ow{Unw$N3*!<RufJ5<@unoXK4`xSnB8lKZy|=FJ-sHOK^&rA+)?^54Qq?~QmBO;y89F+QgKZB7Ho}S;F#S(0^;h|uNqa| zbwW1cB-$Dbg*e(RNnMAbtd<*aP9BLkbt>0M@ zO4wz%ei}u3!%p>Y)qxvnZrLbW+^aZ5RPlci^31w%009hT6z zU>if!XIjAU4gC#>UPCWzgvRu+()SU~RQjcsh}Mxku%Btr0#0AC4SE>!^i}Ex&`_Wm zgN}6y@`#3$WY)t8V~_t3sL8jsbWq@#&w4e@kqqIDnr49~G3MsG6wflKMOL|YA) z(^8@>K3u~{T24qcjQ@a(3EBVc8+(Zxs|hI~$KTt-&3&X>@bJffYQNxCd|_-KA2Yl_ zZXfp}82gD1kNOFZP*(@ykm;%ILJ!8!iwSyer_o1!t)FZLDE&d~(5oqmx=)r9WL3ll z{hKh2%P?pp)5-)ku|2(O0!hYW*eDYaUF7)~4&~v}z5uJf=h0q; zR(cS5g+2+?Z_Y_b{Eh5iKRJfjizGT#402OlGmf(8Hp literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class b/w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class new file mode 100644 index 0000000000000000000000000000000000000000..b06e06587592a63a48cc596cef1fc663ec3e498f GIT binary patch literal 2370 zcmbtWZBrXn6n-uV>=L#V0u%z(hT0c`gslawB!EQv-Ag%R( z)IZ=?UuHtFGmfAANjg(Kce5eO+jJa1u(>bioO{mmJojAweD&M!0B+&?2$~R55Z2I) z7KV;3zQc8s7xVhk`j*JL3@wvJ(Qv03LPLoa1u8?AXH<3#Q=hSIUdb4a8-aq5iZ+I; zqeZdnBn?*-;(R1Nx+!?hG>Sq_Hg9_0u{e+59NHC}*U*7ZhCa*A>syXhF6l)pCmj8b zl`R)UQJ(Jz(@R7j$~3ZDc5S1WPbV@B$C{l!XA0@)q$B7;w}KuG7tqTPJJXUurEG2P zS~kOlp+xN%uf7F;J>7;F`V|alxQI6xq74Y@CJf3ZcNWDSK)$dmk!DU?4a1GZfh?kZ)+*>?j~7a&&~x17S<94U=>;ND zs>3z1rcm)710op1`%-2T3=^khbCTSXD~Xjhq%o;rO2afhU}$gfAXP}n^IVRhCm6pb z}^f`(7z%>`eao`8G{(&jQADEM5%3LX+^QcJm3$P5|XP zQY#UxVokx98ot8UzB+>u=XsVcONF5|FWedgB!*z{S(7`Ce?5bqB=>I_Mqdm4tj%}L z2KM1u5){Mv3^kI)a$#NA5BR!CE5t>Xn=9NlalBiSx{g9wDb?YeI2Lj8Ok9s#`}8zbaBi8J!uDNW@~Mf; z?PW#DHLRkeV2|O(Srm!7o$lSLLpRdgvXL)xw`>zp`Ts@Ke|8(=W9|G!O5C{Y--rNmmILsbl?)R8b5NHhSG{s4CA)s#iu zC(CiN%43uMUzoxb7&KC8WgJ`BroyN!zhpTwX;(s0K>Ao2 z1go~^(q5TXx)FMXUIo-|x)j9z#=>9Zze77=vfZVK67(nJ_4JUHLGLV}H9GSGU;K*O Qk{K3Yrk!Trd5+Ki1w9{h*Z=?k literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class b/w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class new file mode 100644 index 0000000000000000000000000000000000000000..2a547eb545040418e291fcf48ed1baad71b3e083 GIT binary patch literal 2392 zcmb_dT~`}b6y28uCM1JE%0~-Q0gL6MkcvfWL#vjSk~V;v76i3*k_=(!FheI3Ag%TD zSM<>*U+ZTZS<5edhl5*U!HIn8sIOG$5cL zsG<=~0&SbxjutaDD;HZzZ|WIWplQai4EKUSV0dI%K}ew8+f;B3Gd8c~Hx{(*Fcbuu z5rL|pMa2tf6*!%-w_^I9wzX~Qv8?858QU}&F_w27&DC?oSju0_X&KjciUMK(k)h`W zT9ef@Oc4*ET_EBIoM3PTM+C+X&2rJSbCpbCL=lLf3*8EisyK!oftI>7EOWcSs?QFW z)7MB5KZuMRghF^xpu^I4^AkmKOBvuF{};ZCe6`BA4}lN{1e$ksBW;(%yeuFW z|1R!|++U3|Snkg&Do$dUO{FL~-4Tc;Z6_DYo9kybV@VIA(4zv!5^TDmnZ{i`+kfwV zf3-7GzEdj3F(DAl^SS4YL>Q;>s)9)sui;F^Bc6H*i`|yFlpW11=u7JYUBii5>HHn> z2wumug0m{#z&V=nS!zsZSnIaH$=dF)NW;oKuRF_<>x_!?pnXHS{{0;ct_w!H6CZ-#AR7Z1WR~NMM?%}%IWSky{O;@8G3uY z5VISalhW@LbStBWv4Zy%tg2YU2X&KP7LW>MY|GUQi^JDjD{yU;_{dra9|`o#*##?G zu|=_e-FEs5j(Lt=kzKr{;$zvu#;l$$;Xa;qv|Y2};}CKJ zeT#N&UP+@Sdl&{b72H-~VoRX=Sr0w^*ff(!+3^BHb>*5`J|7=BI2fG5l#$~+7aUSg z)qkI6YCZ)OM;}%IYID|A_GD z4+uxw_7UZMM+uz)tawoGKKcl~R0H+zW025^GV~pWq9bx^31b3}5i4OTfN#Czcldvt z$2FibZJ4xLFi3h+T+O2mH_;Bqd+Jd_9xm?$3fS@K{!aQqVjh_i*wqr=6j(S_!exO6 znBwA^z!NMjj*XY_u3TRK3Cj;~R1*7etc05Z@CgNcg6zX@{{B_s>t$R#WauEOG&@MI z6O;UspGP;Y@X)?WU2mY*quEa~gBZdt_P8P`pRimNahH_u5mk9D@A`zh?t4h!PqZqS zoBo4+3gOeI-5B6r`D>0w9%JKs3`yODM@Tk|5BS1oyf)x-e8Fp&=o0CD+0^_GW&42o literal 0 HcmV?d00001 diff --git a/w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class b/w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class new file mode 100644 index 0000000000000000000000000000000000000000..1a568b78d76753655b6413a3da7a2cb14a619303 GIT binary patch literal 2655 zcmbtWYf}?f7=8{UoXxVJ#tTN&f;T`7YpvCiAYKqH8qgMms9usN6a`n=Ek?*9G!k3Rt%!}lth z(999i5Jn5b`dQVteSf^nc&rRG&oPTB8F`oof@`d2Sc=uKuv^!&lv8wn5UQtsjE`sH4R

lt3z)qAc>YyBE@9HM}wBZlPx2bilkj5j!r zXn0eKHZZ0tAhjK{px_t*Yoy(T^dyZBXh?>)y&1cpi+Q6^vV@*BJR@UUk|}zD7$lkLnHftcIL!bRCvZkme1u_O6~8N)weqBI zsukz(Hpi%j^SHp!UMGR<04lqY%`$9mv|nwaWZ%^Y#xTxtQNty?!$2imJ$Tn6B8vQi zJ}u0&O^EtCY6Hv8=LNwQ^(*UIxg)cB%0Hu$PB84aWH_$KcHOz#WmHplyK;_Q=$ey; zWayVQOktYvcg@?vC+Vn)E4a#WO~d=R&ak15{wOib_s4G;R#{xk$<`PRdc)^$AVIBh z10Ql28q&xRA0+V_!y{T|MY^14=xIbIU{eFmHLRvo2;?~O8fIVy&CzJ_tdXH!U0{gh zg;&LZ=+J0<*fLx=^6HyxSOs^VWy`q5~s=sVV z2F;fV1`W-}H75o`t7R5%E=h*uKqvl`9gCh>n~J_|h4NB#atH)`Byy}0sQEK)|Er3xl87>xp$hpgX?4ObI~j4-^vl0&i1bS?pqmC!6BuUc zf14ae^c~PlYkK|D)1sYDzce!u-v1cvS6VcoKu>uMdTkf!sZ|AFqeMG~kKm9IQf7-J z*Ggv(pMqa`jL7)Gs9J%R2*pAbL=)jycoAC}enwudb~5~hU5S=hOLR{K`zq*3a9SS- z)=G>&Lo&uIh*$7>A`(-cp)ICFk5=&3BZLkl)L5i~!H4LGKgEeFu}Dmfo_dVoMVxKI z^rII){)SZa-S{FV8J^(1MSReV_%9S)$j|5~WdlT^U=w|YY(|{E6b8_N5qfo>CvYaw zi5zxN#yfG3fV+>~_z8P_1akCFnM6Nac!-eHc9dzQlKv=eVUE6rMsOVSv{K0KWh~ID zMdA`*CRevfLt^?6cknTxtl$US#V3U1dOX0V_>AKGobDys)w_Fi_XV9yAp9I19OpPT za>QQX0#uGMj>8;nOUE3$YJZ1;lXUxnvT0c<2>e6tza+yUa&;eH)2a#IkkfCMSkdi= Z3Phq~Pmui`Daj1hz>Idjh47__Db>;jh=zis1feS2ZkBfGW;gC`%7-Y3 zAmH%q*Ax1}gAWR$WkARAnMeNx$I6@Uj-#G?H>KM)6(*C-o-^nE&N;vHyZ7eNKM#Kf zunV8W5JH23u!;p}WQd>P<2-G0Yb1SW_>8W342|7}Wq3Ud4V}rNf(XN68C3NQGhNWQ zW$8{7F)UP|s%XNZ87PJGuBVq7LWae#G%r^gb~tFJ@TYSvMqE zjG_fg6f9NIie(I)nq5xo=Xkkd>gf{qxMrIsm75+n^t0(c+j4DFFVZ`M;`%i*wD{P; z!ZV`s6)IL@6*XvAso&`$4?Bi6Rb&LKsfnKU7@kABg0(8vVLd@kH57R_0?mwWe|JnBk3z zCOvL>F2kmJdFFzN+Bd7%A_`YziKdkRLoz`kNG`;7>`<^%#S3_mVbS!E^M*^!D2AIY zSG@C52qz+ViDAR!msch~yFK~o^*_G3{pU}&9)ElP_iHyMZ{B(Q)vb2T;b+a?E?h5m}=$YtA}ti)?#T^xzeSuxnf(0!Uvu;qFzj7ky-p=#s-+kQZN-83kE}jq{11 zY?pL1-RpRUX6iBYW50?6l5T<=G(6w=nu^zjvyq%f9W#Lj928U!FzoKk&82UyCsfK5UXFk+(H&7P(^xyXFjg zqXtPRm7kBy**hvj<J#66U{sX)if z*g;;YF&8ZuBNq3n4pG}ZyR~Nr797M1cGc1JeTEoQJaBw%5xbCFdu`iuJ%?8g>fWeb zawDLVE#|93NGAf~SJs+G)K-3L4~77o&M>Vyvk z;%l2exZP?HecUt)hNshb)SS49`b_S+x~t%0Iv?irRIgM8HyGCZe<&RieqUHir&)*& zMWJgd=1R2JE9`9}C!Tl6@jz4V3%Pn01h@}#{ zCb0W%!=5&Bbu)a8R9D*ryc)tU=pP_{+QO$cPhcN?a}SZ1HCzp`JOBPF_zFwn*Ae9) zJw@rCL^G8i$40b?e+Bf}L+^b^$_i5y-$9&?!k`-}fo(X0G0AQJ6K?xqqD%x>gy@Sw zcx>`i$P+_D{BJP!7q;SEvO{nP$(3&&s`R^(a(qD18#4A*cSH?iVB Dl<02k literal 0 HcmV?d00001 diff --git a/w11/target/classes/logback.xml b/w11/target/classes/logback.xml new file mode 100644 index 0000000..0ca4943 --- /dev/null +++ b/w11/target/classes/logback.xml @@ -0,0 +1,24 @@ + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + ${LOG_PATH}/crawler.log + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..d178717 --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,19 @@ +com\example\datacollect\command\ListCommand.class +com\example\datacollect\command\CrawlCommand.class +com\example\datacollect\strategy\BlogStrategy.class +com\example\datacollect\repository\ArticleRepository.class +com\example\datacollect\Main.class +com\example\datacollect\view\ConsoleView.class +com\example\datacollect\command\ExitCommand.class +com\example\datacollect\command\HelpCommand.class +com\example\datacollect\strategy\WeiboStrategy.class +com\example\datacollect\strategy\NewsStrategy.class +com\example\datacollect\command\Command.class +com\example\datacollect\controller\CrawlerController.class +com\example\datacollect\exception\CrawlerException.class +com\example\datacollect\exception\NetworkException.class +com\example\datacollect\strategy\StrategyFactory.class +com\example\datacollect\service\CrawlerService.class +com\example\datacollect\exception\ParseException.class +com\example\datacollect\strategy\CrawlStrategy.class +com\example\datacollect\model\Article.class diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..b100aff --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,19 @@ +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\ExitCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\Command.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\WeiboStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\repository\ArticleRepository.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\view\ConsoleView.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\CrawlerException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\NetworkException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\HelpCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\ListCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\CrawlCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\model\Article.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\service\CrawlerService.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\Main.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\controller\CrawlerController.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\ParseException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java