diff --git a/w11/pom.xml b/w11/pom.xml new file mode 100644 index 0000000..79b3cec --- /dev/null +++ b/w11/pom.xml @@ -0,0 +1,43 @@ + + + 4.0.0 + + com.example + datacollect + 1.0-SNAPSHOT + + + 11 + 11 + UTF-8 + + + + + org.jsoup + jsoup + 1.15.3 + + + ch.qos.logback + logback-classic + 1.4.8 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + 11 + 11 + + + + + \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/Main.java b/w11/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..b4dc6f3 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/Main.java @@ -0,0 +1,23 @@ +package com.example.datacollect; + +import com.example.datacollect.controller.CrawlerController; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; + +public class Main { + + public static void main(String[] args) { + ConsoleView view = new ConsoleView(); + ArticleRepository repository = new ArticleRepository(); + StrategyFactory strategyFactory = new StrategyFactory(); + CrawlerService service = new CrawlerService(repository, strategyFactory); + CrawlerController controller = new CrawlerController(view, service); + + view.printSuccess("Welcome to CLI Crawler (w11)! 
Type help for commands."); + while (true) { + controller.handle(view.readLine()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/Command.java b/w11/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..2ef4a45 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,6 @@ +package com.example.datacollect.command; + +public interface Command { + String getName(); + void execute(String[] args); +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..cc9c927 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,43 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final ConsoleView view; + private final CrawlerService service; + + public CrawlCommand(ConsoleView view, CrawlerService service) { + this.view = view; + this.service = service; + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args) { + if (args.length < 2) { + view.printError("Usage: crawl "); + return; + } + String url = args[1]; + + try { + view.printInfo("Crawling: " + url); + List
articles = service.crawl(url); + view.printSuccess("Crawled " + articles.size() + " articles."); + } catch (IllegalArgumentException e) { + view.printError(e.getMessage()); + } catch (RuntimeException e) { + view.printError(e.getMessage()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/ExitCommand.java b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java new file mode 100644 index 0000000..1d2aa11 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/ExitCommand.java @@ -0,0 +1,22 @@ +package com.example.datacollect.command; + +import com.example.datacollect.view.ConsoleView; + +public class ExitCommand implements Command { + private final ConsoleView view; + + public ExitCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "exit"; + } + + @Override + public void execute(String[] args) { + view.printSuccess("Bye!"); + System.exit(0); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/HelpCommand.java b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..80a80c2 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,21 @@ +package com.example.datacollect.command; + +import com.example.datacollect.view.ConsoleView; + +public class HelpCommand implements Command { + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args) { + view.printInfo("Commands: crawl , list, help, exit"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/command/ListCommand.java b/w11/src/main/java/com/example/datacollect/command/ListCommand.java new file mode 100644 index 0000000..1bcc30a --- /dev/null +++ 
b/w11/src/main/java/com/example/datacollect/command/ListCommand.java @@ -0,0 +1,24 @@ +package com.example.datacollect.command; + +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; + +public class ListCommand implements Command { + private final ConsoleView view; + private final CrawlerService service; + + public ListCommand(ConsoleView view, CrawlerService service) { + this.view = view; + this.service = service; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args) { + view.display(service.getAllArticles()); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..dee257d --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -0,0 +1,50 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.Command; +import com.example.datacollect.command.CrawlCommand; +import com.example.datacollect.command.ExitCommand; +import com.example.datacollect.command.HelpCommand; +import com.example.datacollect.command.ListCommand; +import com.example.datacollect.service.CrawlerService; +import com.example.datacollect.view.ConsoleView; +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>(); + private final ConsoleView view; + + public CrawlerController(ConsoleView view, CrawlerService service) { + this.view = view; + register(new HelpCommand(view)); + register(new ListCommand(view, service)); + register(new CrawlCommand(view, service)); + register(new ExitCommand(view)); + } + + private void 
register(Command command) { + commands.put(command.getName(), command); + } + + public void handle(String input) { + String text = input == null ? "" : input.trim(); + if (text.isEmpty()) { + return; + } + + String[] args = text.split("\\s+"); + String cmdName = args[0].toLowerCase(); + logger.info("Received command: {}", cmdName); + + Command command = commands.get(cmdName); + if (command == null) { + logger.warn("Unknown command: {}", cmdName); + view.printError("Unknown command: " + cmdName); + return; + } + command.execute(args); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..d9c9c2e --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends RuntimeException { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/NetworkException.java b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..0fb8e5e --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + public NetworkException(String message) { + super(message); + } + + public NetworkException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/exception/ParseException.java b/w11/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..205665a 
--- /dev/null +++ b/w11/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + public ParseException(String message) { + super(message); + } + + public ParseException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/model/Article.java b/w11/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..b98ff71 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,25 @@ +package com.example.datacollect.model; + +public class Article { + private final String title; + private final String url; + private final String content; + + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + } + + public String getTitle() { + return title; + } + + public String getUrl() { + return url; + } + + public String getContent() { + return content; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..36d2ebd --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -0,0 +1,54 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class ArticleRepository { + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private final List
articles = new ArrayList<>(); + + public void add(Article article) { + if (article == null) { + logger.warn("Attempt to add null article"); + throw new IllegalArgumentException("Article cannot be null"); + } + if (article.getTitle() == null || article.getTitle().trim().isEmpty()) { + logger.warn("Attempt to add article with empty title"); + throw new IllegalArgumentException("Article title cannot be empty"); + } + articles.add(article); + logger.debug("Added article: {}", article.getTitle()); + } + + public List<Article>
getAll() { + return new ArrayList<>(articles); + } + + public void clear() { + int count = articles.size(); + articles.clear(); + logger.info("Cleared {} articles", count); + } + + public int size() { + return articles.size(); + } + + public Article get(int index) { + if (index < 0 || index >= articles.size()) { + logger.warn("Invalid index: {} (size: {})", index, articles.size()); + throw new IndexOutOfBoundsException("Index out of bounds: " + index); + } + return articles.get(index); + } + + public boolean contains(Article article) { + if (article == null) { + return false; + } + return articles.contains(article); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/service/CrawlerService.java b/w11/src/main/java/com/example/datacollect/service/CrawlerService.java new file mode 100644 index 0000000..63121f5 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/service/CrawlerService.java @@ -0,0 +1,85 @@ +package com.example.datacollect.service; + +import com.example.datacollect.exception.CrawlerException; +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import com.example.datacollect.repository.ArticleRepository; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import java.util.List; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class CrawlerService { + private static final Logger logger = LoggerFactory.getLogger(CrawlerService.class); + private static final int MAX_RETRY = 3; + private static final long RETRY_DELAY_MS = 1000; + + private final ArticleRepository repository; + private final StrategyFactory strategyFactory; + + public CrawlerService(ArticleRepository repository, StrategyFactory strategyFactory) { + this.repository = repository; + 
this.strategyFactory = strategyFactory; + } + + public List<Article>
crawl(String url) { + CrawlStrategy strategy = strategyFactory.getStrategy(url); + if (strategy == null) { + logger.warn("No strategy found for URL: {}", url); + throw new CrawlerException("No strategy found for: " + url); + } + + int retryCount = 0; + Exception lastException = null; + + while (retryCount < MAX_RETRY) { + try { + logger.info("Crawling URL: {} (attempt {}/{})", url, retryCount + 1, MAX_RETRY); + Document doc = Jsoup.connect(url).get(); + List<Article>
articles = strategy.parse(url, doc); + articles.forEach(repository::add); + logger.info("Successfully crawled {} articles from {}", articles.size(), url); + return articles; + } catch (ParseException e) { + logger.error("Parse error: {}", e.getMessage()); + throw e; + } catch (Exception e) { + lastException = e; + retryCount++; + logger.warn("Network error (attempt {}): {}, retrying...", retryCount, e.getMessage()); + if (retryCount < MAX_RETRY) { + try { + Thread.sleep(RETRY_DELAY_MS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new CrawlerException("Crawl interrupted", ie); + } + } + } + } + + logger.error("Failed to crawl {} after {} attempts", url, MAX_RETRY); + throw new NetworkException("Failed to crawl after " + MAX_RETRY + " attempts", lastException); + } + + public List
getAllArticles() { + List<Article>
articles = repository.getAll(); + logger.debug("Retrieved {} articles from repository", articles.size()); + return articles; + } + + public int getArticleCount() { + int count = repository.size(); + logger.debug("Article count: {}", count); + return count; + } + + public void clearArticles() { + repository.clear(); + logger.info("Cleared all articles"); + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..97b1078 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java @@ -0,0 +1,30 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class BlogStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(BlogStrategy.class); + + @Override + public List
parse(String url, Document doc) throws ParseException { + List<Article>
articles = new ArrayList<>(); + Elements links = doc.select("article a, .post a, .entry a"); + for (Element link : links) { + String title = link.text(); + String href = link.attr("abs:href"); + if (!title.isEmpty() && !href.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + logger.debug("Parsed {} articles from blog", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..8905336 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,10 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.List; +import org.jsoup.nodes.Document; + +public interface CrawlStrategy { + List
parse(String url, Document doc) throws ParseException; +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..16710e2 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java @@ -0,0 +1,30 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class NewsStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(NewsStrategy.class); + + @Override + public List
parse(String url, Document doc) throws ParseException { + List<Article>
articles = new ArrayList<>(); + Elements links = doc.select(".news-item a, .headline a, h2 a, h3 a"); + for (Element link : links) { + String title = link.text(); + String href = link.attr("abs:href"); + if (!title.isEmpty() && !href.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + logger.debug("Parsed {} articles from news", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..cf8dc94 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,29 @@ +package com.example.datacollect.strategy; + +import java.util.HashMap; +import java.util.Map; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class StrategyFactory { + private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); + private final Map strategies = new HashMap<>(); + + public StrategyFactory() { + strategies.put("example.com", new BlogStrategy()); + strategies.put("news.ycombinator.com", new NewsStrategy()); + strategies.put("weibo.com", new WeiboStrategy()); + logger.info("Initialized {} strategies", strategies.size()); + } + + public CrawlStrategy getStrategy(String url) { + for (Map.Entry entry : strategies.entrySet()) { + if (url.contains(entry.getKey())) { + logger.debug("Found strategy for url: {}", url); + return entry.getValue(); + } + } + logger.debug("No strategy found for url: {}", url); + return null; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java b/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java new file mode 100644 index 0000000..f76f64d --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/strategy/WeiboStrategy.java @@ -0,0 +1,38 @@ +package com.example.datacollect.strategy; 
+ +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import java.util.ArrayList; +import java.util.List; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +public class WeiboStrategy implements CrawlStrategy { + private static final Logger logger = LoggerFactory.getLogger(WeiboStrategy.class); + + @Override + public List<Article>
parse(String url, Document doc) throws ParseException { + List<Article>
articles = new ArrayList<>(); + + Elements cards = doc.select(".card-wrap, .WB_cardwrap, .feed-item"); + for (Element card : cards) { + Element titleElement = card.select(".txt, .WB_text, .content").first(); + Element linkElement = card.select("a[href*='/status/']").first(); + + if (titleElement != null) { + String title = titleElement.text().trim(); + String href = linkElement != null ? linkElement.attr("abs:href") : url; + + if (!title.isEmpty()) { + articles.add(new Article(title, href, "")); + } + } + } + + logger.debug("Parsed {} articles from weibo", articles.size()); + return articles; + } +} \ No newline at end of file diff --git a/w11/src/main/java/com/example/datacollect/view/ConsoleView.java b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..987b617 --- /dev/null +++ b/w11/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,42 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + return scanner.nextLine(); + } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +} \ No newline at end of file diff --git a/w11/src/main/resources/logback.xml b/w11/src/main/resources/logback.xml new file mode 100644 index 0000000..0ca4943 --- /dev/null +++ b/w11/src/main/resources/logback.xml @@ -0,0 +1,24 @@ + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + ${LOG_PATH}/crawler.log + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/classes/com/example/datacollect/Main.class b/w11/target/classes/com/example/datacollect/Main.class new file mode 100644 index 0000000..46c9dd6 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/Main.class differ diff --git a/w11/target/classes/com/example/datacollect/command/Command.class b/w11/target/classes/com/example/datacollect/command/Command.class new file mode 100644 index 0000000..560e01c Binary files /dev/null and b/w11/target/classes/com/example/datacollect/command/Command.class differ diff --git a/w11/target/classes/com/example/datacollect/command/CrawlCommand.class b/w11/target/classes/com/example/datacollect/command/CrawlCommand.class new file mode 100644 index 0000000..769f007 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/command/CrawlCommand.class differ diff --git a/w11/target/classes/com/example/datacollect/command/ExitCommand.class b/w11/target/classes/com/example/datacollect/command/ExitCommand.class new file mode 100644 index 0000000..f6129fd Binary files /dev/null and b/w11/target/classes/com/example/datacollect/command/ExitCommand.class differ diff --git a/w11/target/classes/com/example/datacollect/command/HelpCommand.class 
b/w11/target/classes/com/example/datacollect/command/HelpCommand.class new file mode 100644 index 0000000..6a474ef Binary files /dev/null and b/w11/target/classes/com/example/datacollect/command/HelpCommand.class differ diff --git a/w11/target/classes/com/example/datacollect/command/ListCommand.class b/w11/target/classes/com/example/datacollect/command/ListCommand.class new file mode 100644 index 0000000..a777af0 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/command/ListCommand.class differ diff --git a/w11/target/classes/com/example/datacollect/controller/CrawlerController.class b/w11/target/classes/com/example/datacollect/controller/CrawlerController.class new file mode 100644 index 0000000..10fe1ae Binary files /dev/null and b/w11/target/classes/com/example/datacollect/controller/CrawlerController.class differ diff --git a/w11/target/classes/com/example/datacollect/exception/CrawlerException.class b/w11/target/classes/com/example/datacollect/exception/CrawlerException.class new file mode 100644 index 0000000..3483b9f Binary files /dev/null and b/w11/target/classes/com/example/datacollect/exception/CrawlerException.class differ diff --git a/w11/target/classes/com/example/datacollect/exception/NetworkException.class b/w11/target/classes/com/example/datacollect/exception/NetworkException.class new file mode 100644 index 0000000..438de1a Binary files /dev/null and b/w11/target/classes/com/example/datacollect/exception/NetworkException.class differ diff --git a/w11/target/classes/com/example/datacollect/exception/ParseException.class b/w11/target/classes/com/example/datacollect/exception/ParseException.class new file mode 100644 index 0000000..e0d389e Binary files /dev/null and b/w11/target/classes/com/example/datacollect/exception/ParseException.class differ diff --git a/w11/target/classes/com/example/datacollect/model/Article.class b/w11/target/classes/com/example/datacollect/model/Article.class new file mode 100644 index 0000000..c46aa02 
Binary files /dev/null and b/w11/target/classes/com/example/datacollect/model/Article.class differ diff --git a/w11/target/classes/com/example/datacollect/repository/ArticleRepository.class b/w11/target/classes/com/example/datacollect/repository/ArticleRepository.class new file mode 100644 index 0000000..732e864 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/repository/ArticleRepository.class differ diff --git a/w11/target/classes/com/example/datacollect/service/CrawlerService.class b/w11/target/classes/com/example/datacollect/service/CrawlerService.class new file mode 100644 index 0000000..6e8c8b7 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/service/CrawlerService.class differ diff --git a/w11/target/classes/com/example/datacollect/strategy/BlogStrategy.class b/w11/target/classes/com/example/datacollect/strategy/BlogStrategy.class new file mode 100644 index 0000000..570cdda Binary files /dev/null and b/w11/target/classes/com/example/datacollect/strategy/BlogStrategy.class differ diff --git a/w11/target/classes/com/example/datacollect/strategy/CrawlStrategy.class b/w11/target/classes/com/example/datacollect/strategy/CrawlStrategy.class new file mode 100644 index 0000000..b092576 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/strategy/CrawlStrategy.class differ diff --git a/w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class b/w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class new file mode 100644 index 0000000..b06e065 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/strategy/NewsStrategy.class differ diff --git a/w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class b/w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class new file mode 100644 index 0000000..2a547eb Binary files /dev/null and b/w11/target/classes/com/example/datacollect/strategy/StrategyFactory.class differ diff --git 
a/w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class b/w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class new file mode 100644 index 0000000..1a568b7 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/strategy/WeiboStrategy.class differ diff --git a/w11/target/classes/com/example/datacollect/view/ConsoleView.class b/w11/target/classes/com/example/datacollect/view/ConsoleView.class new file mode 100644 index 0000000..8ed8554 Binary files /dev/null and b/w11/target/classes/com/example/datacollect/view/ConsoleView.class differ diff --git a/w11/target/classes/logback.xml b/w11/target/classes/logback.xml new file mode 100644 index 0000000..0ca4943 --- /dev/null +++ b/w11/target/classes/logback.xml @@ -0,0 +1,24 @@ + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + ${LOG_PATH}/crawler.log + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + + + + + + + + + + \ No newline at end of file diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..d178717 --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,19 @@ +com\example\datacollect\command\ListCommand.class +com\example\datacollect\command\CrawlCommand.class +com\example\datacollect\strategy\BlogStrategy.class +com\example\datacollect\repository\ArticleRepository.class +com\example\datacollect\Main.class +com\example\datacollect\view\ConsoleView.class +com\example\datacollect\command\ExitCommand.class +com\example\datacollect\command\HelpCommand.class +com\example\datacollect\strategy\WeiboStrategy.class +com\example\datacollect\strategy\NewsStrategy.class +com\example\datacollect\command\Command.class +com\example\datacollect\controller\CrawlerController.class 
+com\example\datacollect\exception\CrawlerException.class +com\example\datacollect\exception\NetworkException.class +com\example\datacollect\strategy\StrategyFactory.class +com\example\datacollect\service\CrawlerService.class +com\example\datacollect\exception\ParseException.class +com\example\datacollect\strategy\CrawlStrategy.class +com\example\datacollect\model\Article.class diff --git a/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..b100aff --- /dev/null +++ b/w11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,19 @@ +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\ExitCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\Command.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\WeiboStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\repository\ArticleRepository.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\view\ConsoleView.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\CrawlerException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\NetworkException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\HelpCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\ListCommand.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\command\CrawlCommand.java 
+C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\model\Article.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\service\CrawlerService.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\Main.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\controller\CrawlerController.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\exception\ParseException.java +C:\Users\朱彦硕\Java\java\w11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java