diff --git a/W11/.gitignore b/W11/.gitignore new file mode 100644 index 0000000..0ebcf1a --- /dev/null +++ b/W11/.gitignore @@ -0,0 +1,4 @@ +*.jar +*.jar +*.class +*.log \ No newline at end of file diff --git a/W11/.idea/.gitignore b/W11/.idea/.gitignore new file mode 100644 index 0000000..b6b1ecf --- /dev/null +++ b/W11/.idea/.gitignore @@ -0,0 +1,10 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 已忽略包含查询文件的默认文件夹 +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/W11/.idea/.name b/W11/.idea/.name new file mode 100644 index 0000000..4565c2f --- /dev/null +++ b/W11/.idea/.name @@ -0,0 +1 @@ +Command.java \ No newline at end of file diff --git a/W11/.idea/compiler.xml b/W11/.idea/compiler.xml new file mode 100644 index 0000000..a43201e --- /dev/null +++ b/W11/.idea/compiler.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/W11/.idea/jarRepositories.xml b/W11/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/W11/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/W11/.idea/misc.xml b/W11/.idea/misc.xml new file mode 100644 index 0000000..f88c0a7 --- /dev/null +++ b/W11/.idea/misc.xml @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/W11/.idea/vcs.xml b/W11/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/W11/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/W11/pom.xml b/W11/pom.xml new file mode 100644 index 0000000..23d9dfa --- /dev/null +++ b/W11/pom.xml @@ -0,0 +1,71 @@ + + + 4.0.0 + + com.example + W9 + 0.1.0 + + + 11 + 11 + + + + + + + org.jsoup + jsoup + 1.17.2 + + + + com.fasterxml.jackson.core + jackson-databind + 2.15.2 + + + + ch.qos.logback + logback-classic + 1.4.11 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + + 
org.apache.maven.plugins + maven-assembly-plugin + 3.3.0 + + + + com.example.datacollect.Main + + + + jar-with-dependencies + + + + + make-assembly + package + + single + + + + + + + diff --git a/W11/src/.idea/.gitignore b/W11/src/.idea/.gitignore new file mode 100644 index 0000000..b6b1ecf --- /dev/null +++ b/W11/src/.idea/.gitignore @@ -0,0 +1,10 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 已忽略包含查询文件的默认文件夹 +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/W11/src/.idea/misc.xml b/W11/src/.idea/misc.xml new file mode 100644 index 0000000..188022c --- /dev/null +++ b/W11/src/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/W11/src/.idea/modules.xml b/W11/src/.idea/modules.xml new file mode 100644 index 0000000..fe93b77 --- /dev/null +++ b/W11/src/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/W11/src/.idea/vcs.xml b/W11/src/.idea/vcs.xml new file mode 100644 index 0000000..b2bdec2 --- /dev/null +++ b/W11/src/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/W11/src/java-cli.iml b/W11/src/java-cli.iml new file mode 100644 index 0000000..5b22c51 --- /dev/null +++ b/W11/src/java-cli.iml @@ -0,0 +1,11 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/Main.java b/W11/src/main/java/com/example/datacollect/Main.java new file mode 100644 index 0000000..1e13b5f --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/Main.java @@ -0,0 +1,26 @@ +package com.example.datacollect; + +import com.example.datacollect.controller.CrawlerController; +import com.example.datacollect.model.Article; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; + +public class Main { + private static final Logger logger = 
LoggerFactory.getLogger(Main.class); + + public static void main(String[] args) { + logger.info("应用启动"); + ConsoleView view = new ConsoleView(); + List
articles = new ArrayList<>(); + CrawlerController controller = new CrawlerController(view, articles); + + view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands."); + logger.info("CLI Crawler 启动成功"); + while (true) { + controller.handle(view.readLine()); + } + } +} diff --git a/W11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java b/W11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java new file mode 100644 index 0000000..559fa06 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java @@ -0,0 +1,135 @@ +package com.example.datacollect.command; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +public class AnalyzeCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); + private final ConsoleView view; + private final StrategyFactory strategyFactory; + private static final String URL_PATTERN = + "^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$"; + + public AnalyzeCommand(ConsoleView view) { + this.view = view; + this.strategyFactory = new StrategyFactory(); + } + + @Override + public String getName() { + return "analyze"; + } + + @Override + public void execute(String[] args, List
articles) {
+        if (args.length < 1) {
+            view.printError("Usage: analyze <url>");
+            return;
+        }
+
+        String url = args[0];
+
+        if (!isValidUrl(url)) {
+            view.printError("无效的URL格式:" + url);
+            view.printInfo("请输入正确的URL,例如:https://www.example.com");
+            return;
+        }
+
+        // URL_PATTERN treats the scheme as optional, but Jsoup.connect() rejects a
+        // scheme-less URL with an unchecked IllegalArgumentException that neither
+        // catch clause of this method handles, which would end the CLI loop.
+        // Default to https so every input that passed validation can be fetched.
+        if (!url.startsWith("http://") && !url.startsWith("https://")) {
+            url = "https://" + url;
+        }
+
+        view.printInfo("正在分析:" + url);
+        logger.info("开始分析URL: {}", url);
+
+        try {
+            // Resolve the strategy before going to the network: an unsupported URL
+            // should be reported without paying for an HTTP request first.
+            CrawlStrategy strategy = strategyFactory.getStrategy(url);
+            if (strategy == null) {
+                view.printError("未找到匹配的解析策略");
+                logger.warn("未找到匹配的解析策略: {}", url);
+                return;
+            }
+
+            view.printInfo("匹配策略:" + strategy.getClass().getSimpleName());
+            logger.debug("匹配策略: {}", strategy.getClass().getSimpleName());
+
+            Document doc = Jsoup.connect(url)
+                    .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
+                    .timeout(15000)
+                    .get();
+
+            List<Article>
parsedArticles = strategy.parse(url, doc); + + view.printSuccess("分析完成!"); + view.printInfo("📊 统计信息:"); + view.printInfo(" - 解析到文章数量:" + parsedArticles.size()); + logger.info("分析完成,解析到 {} 篇文章", parsedArticles.size()); + + if (!parsedArticles.isEmpty()) { + int totalContentLength = 0; + int titlesWithContent = 0; + int titlesWithoutContent = 0; + + for (Article article : parsedArticles) { + if (article.getContent() != null && !article.getContent().isBlank()) { + totalContentLength += article.getContent().length(); + titlesWithContent++; + } else { + titlesWithoutContent++; + } + } + + view.printInfo(" - 包含内容的文章:" + titlesWithContent); + view.printInfo(" - 仅标题的文章:" + titlesWithoutContent); + view.printInfo(" - 总内容长度:" + totalContentLength + " 字符"); + + if (titlesWithContent > 0) { + int avgLength = totalContentLength / titlesWithContent; + view.printInfo(" - 平均内容长度:" + avgLength + " 字符"); + } + + view.printInfo("\n📝 文章标题列表:"); + for (int i = 0; i < Math.min(parsedArticles.size(), 10); i++) { + Article article = parsedArticles.get(i); + String title = article.getTitle(); + if (title.length() > 40) { + title = title.substring(0, 37) + "..."; + } + view.printInfo(" " + (i + 1) + ". " + title); + } + + if (parsedArticles.size() > 10) { + view.printInfo(" ... 
还有 " + (parsedArticles.size() - 10) + " 篇文章"); + } + } + + view.printInfo("\n💡 提示:分析结果未保存,如需保存请使用 crawl 命令"); + + } catch (IOException e) { + view.printError("分析失败:" + e.getMessage()); + logger.error("分析失败: {} - {}", url, e.getMessage(), e); + } catch (ParseException e) { + view.printError("解析失败:" + e.getMessage()); + logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e); + } + } + + @Override + public String getDescription() { + return "analyze - 分析URL内容(不保存),输出统计信息"; + } + + private boolean isValidUrl(String url) { + if (url == null || url.isBlank()) { + return false; + } + return Pattern.matches(URL_PATTERN, url); + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/command/Command.java b/W11/src/main/java/com/example/datacollect/command/Command.java new file mode 100644 index 0000000..b317fb0 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/Command.java @@ -0,0 +1,11 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import java.util.List; + +public interface Command { + String getName(); + void execute(String[] args, List
articles); + + String getDescription(); +} diff --git a/W11/src/main/java/com/example/datacollect/command/CrawlCommand.java b/W11/src/main/java/com/example/datacollect/command/CrawlCommand.java new file mode 100644 index 0000000..9da9ba0 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/CrawlCommand.java @@ -0,0 +1,119 @@ +package com.example.datacollect.command; + +import com.example.datacollect.exception.NetworkException; +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import com.example.datacollect.strategy.CrawlStrategy; +import com.example.datacollect.strategy.StrategyFactory; +import com.example.datacollect.view.ConsoleView; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.io.IOException; +import java.util.List; +import java.util.regex.Pattern; + +public class CrawlCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); + private final ConsoleView view; + private final StrategyFactory strategyFactory; + private static final String URL_PATTERN = + "^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$"; + private static final int MAX_RETRY = 3; + private static final long RETRY_DELAY_MS = 2000; + + public CrawlCommand(ConsoleView view) { + this.view = view; + this.strategyFactory = new StrategyFactory(); + } + + @Override + public String getName() { + return "crawl"; + } + + @Override + public void execute(String[] args, List
articles) {
+        if (args.length < 1) {
+            view.printError("Usage: crawl <url>");
+            return;
+        }
+
+        String url = args[0];
+
+        if (!isValidUrl(url)) {
+            view.printError("无效的URL格式:" + url);
+            view.printInfo("请输入正确的URL,例如:https://www.example.com");
+            return;
+        }
+
+        // URL_PATTERN treats the scheme as optional, but Jsoup.connect() (called by
+        // fetchWithRetry) rejects a scheme-less URL with an unchecked
+        // IllegalArgumentException. fetchWithRetry only catches IOException, so the
+        // exception would escape this command and end the CLI loop.
+        // Default to https so every input that passed validation can be fetched.
+        if (!url.startsWith("http://") && !url.startsWith("https://")) {
+            url = "https://" + url;
+        }
+
+        view.printInfo("正在爬取:" + url);
+        logger.info("开始爬取URL: {}", url);
+
+        try {
+            Document doc = fetchWithRetry(url);
+            List<Article>
parsedArticles = parseWithStrategy(url, doc); + + for (Article article : parsedArticles) { + articles.add(article); + } + + view.printSuccess("爬取成功!共获取 " + parsedArticles.size() + " 篇文章"); + view.printInfo("输入 list 查看"); + logger.info("爬取完成,获取 {} 篇文章", parsedArticles.size()); + + } catch (NetworkException e) { + view.printError("网络请求失败:" + e.getMessage()); + logger.error("网络请求失败: {} - {}", e.getUrl(), e.getMessage(), e); + } catch (ParseException e) { + view.printError("解析失败:" + e.getMessage()); + logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e); + } + } + + private Document fetchWithRetry(String url) throws NetworkException { + int retryCount = 0; + IOException lastException = null; + + while (retryCount < MAX_RETRY) { + try { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .timeout(15000) + .get(); + } catch (IOException e) { + lastException = e; + retryCount++; + if (retryCount < MAX_RETRY) { + view.printInfo("重试第 " + retryCount + " 次..."); + try { + Thread.sleep(RETRY_DELAY_MS); + } catch (InterruptedException ie) { + Thread.currentThread().interrupt(); + throw new NetworkException("请求被中断", url, ie); + } + } + } + } + + throw new NetworkException("网络请求失败,已重试 " + MAX_RETRY + " 次: " + lastException.getMessage(), url, lastException); + } + + private List
parseWithStrategy(String url, Document doc) throws ParseException {
+        CrawlStrategy strategy = strategyFactory.getStrategy(url);
+        // AnalyzeCommand guards this same lookup against null; without the guard an
+        // unsupported URL ends in a NullPointerException instead of a reported error.
+        // execute() already catches ParseException and prints its message.
+        if (strategy == null) {
+            throw new ParseException("未找到匹配的解析策略", url);
+        }
+        return strategy.parse(url, doc);
+    }
+
+    @Override
+    public String getDescription() {
+        return "crawl - 爬取指定URL的文章";
+    }
+
+    private boolean isValidUrl(String url) {
+        if (url == null || url.isBlank()) {
+            return false;
+        }
+        return Pattern.matches(URL_PATTERN, url);
+    }
+}
\ No newline at end of file
diff --git a/W11/src/main/java/com/example/datacollect/command/ExitCommand.java b/W11/src/main/java/com/example/datacollect/command/ExitCommand.java
new file mode 100644
index 0000000..f2c542b
--- /dev/null
+++ b/W11/src/main/java/com/example/datacollect/command/ExitCommand.java
@@ -0,0 +1,33 @@
+package com.example.datacollect.command;
+
+import com.example.datacollect.model.Article;
+import com.example.datacollect.view.ConsoleView;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+import java.util.List;
+
+public class ExitCommand implements Command {
+    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
+    private final ConsoleView view;
+
+    public ExitCommand(ConsoleView view) {
+        this.view = view;
+    }
+
+    @Override
+    public String getName() {
+        return "exit";
+    }
+
+    @Override
+    public void execute(String[] args, List
articles) { + view.printSuccess("Bye!"); + logger.info("应用退出"); + System.exit(0); + } + + @Override + public String getDescription() { + return "exit - 退出程序"; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/command/HelpCommand.java b/W11/src/main/java/com/example/datacollect/command/HelpCommand.java new file mode 100644 index 0000000..bb0d2f6 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/HelpCommand.java @@ -0,0 +1,39 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.List; + +public class HelpCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); + private final ConsoleView view; + + public HelpCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "help"; + } + + @Override + public void execute(String[] args, List
articles) { + logger.debug("显示帮助信息"); + view.printInfo("Commands:"); + view.printInfo(" crawl - 爬取指定URL的文章并保存"); + view.printInfo(" analyze - 分析URL内容(不保存),输出统计信息"); + view.printInfo(" list - 列出已保存的文章"); + view.printInfo(" history - 查看命令历史"); + view.printInfo(" help - 显示帮助信息"); + view.printInfo(" exit - 退出程序"); + view.printInfo("Aliases: h=help, c=crawl, l=list, hi=history, e=exit, a=analyze"); + } + + @Override + public String getDescription() { + return "显示帮助信息"; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/command/HistoryCommand.java b/W11/src/main/java/com/example/datacollect/command/HistoryCommand.java new file mode 100644 index 0000000..727c6c7 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/HistoryCommand.java @@ -0,0 +1,46 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.List; + +public class HistoryCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class); + private static final List commandHistory = new ArrayList<>(); + private final ConsoleView view; + + public HistoryCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "history"; + } + + @Override + public void execute(String[] args, List
articles) { + if (commandHistory.isEmpty()) { + view.printInfo("📜 还没有输入过任何命令哦~"); + return; + } + logger.debug("显示命令历史,共 {} 条", commandHistory.size()); + view.printInfo("📜 你的历史命令列表:"); + for (int i = 0; i < commandHistory.size(); i++) { + view.printInfo((i + 1) + ". " + commandHistory.get(i)); + } + } + + @Override + public String getDescription() { + return "history - 查看你输入过的所有命令"; + } + + public static void record(String commandLine) { + commandHistory.add(commandLine); + logger.debug("记录命令: {}", commandLine); + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/command/ListCommand.java b/W11/src/main/java/com/example/datacollect/command/ListCommand.java new file mode 100644 index 0000000..c22fbfa --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/command/ListCommand.java @@ -0,0 +1,32 @@ +package com.example.datacollect.command; + +import com.example.datacollect.model.Article; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.List; + +public class ListCommand implements Command { + private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); + private final ConsoleView view; + + public ListCommand(ConsoleView view) { + this.view = view; + } + + @Override + public String getName() { + return "list"; + } + + @Override + public void execute(String[] args, List
articles) { + logger.debug("显示文章列表,共 {} 篇", articles.size()); + view.display(articles); + } + + @Override + public String getDescription() { + return "list - 显示所有已爬取的文章"; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/controller/CrawlerController.java b/W11/src/main/java/com/example/datacollect/controller/CrawlerController.java new file mode 100644 index 0000000..a6e071c --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/controller/CrawlerController.java @@ -0,0 +1,65 @@ +package com.example.datacollect.controller; + +import com.example.datacollect.command.*; +import com.example.datacollect.model.Article; +import com.example.datacollect.view.ConsoleView; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +public class CrawlerController { + private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); + private final Map commands = new HashMap<>(); + private final ConsoleView view; + private final List
articles; + private final Map aliasMap = new HashMap<>(); + + public CrawlerController(ConsoleView view, List
articles) {
+        this.view = view;
+        this.articles = articles;
+
+        register(new HelpCommand(view));
+        register(new ListCommand(view));
+        register(new CrawlCommand(view));
+        register(new ExitCommand(view));
+        register(new HistoryCommand(view));
+        register(new AnalyzeCommand(view));
+
+        aliasMap.put("h", "help");
+        aliasMap.put("c", "crawl");
+        aliasMap.put("l", "list");
+        aliasMap.put("hi", "history");
+        aliasMap.put("e", "exit");
+        aliasMap.put("a", "analyze");
+    }
+
+    private void register(Command command) {
+        commands.put(command.getName(), command);
+    }
+
+    /**
+     * Parses one line of user input and dispatches it to the matching command.
+     *
+     * <p>The first whitespace-delimited token is the command name (or an alias);
+     * everything after it is passed to the command as a single argument.
+     * Null or blank input is ignored. The raw line is recorded in the history
+     * before it is parsed.
+     *
+     * @param input the line typed by the user; may be null
+     */
+    public void handle(String input) {
+        if (input == null || input.isBlank()) {
+            return;
+        }
+
+        HistoryCommand.record(input);
+
+        // Strip first: with leading whitespace split() yields an empty first token
+        // (reported as an unknown command), and trailing whitespace would otherwise
+        // stay attached to the argument and fail the commands' URL validation.
+        String[] parts = input.strip().split("\\s+", 2);
+        String commandName = parts[0];
+        String[] args = parts.length > 1 ? new String[]{parts[1]} : new String[0];
+
+        // Resolve an alias to its full command name; unknown names pass through unchanged.
+        commandName = aliasMap.getOrDefault(commandName, commandName);
+
+        if (commands.containsKey(commandName)) {
+            logger.debug("执行命令: {} with args: {}", commandName, args.length > 0 ?
args[0] : "无参数"); + commands.get(commandName).execute(args, articles); + } else { + view.printError("未知命令/别名:" + commandName + ",输入 help 查看所有命令"); + logger.warn("未知命令: {}", commandName); + } + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/exception/CrawlerException.java b/W11/src/main/java/com/example/datacollect/exception/CrawlerException.java new file mode 100644 index 0000000..e81c3c9 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.example.datacollect.exception; + +public class CrawlerException extends Exception { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/exception/NetworkException.java b/W11/src/main/java/com/example/datacollect/exception/NetworkException.java new file mode 100644 index 0000000..8069ca2 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/exception/NetworkException.java @@ -0,0 +1,19 @@ +package com.example.datacollect.exception; + +public class NetworkException extends CrawlerException { + private final String url; + + public NetworkException(String message, String url) { + super(message); + this.url = url; + } + + public NetworkException(String message, String url, Throwable cause) { + super(message, cause); + this.url = url; + } + + public String getUrl() { + return url; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/exception/ParseException.java b/W11/src/main/java/com/example/datacollect/exception/ParseException.java new file mode 100644 index 0000000..4bf6f73 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/exception/ParseException.java @@ -0,0 +1,19 @@ +package com.example.datacollect.exception; + +public class ParseException extends CrawlerException { + 
private final String url; + + public ParseException(String message, String url) { + super(message); + this.url = url; + } + + public ParseException(String message, String url, Throwable cause) { + super(message, cause); + this.url = url; + } + + public String getUrl() { + return url; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/model/Article.java b/W11/src/main/java/com/example/datacollect/model/Article.java new file mode 100644 index 0000000..a2e2637 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/model/Article.java @@ -0,0 +1,77 @@ +package com.example.datacollect.model; + +public class Article { + private String title; + private String url; + private String content; + private String author; + private String publishDate; + + // 修正后的构造方法:参数 → 成员变量 + public Article(String title, String url, String content) { + this.title = title; + this.url = url; + this.content = content; + this.author = ""; + this.publishDate = ""; + } + + public Article(String title, String url, String content, String author, String publishDate) { + this.title = title; + this.url = url; + this.content = content; + this.author = author; + this.publishDate = publishDate; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getUrl() { + return url; + } + + public void setUrl(String url) { + this.url = url; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public String getAuthor() { + return author; + } + + public void setAuthor(String author) { + this.author = author; + } + + public String getPublishDate() { + return publishDate; + } + + public void setPublishDate(String publishDate) { + this.publishDate = publishDate; + } + + @Override + public String toString() { + return "Article{" + + "title='" + title + '\'' + + ", author='" + author + '\'' + + ", publishDate='" 
+ publishDate + '\'' + + ", url='" + url + '\'' + + ", content='" + content + '\'' + + '}'; + } +} diff --git a/W11/src/main/java/com/example/datacollect/model/RouteInfo.java b/W11/src/main/java/com/example/datacollect/model/RouteInfo.java new file mode 100644 index 0000000..af9609a --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/model/RouteInfo.java @@ -0,0 +1,29 @@ +package com.example.datacollect.model; + +/** + * 路线信息类:存储路线的基本数据 + * 包含出发地、目的地、地图类型、交通方式、距离、时间等信息 + */ +public class RouteInfo { + private final String city; // 目的地城市 + private final String mapType; // 地图类型(高德/百度) + private final String transportType; // 交通方式(驾车/公交) + private final double distance; // 距离(公里) + private final double time; // 时间(小时) + + // 构造方法:初始化所有属性 + public RouteInfo(String city, String mapType, String transportType, double distance, double time) { + this.city = city; + this.mapType = mapType; + this.transportType = transportType; + this.distance = distance; + this.time = time; + } + + // Getter方法:获取各个属性值 + public String getCity() { return city; } + public String getMapType() { return mapType; } + public String getTransportType() { return transportType; } + public double getDistance() { return distance; } + public double getTime() { return time; } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/repository/ArticleRepository.java b/W11/src/main/java/com/example/datacollect/repository/ArticleRepository.java new file mode 100644 index 0000000..717f7ba --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/repository/ArticleRepository.java @@ -0,0 +1,72 @@ +package com.example.datacollect.repository; + +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; + +public class ArticleRepository { + private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); + private final 
List
articles = new ArrayList<>(); + + public void add(Article article) { + if (article == null) { + logger.warn("尝试添加空文章对象"); + throw new IllegalArgumentException("Article cannot be null"); + } + if (article.getTitle() == null || article.getTitle().isBlank()) { + logger.warn("尝试添加标题为空的文章: {}", article.getUrl()); + throw new IllegalArgumentException("Article title cannot be null or blank"); + } + if (article.getUrl() == null || article.getUrl().isBlank()) { + logger.warn("尝试添加URL为空的文章: {}", article.getTitle()); + throw new IllegalArgumentException("Article URL cannot be null or blank"); + } + articles.add(article); + logger.debug("添加文章: {}", article.getTitle()); + } + + public List
getAll() { + logger.debug("获取所有文章,共 {} 篇", articles.size()); + return Collections.unmodifiableList(articles); + } + + public int size() { + return articles.size(); + } + + public void clear() { + int count = articles.size(); + articles.clear(); + logger.info("清空所有文章,共 {} 篇", count); + } + + public void addAll(List
articleList) { + if (articleList == null) { + logger.warn("尝试添加空的文章列表"); + throw new IllegalArgumentException("Article list cannot be null"); + } + if (articleList.isEmpty()) { + logger.debug("添加空的文章列表"); + return; + } + for (Article article : articleList) { + add(article); + } + logger.info("批量添加文章,共 {} 篇", articleList.size()); + } + + public Article getByIndex(int index) { + if (index < 0 || index >= articles.size()) { + logger.warn("无效的索引: {},列表大小: {}", index, articles.size()); + throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + articles.size()); + } + return articles.get(index); + } + + public boolean isEmpty() { + return articles.isEmpty(); + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java new file mode 100644 index 0000000..a2801fe --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java @@ -0,0 +1,26 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class BlogStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("blog.example.com"); + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>(); + Elements titles = doc.select(".post-title"); + for (Element e : titles) { + articles.add(new Article(e.text(), url, "")); + } + return articles; + } +} diff --git a/W11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java new file mode 100644 index 0000000..de1c01a --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java @@ -0,0 +1,14 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import java.util.List; + +public interface CrawlStrategy { + List
parse(String url, Document doc) throws ParseException; + boolean supports(String url); + default int getPriority() { + return 100; + } +} diff --git a/W11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java new file mode 100644 index 0000000..0188c83 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java @@ -0,0 +1,50 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class HnuNewsStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("news.hnu.edu.cn"); + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>();
+        Elements listItems = doc.select("ul.list11 li");
+
+        for (Element li : listItems) {
+            Element link = li.selectFirst("a");
+            if (link == null) continue;
+
+            String articleUrl = link.attr("href");
+            if (!articleUrl.startsWith("http")) {
+                articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", "");
+            }
+
+            String title = "";
+            Element titleEl = link.selectFirst("h4.l2.h4s2");
+            if (titleEl != null) {
+                title = titleEl.text().trim();
+            }
+
+            String content = "";
+            Element contentEl = link.selectFirst("p.l3.ps3");
+            if (contentEl != null) {
+                content = contentEl.text().trim();
+            }
+
+            if (!title.isEmpty()) {
+                articles.add(new Article(title, articleUrl, content));
+            }
+        }
+
+        return articles;
+    }
+}
diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/AmapPlatform.java b/W11/src/main/java/com/example/datacollect/strategy/Map/AmapPlatform.java
new file mode 100644
index 0000000..6d0dfac
--- /dev/null
+++ b/W11/src/main/java/com/example/datacollect/strategy/Map/AmapPlatform.java
@@ -0,0 +1,40 @@
+package com.example.datacollect.strategy.Map;
+
+import com.example.datacollect.model.RouteInfo;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.IOException;
+
+public class AmapPlatform implements MapPlatform {
+    private final ObjectMapper mapper = new ObjectMapper();
+
+    @Override
+    public String getName() {
+        return "高德地图";
+    }
+
+    @Override
+    public String getBaseUrl() {
+        return "https://restapi.amap.com/v3/direction/";
+    }
+
+    @Override
+    public String getApiKeyParam() {
+        return "key";
+    }
+
+    /**
+     * Parses a direction-API JSON response into a {@link RouteInfo}.
+     *
+     * <p>The first entry of {@code route.paths} is used; its {@code distance} is divided
+     * by 1000 and its {@code duration} by 3600 before being stored.
+     * NOTE(review): presumably the API reports metres and seconds, giving km and hours
+     * as RouteInfo documents — confirm against the AMap API reference.
+     *
+     * @param responseBody  raw JSON text returned by the API
+     * @param city          destination city, copied into the result
+     * @param transportType transport mode label, copied into the result
+     * @return the route summary for the first returned path
+     * @throws IOException if the body is not valid JSON, the API reports a non-"1" status,
+     *                     or the response carries no usable route path
+     */
+    @Override
+    public RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException {
+        JsonNode root = mapper.readTree(responseBody);
+
+        // path() returns a MissingNode rather than null for absent fields, so an
+        // unexpected payload surfaces as the declared IOException instead of a
+        // NullPointerException.
+        if (!"1".equals(root.path("status").asText())) {
+            throw new IOException("API错误: " + root.path("info").asText());
+        }
+
+        JsonNode path = root.path("route").path("paths").path(0);
+        // An empty "paths" array (or a response shape without it) leaves nothing to read
+        // distance/duration from; fail with a clear message before they are dereferenced.
+        if (!path.hasNonNull("distance") || !path.hasNonNull("duration")) {
+            throw new IOException("API错误: 响应中缺少路线数据");
+        }
+        double
distance = path.get("distance").asInt() / 1000.0; + double time = path.get("duration").asInt() / 3600.0; + + return new RouteInfo(city, getName(), transportType, distance, time); + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/BusStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/Map/BusStrategy.java new file mode 100644 index 0000000..3cc3e72 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/Map/BusStrategy.java @@ -0,0 +1,19 @@ +package com.example.datacollect.strategy.Map; + +import java.util.HashMap; +import java.util.Map; + +public class BusStrategy implements TransportStrategy { + @Override + public String getPath() { + return "transit/integrated"; + } + + @Override + public Map getParams() { + Map params = new HashMap<>(); + params.put("city", "长沙"); + params.put("strategy", "0"); + return params; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/DrivingStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/Map/DrivingStrategy.java new file mode 100644 index 0000000..fc3be9c --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/Map/DrivingStrategy.java @@ -0,0 +1,18 @@ +package com.example.datacollect.strategy.Map; + +import java.util.HashMap; +import java.util.Map; + +public class DrivingStrategy implements TransportStrategy { + @Override + public String getPath() { + return "driving"; + } + + @Override + public Map getParams() { + Map params = new HashMap<>(); + params.put("strategy", "0"); + return params; + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/JsonNode.java b/W11/src/main/java/com/example/datacollect/strategy/Map/JsonNode.java new file mode 100644 index 0000000..e6c3756 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/Map/JsonNode.java @@ -0,0 +1,5 @@ +package com.example.datacollect.strategy.Map; + 
+public class JsonNode { + +} diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/MapPlatform.java b/W11/src/main/java/com/example/datacollect/strategy/Map/MapPlatform.java new file mode 100644 index 0000000..0dc36eb --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/Map/MapPlatform.java @@ -0,0 +1,11 @@ +package com.example.datacollect.strategy.Map; + +import com.example.datacollect.model.RouteInfo; +import java.io.IOException; + +public interface MapPlatform { + String getName(); + String getBaseUrl(); + String getApiKeyParam(); + RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException; +} diff --git a/W11/src/main/java/com/example/datacollect/strategy/Map/TransportStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/Map/TransportStrategy.java new file mode 100644 index 0000000..1546870 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/Map/TransportStrategy.java @@ -0,0 +1,9 @@ +package com.example.datacollect.strategy.Map; + +import java.util.Map; + +public interface TransportStrategy { + String getPath(); + Map getParams(); +} + diff --git a/W11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java b/W11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java new file mode 100644 index 0000000..6f87553 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java @@ -0,0 +1,26 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +public class NewsStrategy implements CrawlStrategy { + @Override + public boolean supports(String url) { + return url.contains("news.example.com"); + } + + @Override + public List
parse(String url, Document doc) throws ParseException { + List
articles = new ArrayList<>(); + Elements items = doc.select(".article-headline"); + for (Element e : items) { + articles.add(new Article(e.text(), url, "")); + } + return articles; + } +} diff --git a/W11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java b/W11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java new file mode 100644 index 0000000..92dc1b9 --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java @@ -0,0 +1,90 @@ +package com.example.datacollect.strategy; + +import com.example.datacollect.exception.ParseException; +import com.example.datacollect.model.Article; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.List; +import java.util.regex.Pattern; + +public class StrategyFactory { + private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); + private final List strategies = new ArrayList<>(); + private CrawlStrategy defaultStrategy; + + public StrategyFactory() { + strategies.add(new HnuNewsStrategy()); + strategies.add(new BlogStrategy()); + strategies.add(new NewsStrategy()); + this.defaultStrategy = new DefaultStrategy(); + logger.info("策略工厂初始化,注册了 {} 个策略", strategies.size()); + } + + public CrawlStrategy getStrategy(String url) { + List matchingStrategies = new ArrayList<>(); + for (CrawlStrategy s : strategies) { + if (s.supports(url)) { + matchingStrategies.add(s); + } + } + + if (matchingStrategies.isEmpty()) { + logger.debug("未找到匹配策略,使用默认策略: {}", url); + return defaultStrategy; + } + + if (matchingStrategies.size() > 1) { + matchingStrategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority)); + logger.debug("找到多个匹配策略,选择优先级最高的: {}", matchingStrategies.get(0).getClass().getSimpleName()); + } + + return matchingStrategies.get(0); + } + + public void register(CrawlStrategy strategy) { + strategies.add(strategy); + logger.info("注册新策略: {}", 
strategy.getClass().getSimpleName()); + } + + public void setDefaultStrategy(CrawlStrategy defaultStrategy) { + this.defaultStrategy = defaultStrategy; + logger.info("设置默认策略: {}", defaultStrategy.getClass().getSimpleName()); + } + + public List getMatchingStrategies(String url) { + List matching = new ArrayList<>(); + for (CrawlStrategy s : strategies) { + if (s.supports(url)) { + matching.add(s); + } + } + matching.sort(Comparator.comparingInt(CrawlStrategy::getPriority)); + return matching; + } + + private static class DefaultStrategy implements CrawlStrategy { + private static final Pattern TITLE_PATTERN = Pattern.compile("]*>([^<]+)", Pattern.CASE_INSENSITIVE); + + @Override + public List
parse(String url, org.jsoup.nodes.Document doc) throws ParseException { + List
articles = new ArrayList<>(); + String title = doc.title(); + if (title != null && !title.isBlank()) { + articles.add(new Article(title.trim(), url, "")); + } + return articles; + } + + @Override + public boolean supports(String url) { + return true; + } + + @Override + public int getPriority() { + return Integer.MAX_VALUE; + } + } +} \ No newline at end of file diff --git a/W11/src/main/java/com/example/datacollect/view/ConsoleView.java b/W11/src/main/java/com/example/datacollect/view/ConsoleView.java new file mode 100644 index 0000000..3c1d47a --- /dev/null +++ b/W11/src/main/java/com/example/datacollect/view/ConsoleView.java @@ -0,0 +1,42 @@ +package com.example.datacollect.view; + +import com.example.datacollect.model.Article; +import java.util.List; +import java.util.Scanner; + +public class ConsoleView { + private static final String ANSI_RESET = "\u001B[0m"; + private static final String ANSI_GREEN = "\u001B[32m"; + private static final String ANSI_RED = "\u001B[31m"; + private static final String ANSI_BLUE = "\u001B[34m"; + + private final Scanner scanner = new Scanner(System.in); + + public String readLine() { + System.out.print("> "); + return scanner.nextLine(); + } + + public void printSuccess(String msg) { + System.out.println(ANSI_GREEN + msg + ANSI_RESET); + } + + public void printError(String msg) { + System.out.println(ANSI_RED + msg + ANSI_RESET); + } + + public void printInfo(String msg) { + System.out.println(ANSI_BLUE + msg + ANSI_RESET); + } + + public void display(List
articles) { + if (articles.isEmpty()) { + printInfo("暂无文章,请先执行 crawl。"); + return; + } + for (int i = 0; i < articles.size(); i++) { + Article a = articles.get(i); + System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); + } + } +} diff --git a/W11/src/main/resources/logback.xml b/W11/src/main/resources/logback.xml new file mode 100644 index 0000000..2b6fe59 --- /dev/null +++ b/W11/src/main/resources/logback.xml @@ -0,0 +1,31 @@ + + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + UTF-8 + + + + + ${LOG_PATH}/${APP_NAME}.log + + ${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log + 30 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + UTF-8 + + + + + + + + + + \ No newline at end of file diff --git a/W11/target/classes/logback.xml b/W11/target/classes/logback.xml new file mode 100644 index 0000000..2b6fe59 --- /dev/null +++ b/W11/target/classes/logback.xml @@ -0,0 +1,31 @@ + + + + + + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + UTF-8 + + + + + ${LOG_PATH}/${APP_NAME}.log + + ${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log + 30 + + + %d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n + UTF-8 + + + + + + + + + + \ No newline at end of file diff --git a/W11/target/maven-archiver/pom.properties b/W11/target/maven-archiver/pom.properties new file mode 100644 index 0000000..977a4fc --- /dev/null +++ b/W11/target/maven-archiver/pom.properties @@ -0,0 +1,3 @@ +artifactId=W9 +groupId=com.example +version=0.1.0 diff --git a/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst new file mode 100644 index 0000000..cb06b81 --- /dev/null +++ b/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -0,0 +1,28 @@ +com\example\datacollect\command\ListCommand.class +com\example\datacollect\command\CrawlCommand.class 
+com\example\datacollect\view\ConsoleView.class +com\example\datacollect\strategy\NewsStrategy.class +com\example\datacollect\command\Command.class +com\example\datacollect\exception\CrawlerException.class +com\example\datacollect\exception\NetworkException.class +com\example\datacollect\command\AnalyzeCommand.class +com\example\datacollect\strategy\Map\TransportStrategy.class +com\example\datacollect\strategy\CrawlStrategy.class +com\example\datacollect\model\Article.class +com\example\datacollect\strategy\BlogStrategy.class +com\example\datacollect\strategy\StrategyFactory$DefaultStrategy.class +com\example\datacollect\strategy\Map\JsonNode.class +com\example\datacollect\repository\ArticleRepository.class +com\example\datacollect\strategy\Map\BusStrategy.class +com\example\datacollect\Main.class +com\example\datacollect\command\ExitCommand.class +com\example\datacollect\command\HelpCommand.class +com\example\datacollect\command\HistoryCommand.class +com\example\datacollect\controller\CrawlerController.class +com\example\datacollect\strategy\Map\MapPlatform.class +com\example\datacollect\strategy\Map\DrivingStrategy.class +com\example\datacollect\strategy\StrategyFactory.class +com\example\datacollect\strategy\HnuNewsStrategy.class +com\example\datacollect\exception\ParseException.class +com\example\datacollect\model\RouteInfo.class +com\example\datacollect\strategy\Map\AmapPlatform.class diff --git a/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst new file mode 100644 index 0000000..79f1d65 --- /dev/null +++ b/W11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -0,0 +1,27 @@ +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HistoryCommand.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\AnalyzeCommand.java +C:\Users\taro 
blue\java作业\W11\src\main\java\com\example\datacollect\exception\NetworkException.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\BusStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\CrawlCommand.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ListCommand.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\DrivingStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\TransportStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\JsonNode.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\Main.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\AmapPlatform.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HelpCommand.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\Article.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\view\ConsoleView.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\CrawlerException.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\RouteInfo.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\MapPlatform.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ExitCommand.java +C:\Users\taro 
blue\java作业\W11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\repository\ArticleRepository.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\Command.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\ParseException.java +C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\controller\CrawlerController.java