Browse Source

完善代码健壮性

main
Lanyutong 1 month ago
parent
commit
79c9e0a900
  1. 4
      W11/.gitignore
  2. 10
      W11/.idea/.gitignore
  3. 1
      W11/.idea/.name
  4. 13
      W11/.idea/compiler.xml
  5. 20
      W11/.idea/jarRepositories.xml
  6. 12
      W11/.idea/misc.xml
  7. 6
      W11/.idea/vcs.xml
  8. 71
      W11/pom.xml
  9. 10
      W11/src/.idea/.gitignore
  10. 6
      W11/src/.idea/misc.xml
  11. 8
      W11/src/.idea/modules.xml
  12. 6
      W11/src/.idea/vcs.xml
  13. 11
      W11/src/java-cli.iml
  14. 26
      W11/src/main/java/com/example/datacollect/Main.java
  15. 135
      W11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java
  16. 11
      W11/src/main/java/com/example/datacollect/command/Command.java
  17. 119
      W11/src/main/java/com/example/datacollect/command/CrawlCommand.java
  18. 33
      W11/src/main/java/com/example/datacollect/command/ExitCommand.java
  19. 39
      W11/src/main/java/com/example/datacollect/command/HelpCommand.java
  20. 46
      W11/src/main/java/com/example/datacollect/command/HistoryCommand.java
  21. 32
      W11/src/main/java/com/example/datacollect/command/ListCommand.java
  22. 65
      W11/src/main/java/com/example/datacollect/controller/CrawlerController.java
  23. 11
      W11/src/main/java/com/example/datacollect/exception/CrawlerException.java
  24. 19
      W11/src/main/java/com/example/datacollect/exception/NetworkException.java
  25. 19
      W11/src/main/java/com/example/datacollect/exception/ParseException.java
  26. 77
      W11/src/main/java/com/example/datacollect/model/Article.java
  27. 29
      W11/src/main/java/com/example/datacollect/model/RouteInfo.java
  28. 72
      W11/src/main/java/com/example/datacollect/repository/ArticleRepository.java
  29. 26
      W11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java
  30. 14
      W11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java
  31. 50
      W11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java
  32. 40
      W11/src/main/java/com/example/datacollect/strategy/Map/AmapPlatform.java
  33. 19
      W11/src/main/java/com/example/datacollect/strategy/Map/BusStrategy.java
  34. 18
      W11/src/main/java/com/example/datacollect/strategy/Map/DrivingStrategy.java
  35. 5
      W11/src/main/java/com/example/datacollect/strategy/Map/JsonNode.java
  36. 11
      W11/src/main/java/com/example/datacollect/strategy/Map/MapPlatform.java
  37. 9
      W11/src/main/java/com/example/datacollect/strategy/Map/TransportStrategy.java
  38. 26
      W11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java
  39. 90
      W11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java
  40. 42
      W11/src/main/java/com/example/datacollect/view/ConsoleView.java
  41. 31
      W11/src/main/resources/logback.xml
  42. 31
      W11/target/classes/logback.xml
  43. 3
      W11/target/maven-archiver/pom.properties
  44. 28
      W11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst
  45. 27
      W11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

4
W11/.gitignore

@ -0,0 +1,4 @@
# Build artifacts and runtime logs that should not be committed
*.jar
*.class
*.log
# Maven build output directory (compiled classes, archiver and compiler status files)
target/

10
W11/.idea/.gitignore

@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Default folder containing query files, ignored
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP client requests
/httpRequests/

1
W11/.idea/.name

@ -0,0 +1 @@
Command.java

13
W11/.idea/compiler.xml

@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE compiler settings: a single enabled annotation-processing profile that applies to
     module "W9" and writes generated sources to target/generated-sources/annotations and
     generated test sources to target/generated-test-sources/test-annotations. -->
<project version="4">
<component name="CompilerConfiguration">
<annotationProcessing>
<profile name="Maven default annotation processors profile" enabled="true">
<sourceOutputDir name="target/generated-sources/annotations" />
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" />
<outputRelativeToContentRoot value="true" />
<module name="W9" />
</profile>
</annotationProcessing>
</component>
</project>

20
W11/.idea/jarRepositories.xml

@ -0,0 +1,20 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Remote repositories registered in the IDE: two entries sharing the id "central"
     (repo.maven.apache.org and repo1.maven.org) and the JBoss community repository. -->
<project version="4">
<component name="RemoteRepositoriesConfiguration">
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Central Repository" />
<option name="url" value="https://repo.maven.apache.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="central" />
<option name="name" value="Maven Central repository" />
<option name="url" value="https://repo1.maven.org/maven2" />
</remote-repository>
<remote-repository>
<option name="id" value="jboss.community" />
<option name="name" value="JBoss Community repository" />
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" />
</remote-repository>
</component>
</project>

12
W11/.idea/misc.xml

@ -0,0 +1,12 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Project-level IDE settings: registers $PROJECT_DIR$/pom.xml as the Maven project file
     and selects project JDK "25" with language level JDK_25.
     NOTE(review): pom.xml sets maven.compiler.source/target to 11; confirm which level is intended. -->
<project version="4">
<component name="ExternalStorageConfigurationManager" enabled="true" />
<component name="MavenProjectsManager">
<option name="originalFiles">
<list>
<option value="$PROJECT_DIR$/pom.xml" />
</list>
</option>
</component>
<component name="ProjectRootManager" version="2" languageLevel="JDK_25" default="true" project-jdk-name="25" project-jdk-type="JavaSDK" />
</project>

6
W11/.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maps the parent directory of this project ($PROJECT_DIR$/..) to Git. -->
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/.." vcs="Git" />
</component>
</project>

71
W11/pom.xml

@ -0,0 +1,71 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build for the CLI crawler: compiles for Java 11 and, during the package phase,
     assembles a runnable jar-with-dependencies whose main class is com.example.datacollect.Main. -->
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>W9</artifactId>
  <version>0.1.0</version>

  <properties>
    <!-- The sources contain non-ASCII string literals; pin the encoding so the build does not
         depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
  </properties>

  <!-- All dependencies are declared here -->
  <dependencies>
    <!-- jsoup: HTML fetching and parsing for the crawler -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.17.2</version>
    </dependency>
    <!-- Jackson: JSON parsing -->
    <dependency>
      <groupId>com.fasterxml.jackson.core</groupId>
      <artifactId>jackson-databind</artifactId>
      <version>2.15.2</version>
    </dependency>
    <!-- Logback: logging backend -->
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.4.11</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
      </plugin>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.example.datacollect.Main</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>

10
W11/src/.idea/.gitignore

@ -0,0 +1,10 @@
# Default ignored files
/shelf/
/workspace.xml
# Default folder containing query files, ignored
/queries/
# Datasource local storage ignored files
/dataSources/
/dataSources.local.xml
# Editor-based HTTP client requests
/httpRequests/

6
W11/src/.idea/misc.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE settings for a project rooted at src/: project JDK "25", language level JDK_25,
     compiler output directory $PROJECT_DIR$/out. -->
<project version="4">
<component name="ProjectRootManager" version="2" languageLevel="JDK_25" default="true" project-jdk-name="25" project-jdk-type="JavaSDK">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

8
W11/src/.idea/modules.xml

@ -0,0 +1,8 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Registers the single module file java-cli.iml with the IDE project. -->
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/java-cli.iml" filepath="$PROJECT_DIR$/java-cli.iml" />
</modules>
</component>
</project>

6
W11/src/.idea/vcs.xml

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maps the directory two levels above this project ($PROJECT_DIR$/../..) to Git. -->
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" />
</component>
</project>

11
W11/src/java-cli.iml

@ -0,0 +1,11 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- IDE module definition: a Java module whose content root is the module directory, with
     main/java as its only (non-test) source folder; it inherits the project JDK and the
     project compiler output location. -->
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$">
<sourceFolder url="file://$MODULE_DIR$/main/java" isTestSource="false" />
</content>
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

26
W11/src/main/java/com/example/datacollect/Main.java

@ -0,0 +1,26 @@
package com.example.datacollect;
import com.example.datacollect.controller.CrawlerController;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * Application entry point: wires the console view, the shared article list and the
 * controller together, then feeds every line read from the console to the controller.
 */
public class Main {
    private static final Logger logger = LoggerFactory.getLogger(Main.class);

    /**
     * Starts the read-dispatch loop.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        logger.info("应用启动");
        ConsoleView view = new ConsoleView();
        List<Article> articles = new ArrayList<>();
        CrawlerController controller = new CrawlerController(view, articles);
        view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands.");
        logger.info("CLI Crawler 启动成功");
        while (true) {
            String line = view.readLine();
            if (line == null) {
                // CrawlerController.handle ignores null input, so without this check a null
                // line would make the loop spin forever.
                // NOTE(review): assumes readLine() returns null only when input is exhausted;
                // confirm against ConsoleView.
                logger.info("输入结束,应用退出");
                break;
            }
            controller.handle(line);
        }
    }
}

135
W11/src/main/java/com/example/datacollect/command/AnalyzeCommand.java

@ -0,0 +1,135 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;
/**
 * CLI command {@code analyze <url>}: downloads a page, parses it with the strategy that
 * matches the URL and prints statistics about the result. Nothing is added to the article
 * list passed to {@link #execute}.
 */
public class AnalyzeCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class);
    private static final String URL_PATTERN =
            "^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$";
    /** URL_PATTERN compiled once instead of on every validation call. */
    private static final Pattern URL_REGEX = Pattern.compile(URL_PATTERN);
    /**
     * Scheme prepended when the user omits it. URL_PATTERN accepts scheme-less input, but
     * Jsoup.connect only accepts absolute http/https URLs.
     */
    private static final String DEFAULT_SCHEME = "https://";
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    private static final int TIMEOUT_MS = 15000;
    /** Maximum number of titles printed in the report. */
    private static final int MAX_LISTED_TITLES = 10;
    /** Titles longer than this are cut to 37 characters plus "...". */
    private static final int MAX_TITLE_LENGTH = 40;

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view console used for all user-facing output
     */
    public AnalyzeCommand(ConsoleView view) {
        this.view = view;
        this.strategyFactory = new StrategyFactory();
    }

    @Override
    public String getName() {
        return "analyze";
    }

    /**
     * Fetches and analyzes the URL in {@code args[0]}.
     *
     * @param args     command arguments; the first element is the URL
     * @param articles shared article list (not read or modified by this command)
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        if (args == null || args.length < 1) {
            view.printError("Usage: analyze <url>");
            return;
        }
        // Surrounding whitespace would otherwise fail validation.
        String candidate = args[0] == null ? null : args[0].trim();
        if (!isValidUrl(candidate)) {
            view.printError("无效的URL格式:" + args[0]);
            view.printInfo("请输入正确的URL,例如:https://www.example.com");
            return;
        }
        String url = ensureScheme(candidate);
        view.printInfo("正在分析:" + url);
        logger.info("开始分析URL: {}", url);
        try {
            Document doc = Jsoup.connect(url)
                    .userAgent(USER_AGENT)
                    .timeout(TIMEOUT_MS)
                    .get();
            CrawlStrategy strategy = strategyFactory.getStrategy(url);
            if (strategy == null) {
                view.printError("未找到匹配的解析策略");
                logger.warn("未找到匹配的解析策略: {}", url);
                return;
            }
            view.printInfo("匹配策略:" + strategy.getClass().getSimpleName());
            logger.debug("匹配策略: {}", strategy.getClass().getSimpleName());
            List<Article> parsedArticles = strategy.parse(url, doc);
            printReport(parsedArticles);
        } catch (IOException | IllegalArgumentException e) {
            // Jsoup reports malformed URLs with IllegalArgumentException; treat it like a
            // failed request instead of letting it terminate the CLI loop.
            view.printError("分析失败:" + e.getMessage());
            logger.error("分析失败: {} - {}", url, e.getMessage(), e);
        } catch (ParseException e) {
            view.printError("解析失败:" + e.getMessage());
            logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e);
        }
    }

    @Override
    public String getDescription() {
        return "analyze <url> - 分析URL内容(不保存),输出统计信息";
    }

    /** Prints the article count, content statistics, title list and the closing hint. */
    private void printReport(List<Article> parsedArticles) {
        view.printSuccess("分析完成!");
        view.printInfo("📊 统计信息:");
        view.printInfo(" - 解析到文章数量:" + parsedArticles.size());
        logger.info("分析完成,解析到 {} 篇文章", parsedArticles.size());
        if (!parsedArticles.isEmpty()) {
            printContentStatistics(parsedArticles);
            printTitles(parsedArticles);
        }
        view.printInfo("\n💡 提示:分析结果未保存,如需保存请使用 crawl 命令");
    }

    /** Prints how many articles have content, and the total and average content length. */
    private void printContentStatistics(List<Article> parsedArticles) {
        int totalContentLength = 0;
        int titlesWithContent = 0;
        int titlesWithoutContent = 0;
        for (Article article : parsedArticles) {
            String content = article.getContent();
            if (content != null && !content.isBlank()) {
                totalContentLength += content.length();
                titlesWithContent++;
            } else {
                titlesWithoutContent++;
            }
        }
        view.printInfo(" - 包含内容的文章:" + titlesWithContent);
        view.printInfo(" - 仅标题的文章:" + titlesWithoutContent);
        view.printInfo(" - 总内容长度:" + totalContentLength + " 字符");
        if (titlesWithContent > 0) {
            int avgLength = totalContentLength / titlesWithContent;
            view.printInfo(" - 平均内容长度:" + avgLength + " 字符");
        }
    }

    /** Prints up to MAX_LISTED_TITLES titles, shortening long ones, plus a remainder line. */
    private void printTitles(List<Article> parsedArticles) {
        view.printInfo("\n📝 文章标题列表:");
        int shown = Math.min(parsedArticles.size(), MAX_LISTED_TITLES);
        for (int i = 0; i < shown; i++) {
            String title = parsedArticles.get(i).getTitle();
            if (title == null) {
                // Article does not forbid a null title; print an empty one instead of failing.
                title = "";
            }
            if (title.length() > MAX_TITLE_LENGTH) {
                title = title.substring(0, MAX_TITLE_LENGTH - 3) + "...";
            }
            view.printInfo(" " + (i + 1) + ". " + title);
        }
        if (parsedArticles.size() > MAX_LISTED_TITLES) {
            view.printInfo(" ... 还有 " + (parsedArticles.size() - MAX_LISTED_TITLES) + " 篇文章");
        }
    }

    /** Returns the URL unchanged if it already has an http/https scheme, else prefixes DEFAULT_SCHEME. */
    private static String ensureScheme(String url) {
        if (url.startsWith("http://") || url.startsWith("https://")) {
            return url;
        }
        return DEFAULT_SCHEME + url;
    }

    /** Returns true when the text is non-blank and matches URL_PATTERN in full. */
    private boolean isValidUrl(String url) {
        if (url == null || url.isBlank()) {
            return false;
        }
        return URL_REGEX.matcher(url).matches();
    }
}

11
W11/src/main/java/com/example/datacollect/command/Command.java

@ -0,0 +1,11 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import java.util.List;
/**
 * A named action of the CLI. CrawlerController stores implementations in a map keyed by
 * getName() and invokes execute() with the shared article list.
 */
public interface Command {
/** Returns the name under which the command is registered and looked up. */
String getName();
/**
 * Runs the command.
 *
 * @param args     arguments that followed the command name (may be empty)
 * @param articles article list shared between commands
 */
void execute(String[] args, List<Article> articles);
/** Returns a human-readable description of the command. */
String getDescription();
}

119
W11/src/main/java/com/example/datacollect/command/CrawlCommand.java

@ -0,0 +1,119 @@
package com.example.datacollect.command;
import com.example.datacollect.exception.NetworkException;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import com.example.datacollect.strategy.CrawlStrategy;
import com.example.datacollect.strategy.StrategyFactory;
import com.example.datacollect.view.ConsoleView;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.List;
import java.util.regex.Pattern;
/**
 * CLI command {@code crawl <url>}: downloads a page (retrying on I/O failure), parses it
 * with the strategy that matches the URL and appends the parsed articles to the shared list.
 */
public class CrawlCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class);
    private static final String URL_PATTERN =
            "^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$";
    /** URL_PATTERN compiled once instead of on every validation call. */
    private static final Pattern URL_REGEX = Pattern.compile(URL_PATTERN);
    /**
     * Scheme prepended when the user omits it. URL_PATTERN accepts scheme-less input, but
     * Jsoup.connect only accepts absolute http/https URLs.
     */
    private static final String DEFAULT_SCHEME = "https://";
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36";
    private static final int TIMEOUT_MS = 15000;
    private static final int MAX_RETRY = 3;
    private static final long RETRY_DELAY_MS = 2000;

    private final ConsoleView view;
    private final StrategyFactory strategyFactory;

    /**
     * @param view console used for all user-facing output
     */
    public CrawlCommand(ConsoleView view) {
        this.view = view;
        this.strategyFactory = new StrategyFactory();
    }

    @Override
    public String getName() {
        return "crawl";
    }

    /**
     * Crawls the URL in {@code args[0]} and adds every parsed article to {@code articles}.
     *
     * @param args     command arguments; the first element is the URL
     * @param articles shared article list that receives the parsed articles
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        if (args == null || args.length < 1) {
            view.printError("Usage: crawl <url>");
            return;
        }
        // Surrounding whitespace would otherwise fail validation.
        String candidate = args[0] == null ? null : args[0].trim();
        if (!isValidUrl(candidate)) {
            view.printError("无效的URL格式:" + args[0]);
            view.printInfo("请输入正确的URL,例如:https://www.example.com");
            return;
        }
        String url = ensureScheme(candidate);
        view.printInfo("正在爬取:" + url);
        logger.info("开始爬取URL: {}", url);
        try {
            Document doc = fetchWithRetry(url);
            List<Article> parsedArticles = parseWithStrategy(url, doc);
            articles.addAll(parsedArticles);
            view.printSuccess("爬取成功!共获取 " + parsedArticles.size() + " 篇文章");
            view.printInfo("输入 list 查看");
            logger.info("爬取完成,获取 {} 篇文章", parsedArticles.size());
        } catch (NetworkException e) {
            view.printError("网络请求失败:" + e.getMessage());
            logger.error("网络请求失败: {} - {}", e.getUrl(), e.getMessage(), e);
        } catch (ParseException e) {
            view.printError("解析失败:" + e.getMessage());
            logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e);
        }
    }

    /**
     * Downloads the page, trying up to MAX_RETRY times and sleeping RETRY_DELAY_MS between
     * attempts.
     *
     * @throws NetworkException when every attempt fails, the URL is rejected by Jsoup, or
     *                          the thread is interrupted while waiting
     */
    private Document fetchWithRetry(String url) throws NetworkException {
        IOException lastException = null;
        for (int attempt = 1; attempt <= MAX_RETRY; attempt++) {
            try {
                return Jsoup.connect(url)
                        .userAgent(USER_AGENT)
                        .timeout(TIMEOUT_MS)
                        .get();
            } catch (IllegalArgumentException e) {
                // Malformed URL: retrying cannot help, and letting it escape would end the CLI loop.
                throw new NetworkException("无效的URL:" + e.getMessage(), url, e);
            } catch (IOException e) {
                lastException = e;
                if (attempt < MAX_RETRY) {
                    view.printInfo("重试第 " + attempt + " 次...");
                    try {
                        Thread.sleep(RETRY_DELAY_MS);
                    } catch (InterruptedException ie) {
                        // Restore the interrupt flag before giving up.
                        Thread.currentThread().interrupt();
                        throw new NetworkException("请求被中断", url, ie);
                    }
                }
            }
        }
        String reason = lastException == null ? "" : lastException.getMessage();
        throw new NetworkException("网络请求失败,已重试 " + MAX_RETRY + " 次: " + reason, url, lastException);
    }

    /**
     * Parses the document with the strategy chosen for the URL.
     *
     * @throws ParseException when no strategy matches (same condition AnalyzeCommand reports)
     *                        or the strategy fails
     */
    private List<Article> parseWithStrategy(String url, Document doc) throws ParseException {
        CrawlStrategy strategy = strategyFactory.getStrategy(url);
        if (strategy == null) {
            logger.warn("未找到匹配的解析策略: {}", url);
            throw new ParseException("未找到匹配的解析策略", url);
        }
        return strategy.parse(url, doc);
    }

    @Override
    public String getDescription() {
        return "crawl <url> - 爬取指定URL的文章";
    }

    /** Returns the URL unchanged if it already has an http/https scheme, else prefixes DEFAULT_SCHEME. */
    private static String ensureScheme(String url) {
        if (url.startsWith("http://") || url.startsWith("https://")) {
            return url;
        }
        return DEFAULT_SCHEME + url;
    }

    /** Returns true when the text is non-blank and matches URL_PATTERN in full. */
    private boolean isValidUrl(String url) {
        if (url == null || url.isBlank()) {
            return false;
        }
        return URL_REGEX.matcher(url).matches();
    }
}

33
W11/src/main/java/com/example/datacollect/command/ExitCommand.java

@ -0,0 +1,33 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * CLI command {@code exit}: prints a farewell, logs the shutdown and terminates the JVM
 * with status 0.
 */
public class ExitCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class);
    private static final int STATUS_OK = 0;

    private final ConsoleView view;

    /**
     * @param view console used for the farewell message
     */
    public ExitCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "exit";
    }

    @Override
    public String getDescription() {
        return "exit - 退出程序";
    }

    /** Neither parameter is used; the call does not return because the JVM is stopped. */
    @Override
    public void execute(String[] args, List<Article> articles) {
        view.printSuccess("Bye!");
        logger.info("应用退出");
        System.exit(STATUS_OK);
    }
}

39
W11/src/main/java/com/example/datacollect/command/HelpCommand.java

@ -0,0 +1,39 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * CLI command {@code help}: prints the fixed list of commands and their aliases.
 */
public class HelpCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class);

    /** Help text, one console line per entry, printed in this order. */
    private static final String[] HELP_LINES = {
        "Commands:",
        " crawl <url> - 爬取指定URL的文章并保存",
        " analyze <url> - 分析URL内容(不保存),输出统计信息",
        " list - 列出已保存的文章",
        " history - 查看命令历史",
        " help - 显示帮助信息",
        " exit - 退出程序",
        "Aliases: h=help, c=crawl, l=list, hi=history, e=exit, a=analyze",
    };

    private final ConsoleView view;

    /**
     * @param view console that receives the help text
     */
    public HelpCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "help";
    }

    @Override
    public String getDescription() {
        return "显示帮助信息";
    }

    /** Prints every help line through the view; both parameters are unused. */
    @Override
    public void execute(String[] args, List<Article> articles) {
        logger.debug("显示帮助信息");
        for (String helpLine : HELP_LINES) {
            view.printInfo(helpLine);
        }
    }
}

46
W11/src/main/java/com/example/datacollect/command/HistoryCommand.java

@ -0,0 +1,46 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
/**
 * CLI command {@code history}: prints every command line recorded through
 * {@link #record(String)}. The history is held in a static list shared by all instances.
 */
public class HistoryCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class);
    private static final List<String> commandHistory = new ArrayList<>();

    private final ConsoleView view;

    /**
     * @param view console that receives the history listing
     */
    public HistoryCommand(ConsoleView view) {
        this.view = view;
    }

    /**
     * Appends a raw command line to the history.
     *
     * @param commandLine text to remember
     */
    public static void record(String commandLine) {
        commandHistory.add(commandLine);
        logger.debug("记录命令: {}", commandLine);
    }

    @Override
    public String getName() {
        return "history";
    }

    @Override
    public String getDescription() {
        return "history - 查看你输入过的所有命令";
    }

    /** Prints the recorded lines numbered from 1, or a notice when nothing is recorded. */
    @Override
    public void execute(String[] args, List<Article> articles) {
        if (commandHistory.isEmpty()) {
            view.printInfo("📜 还没有输入过任何命令哦~");
            return;
        }
        logger.debug("显示命令历史,共 {} 条", commandHistory.size());
        view.printInfo("📜 你的历史命令列表:");
        int position = 0;
        for (String entry : commandHistory) {
            position++;
            view.printInfo(position + ". " + entry);
        }
    }
}

32
W11/src/main/java/com/example/datacollect/command/ListCommand.java

@ -0,0 +1,32 @@
package com.example.datacollect.command;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.List;
/**
 * CLI command {@code list}: hands the shared article list to the view for display.
 */
public class ListCommand implements Command {
    private static final Logger logger = LoggerFactory.getLogger(ListCommand.class);

    private final ConsoleView view;

    /**
     * @param view console that renders the articles
     */
    public ListCommand(ConsoleView view) {
        this.view = view;
    }

    @Override
    public String getName() {
        return "list";
    }

    @Override
    public String getDescription() {
        return "list - 显示所有已爬取的文章";
    }

    /**
     * @param args     unused
     * @param articles articles to display
     */
    @Override
    public void execute(String[] args, List<Article> articles) {
        final int count = articles.size();
        logger.debug("显示文章列表,共 {} 篇", count);
        view.display(articles);
    }
}

65
W11/src/main/java/com/example/datacollect/controller/CrawlerController.java

@ -0,0 +1,65 @@
package com.example.datacollect.controller;
import com.example.datacollect.command.*;
import com.example.datacollect.model.Article;
import com.example.datacollect.view.ConsoleView;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Dispatches console input to the registered {@link Command} implementations. Commands are
 * looked up by name, after a short alias (for example {@code h}) is mapped to its full name.
 */
public class CrawlerController {
    private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class);

    private final Map<String, Command> commands = new HashMap<>();
    private final Map<String, String> aliasMap = new HashMap<>();
    private final ConsoleView view;
    private final List<Article> articles;

    /**
     * Registers all built-in commands and their aliases.
     *
     * @param view     console used for output
     * @param articles article list handed to every command
     */
    public CrawlerController(ConsoleView view, List<Article> articles) {
        this.view = view;
        this.articles = articles;
        register(new HelpCommand(view));
        register(new ListCommand(view));
        register(new CrawlCommand(view));
        register(new ExitCommand(view));
        register(new HistoryCommand(view));
        register(new AnalyzeCommand(view));
        aliasMap.put("h", "help");
        aliasMap.put("c", "crawl");
        aliasMap.put("l", "list");
        aliasMap.put("hi", "history");
        aliasMap.put("e", "exit");
        aliasMap.put("a", "analyze");
    }

    /** Stores the command under its own name. */
    private void register(Command command) {
        commands.put(command.getName(), command);
    }

    /**
     * Parses one input line and runs the matching command. Null and blank lines are ignored.
     * A runtime failure inside a command is reported and logged, so one bad command does not
     * terminate the program.
     *
     * @param input raw line typed by the user; may be null
     */
    public void handle(String input) {
        if (input == null || input.isBlank()) {
            return;
        }
        // Without stripping, leading whitespace makes split() return "" as the command name.
        String line = input.strip();
        HistoryCommand.record(line);
        String[] parts = line.split("\\s+", 2);
        String commandName = aliasMap.getOrDefault(parts[0], parts[0]);
        String[] args = parts.length > 1 ? new String[]{parts[1]} : new String[0];
        Command command = commands.get(commandName);
        if (command == null) {
            view.printError("未知命令/别名:" + commandName + ",输入 help 查看所有命令");
            logger.warn("未知命令: {}", commandName);
            return;
        }
        logger.debug("执行命令: {} with args: {}", commandName, args.length > 0 ? args[0] : "无参数");
        try {
            command.execute(args, articles);
        } catch (RuntimeException e) {
            view.printError("命令执行失败:" + e.getMessage());
            logger.error("命令执行失败: {}", commandName, e);
        }
    }
}

11
W11/src/main/java/com/example/datacollect/exception/CrawlerException.java

@ -0,0 +1,11 @@
package com.example.datacollect.exception;
/**
 * Checked base exception for the crawler; NetworkException and ParseException extend it.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     * @param cause   underlying exception
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }
}

19
W11/src/main/java/com/example/datacollect/exception/NetworkException.java

@ -0,0 +1,19 @@
package com.example.datacollect.exception;
/**
 * Signals that a page could not be fetched; carries the URL that was being requested.
 */
public class NetworkException extends CrawlerException {
    private final String url;

    /**
     * @param message description of the failure
     * @param url     URL that was being requested
     * @param cause   underlying exception
     */
    public NetworkException(String message, String url, Throwable cause) {
        super(message, cause);
        this.url = url;
    }

    /**
     * @param message description of the failure
     * @param url     URL that was being requested
     */
    public NetworkException(String message, String url) {
        super(message);
        this.url = url;
    }

    /** Returns the URL supplied at construction time. */
    public String getUrl() {
        return this.url;
    }
}

19
W11/src/main/java/com/example/datacollect/exception/ParseException.java

@ -0,0 +1,19 @@
package com.example.datacollect.exception;
/**
 * Signals that a fetched page could not be parsed; carries the URL of that page.
 */
public class ParseException extends CrawlerException {
    private final String url;

    /**
     * @param message description of the failure
     * @param url     URL of the page being parsed
     * @param cause   underlying exception
     */
    public ParseException(String message, String url, Throwable cause) {
        super(message, cause);
        this.url = url;
    }

    /**
     * @param message description of the failure
     * @param url     URL of the page being parsed
     */
    public ParseException(String message, String url) {
        super(message);
        this.url = url;
    }

    /** Returns the URL supplied at construction time. */
    public String getUrl() {
        return this.url;
    }
}

77
W11/src/main/java/com/example/datacollect/model/Article.java

@ -0,0 +1,77 @@
package com.example.datacollect.model;
public class Article {
private String title;
private String url;
private String content;
private String author;
private String publishDate;
// 修正后的构造方法:参数 → 成员变量
public Article(String title, String url, String content) {
this.title = title;
this.url = url;
this.content = content;
this.author = "";
this.publishDate = "";
}
public Article(String title, String url, String content, String author, String publishDate) {
this.title = title;
this.url = url;
this.content = content;
this.author = author;
this.publishDate = publishDate;
}
public String getTitle() {
return title;
}
public void setTitle(String title) {
this.title = title;
}
public String getUrl() {
return url;
}
public void setUrl(String url) {
this.url = url;
}
public String getContent() {
return content;
}
public void setContent(String content) {
this.content = content;
}
public String getAuthor() {
return author;
}
public void setAuthor(String author) {
this.author = author;
}
public String getPublishDate() {
return publishDate;
}
public void setPublishDate(String publishDate) {
this.publishDate = publishDate;
}
@Override
public String toString() {
return "Article{" +
"title='" + title + '\'' +
", author='" + author + '\'' +
", publishDate='" + publishDate + '\'' +
", url='" + url + '\'' +
", content='" + content + '\'' +
'}';
}
}

29
W11/src/main/java/com/example/datacollect/model/RouteInfo.java

@ -0,0 +1,29 @@
package com.example.datacollect.model;
/**
 * Immutable record of one route lookup: destination city, map platform, transport mode,
 * distance and travel time.
 */
public class RouteInfo {
    /** Destination city. */
    private final String city;
    /** Map platform name. */
    private final String mapType;
    /** Transport mode. */
    private final String transportType;
    /** Distance in kilometres. */
    private final double distance;
    /** Travel time in hours. */
    private final double time;

    /**
     * Initializes every field from the matching argument.
     */
    public RouteInfo(String city, String mapType, String transportType, double distance, double time) {
        this.city = city;
        this.mapType = mapType;
        this.transportType = transportType;
        this.distance = distance;
        this.time = time;
    }

    public String getCity() {
        return this.city;
    }

    public String getMapType() {
        return this.mapType;
    }

    public String getTransportType() {
        return this.transportType;
    }

    public double getDistance() {
        return this.distance;
    }

    public double getTime() {
        return this.time;
    }
}

72
W11/src/main/java/com/example/datacollect/repository/ArticleRepository.java

@ -0,0 +1,72 @@
package com.example.datacollect.repository;
import com.example.datacollect.model.Article;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
/**
 * In-memory store of validated articles. Every stored article has a non-blank title and a
 * non-blank URL.
 */
public class ArticleRepository {
    private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class);
    private final List<Article> articles = new ArrayList<>();

    /**
     * Stores one article.
     *
     * @param article article to store
     * @throws IllegalArgumentException if the article is null or its title or URL is null or blank
     */
    public void add(Article article) {
        validate(article);
        store(article);
    }

    /** Returns a read-only view of the stored articles. */
    public List<Article> getAll() {
        logger.debug("获取所有文章,共 {} 篇", articles.size());
        return Collections.unmodifiableList(articles);
    }

    /** Returns the number of stored articles. */
    public int size() {
        return articles.size();
    }

    /** Removes every stored article. */
    public void clear() {
        int count = articles.size();
        articles.clear();
        logger.info("清空所有文章,共 {} 篇", count);
    }

    /**
     * Stores all articles of the list, or none of them: every element is validated before
     * the first one is stored, so a bad element cannot leave a partial batch behind.
     *
     * @param articleList articles to store; an empty list is a no-op
     * @throws IllegalArgumentException if the list is null or any element fails validation
     */
    public void addAll(List<Article> articleList) {
        if (articleList == null) {
            logger.warn("尝试添加空的文章列表");
            throw new IllegalArgumentException("Article list cannot be null");
        }
        if (articleList.isEmpty()) {
            logger.debug("添加空的文章列表");
            return;
        }
        // Copy first: the argument may be the live view returned by getAll(), and iterating
        // it while adding would throw ConcurrentModificationException.
        List<Article> batch = new ArrayList<>(articleList);
        for (Article article : batch) {
            validate(article);
        }
        for (Article article : batch) {
            store(article);
        }
        logger.info("批量添加文章,共 {} 篇", batch.size());
    }

    /**
     * Returns the article at the given position.
     *
     * @throws IndexOutOfBoundsException if the index is negative or not less than size()
     */
    public Article getByIndex(int index) {
        if (index < 0 || index >= articles.size()) {
            logger.warn("无效的索引: {},列表大小: {}", index, articles.size());
            throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + articles.size());
        }
        return articles.get(index);
    }

    /** Returns true when nothing is stored. */
    public boolean isEmpty() {
        return articles.isEmpty();
    }

    /** Rejects a null article and articles whose title or URL is null or blank. */
    private void validate(Article article) {
        if (article == null) {
            logger.warn("尝试添加空文章对象");
            throw new IllegalArgumentException("Article cannot be null");
        }
        if (article.getTitle() == null || article.getTitle().isBlank()) {
            logger.warn("尝试添加标题为空的文章: {}", article.getUrl());
            throw new IllegalArgumentException("Article title cannot be null or blank");
        }
        if (article.getUrl() == null || article.getUrl().isBlank()) {
            logger.warn("尝试添加URL为空的文章: {}", article.getTitle());
            throw new IllegalArgumentException("Article URL cannot be null or blank");
        }
    }

    /** Appends an already validated article and logs its title. */
    private void store(Article article) {
        articles.add(article);
        logger.debug("添加文章: {}", article.getTitle());
    }
}

26
W11/src/main/java/com/example/datacollect/strategy/BlogStrategy.java

@ -0,0 +1,26 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Strategy for URLs containing {@code blog.example.com}: turns every {@code .post-title}
 * element into an article that has the element text as title, the page URL as URL and
 * empty content.
 */
public class BlogStrategy implements CrawlStrategy {
    private static final String HOST_MARKER = "blog.example.com";
    private static final String TITLE_SELECTOR = ".post-title";

    @Override
    public boolean supports(String url) {
        return url.contains(HOST_MARKER);
    }

    /**
     * @param url URL of the page; reused as the URL of every produced article
     * @param doc parsed page
     * @return one article per matched title element, in document order
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        Elements headings = doc.select(TITLE_SELECTOR);
        List<Article> result = new ArrayList<>();
        for (Element heading : headings) {
            String title = heading.text();
            result.add(new Article(title, url, ""));
        }
        return result;
    }
}

14
W11/src/main/java/com/example/datacollect/strategy/CrawlStrategy.java

@ -0,0 +1,14 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import java.util.List;
/**
 * Site-specific parser that turns a fetched page into articles.
 */
public interface CrawlStrategy {
/**
 * Extracts articles from the page.
 *
 * @param url URL the document was fetched from
 * @param doc parsed page
 * @return the articles found on the page
 * @throws ParseException if the page cannot be parsed
 */
List<Article> parse(String url, Document doc) throws ParseException;
/** Returns true when this strategy can handle the given URL. */
boolean supports(String url);
/**
 * Priority of the strategy; 100 unless overridden.
 * NOTE(review): how the value is ordered is decided by the caller (not shown here); confirm.
 */
default int getPriority() {
return 100;
}
}

50
W11/src/main/java/com/example/datacollect/strategy/HnuNewsStrategy.java

@ -0,0 +1,50 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Strategy for {@code news.hnu.edu.cn} list pages: every {@code ul.list11 li} entry that
 * contains a link and a non-empty title becomes one article.
 */
public class HnuNewsStrategy implements CrawlStrategy {
    private static final String HOST = "news.hnu.edu.cn";
    private static final String BASE_URL = "https://" + HOST;

    /** Returns true for non-null URLs that contain the site host. */
    @Override
    public boolean supports(String url) {
        return url != null && url.contains(HOST);
    }

    /**
     * @param url URL of the list page
     * @param doc parsed list page
     * @return articles with title, absolute link and summary text; entries without a link
     *         or without a title are skipped
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        List<Article> articles = new ArrayList<>();
        Elements listItems = doc.select("ul.list11 li");
        for (Element li : listItems) {
            Element link = li.selectFirst("a");
            if (link == null) {
                continue;
            }
            String title = textOf(link.selectFirst("h4.l2.h4s2"));
            if (title.isEmpty()) {
                continue;
            }
            String content = textOf(link.selectFirst("p.l3.ps3"));
            articles.add(new Article(title, resolveUrl(link), content));
        }
        return articles;
    }

    /** Returns the trimmed text of the element, or an empty string when it is absent. */
    private static String textOf(Element element) {
        return element == null ? "" : element.text().trim();
    }

    /**
     * Returns an absolute URL for the link. An href that already starts with "http" is used
     * as is. Otherwise jsoup resolves it against the document base URI, which handles "../"
     * segments and hrefs without a leading slash. If the document has no base URI, the href
     * is joined to BASE_URL by hand.
     */
    private static String resolveUrl(Element link) {
        String href = link.attr("href");
        if (href.startsWith("http")) {
            return href;
        }
        String absolute = link.absUrl("href");
        if (!absolute.isEmpty()) {
            return absolute;
        }
        String path = href.replace("..", "");
        if (!path.startsWith("/")) {
            // Avoid gluing the path directly onto the host name.
            path = "/" + path;
        }
        return BASE_URL + path;
    }
}

40
W11/src/main/java/com/example/datacollect/strategy/Map/AmapPlatform.java

@ -0,0 +1,40 @@
package com.example.datacollect.strategy.Map;
import com.example.datacollect.model.RouteInfo;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import java.io.IOException;
/**
 * {@link MapPlatform} implementation for the Amap (高德地图) direction REST API.
 */
public class AmapPlatform implements MapPlatform {
    private final ObjectMapper mapper = new ObjectMapper();

    @Override
    public String getName() {
        return "高德地图";
    }

    @Override
    public String getBaseUrl() {
        return "https://restapi.amap.com/v3/direction/";
    }

    @Override
    public String getApiKeyParam() {
        return "key";
    }

    /**
     * Converts a direction response into a {@link RouteInfo}. The first entry of
     * {@code route.paths} is used; its {@code distance} is divided by 1000 and its
     * {@code duration} by 3600.
     *
     * @param responseBody  JSON text returned by the API
     * @param city          city stored in the result
     * @param transportType transport mode stored in the result
     * @throws IOException if the body is missing or not valid JSON, the {@code status} field
     *                     is not "1", or the response contains no {@code route.paths} entry
     */
    @Override
    public RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException {
        if (responseBody == null) {
            throw new IOException("API错误: 响应为空");
        }
        JsonNode root = mapper.readTree(responseBody);
        if (root == null) {
            throw new IOException("API错误: 响应为空");
        }
        // path() returns a "missing" node instead of null, so an incomplete response ends in
        // the declared IOException rather than a NullPointerException.
        if (!"1".equals(root.path("status").asText())) {
            throw new IOException("API错误: " + root.path("info").asText());
        }
        JsonNode path = root.path("route").path("paths").path(0);
        if (path.isMissingNode()) {
            throw new IOException("API错误: 响应中缺少路线数据");
        }
        double distance = path.path("distance").asInt() / 1000.0;
        double time = path.path("duration").asInt() / 3600.0;
        return new RouteInfo(city, getName(), transportType, distance, time);
    }
}

19
W11/src/main/java/com/example/datacollect/strategy/Map/BusStrategy.java

@ -0,0 +1,19 @@
package com.example.datacollect.strategy.Map;
import java.util.HashMap;
import java.util.Map;
/**
 * Transport strategy describing a public-transit route request.
 */
public class BusStrategy implements TransportStrategy {
    /** @return the path segment identifying the integrated transit endpoint */
    @Override
    public String getPath() {
        final String endpoint = "transit/integrated";
        return endpoint;
    }

    /**
     * @return a fresh, mutable map holding the fixed request parameters
     *         ({@code city} and {@code strategy})
     */
    @Override
    public Map<String, String> getParams() {
        final Map<String, String> query = new HashMap<>();
        query.put("strategy", "0");
        query.put("city", "长沙");
        return query;
    }
}

18
W11/src/main/java/com/example/datacollect/strategy/Map/DrivingStrategy.java

@ -0,0 +1,18 @@
package com.example.datacollect.strategy.Map;
import java.util.HashMap;
import java.util.Map;
/**
 * Transport strategy describing a driving route request.
 */
public class DrivingStrategy implements TransportStrategy {
    /** @return the path segment identifying the driving endpoint */
    @Override
    public String getPath() {
        final String endpoint = "driving";
        return endpoint;
    }

    /**
     * @return a fresh, mutable map holding the single fixed request parameter
     *         ({@code strategy})
     */
    @Override
    public Map<String, String> getParams() {
        final Map<String, String> query = new HashMap<>();
        query.put("strategy", "0");
        return query;
    }
}

5
W11/src/main/java/com/example/datacollect/strategy/Map/JsonNode.java

@ -0,0 +1,5 @@
package com.example.datacollect.strategy.Map;
/**
 * Empty placeholder class; it declares no fields or methods.
 *
 * <p>NOTE(review): it shares its simple name with Jackson's
 * {@code com.fasterxml.jackson.databind.JsonNode}, which AmapPlatform in this package
 * imports explicitly. This class looks unused; confirm and consider deleting it to
 * avoid confusion between the two types.
 */
public class JsonNode {
}

11
W11/src/main/java/com/example/datacollect/strategy/Map/MapPlatform.java

@ -0,0 +1,11 @@
package com.example.datacollect.strategy.Map;
import com.example.datacollect.model.RouteInfo;
import java.io.IOException;
/**
 * Describes a map service: its display name, where its API lives, how the API key is
 * passed, and how a raw response is turned into a {@link RouteInfo}.
 */
public interface MapPlatform {
/** @return the human-readable name of the platform */
String getName();
/** @return the base URL of the platform's API */
String getBaseUrl();
/** @return the name of the request parameter that carries the API key */
String getApiKeyParam();
/**
 * Converts a raw API response into a route summary.
 *
 * @param responseBody  the response text returned by the platform
 * @param city          city label to store in the result
 * @param transportType transport label to store in the result
 * @return the parsed route information
 * @throws IOException if the response cannot be parsed or reports an error
 */
RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException;
}

9
W11/src/main/java/com/example/datacollect/strategy/Map/TransportStrategy.java

@ -0,0 +1,9 @@
package com.example.datacollect.strategy.Map;
import java.util.Map;
/**
 * Describes one mode of transport as an API path plus a set of fixed request parameters.
 */
public interface TransportStrategy {
/**
 * @return the path segment for this transport mode
 *         (presumably appended to the platform base URL; verify against the caller)
 */
String getPath();
/** @return the request parameters for this transport mode, keyed by parameter name */
Map<String, String> getParams();
}

26
W11/src/main/java/com/example/datacollect/strategy/NewsStrategy.java

@ -0,0 +1,26 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * Crawl strategy for pages hosted on news.example.com.
 *
 * <p>Every element matching {@code .article-headline} becomes one {@link Article} whose
 * title is the element text, whose URL is the page URL, and whose content is empty.
 */
public class NewsStrategy implements CrawlStrategy {
    /**
     * @param url the page URL being crawled
     * @return {@code true} when the URL contains the news.example.com host
     */
    @Override
    public boolean supports(String url) {
        return url.indexOf("news.example.com") >= 0;
    }

    /**
     * @param url the URL the document was fetched from; stored on every article
     * @param doc the parsed page
     * @return one article per headline element, in document order
     * @throws ParseException declared by the interface; not thrown by this implementation
     */
    @Override
    public List<Article> parse(String url, Document doc) throws ParseException {
        final List<Article> collected = new ArrayList<>();
        final Elements headlines = doc.select(".article-headline");
        headlines.forEach(headline -> collected.add(new Article(headline.text(), url, "")));
        return collected;
    }
}

90
W11/src/main/java/com/example/datacollect/strategy/StrategyFactory.java

@ -0,0 +1,90 @@
package com.example.datacollect.strategy;
import com.example.datacollect.exception.ParseException;
import com.example.datacollect.model.Article;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
/**
 * Registry that picks the {@link CrawlStrategy} responsible for a URL.
 *
 * <p>Registered strategies are asked whether they support the URL; among the matches the
 * one with the smallest {@code getPriority()} value wins. When nothing matches, the
 * default strategy is returned.
 */
public class StrategyFactory {
    private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class);
    private final List<CrawlStrategy> strategies = new ArrayList<>();
    private CrawlStrategy defaultStrategy;

    /** Creates a factory with the built-in strategies and the title-only default strategy. */
    public StrategyFactory() {
        strategies.add(new HnuNewsStrategy());
        strategies.add(new BlogStrategy());
        strategies.add(new NewsStrategy());
        this.defaultStrategy = new DefaultStrategy();
        logger.info("策略工厂初始化,注册了 {} 个策略", strategies.size());
    }

    /**
     * Selects the strategy to use for a URL.
     *
     * @param url the URL to crawl; {@code null} is treated as matching no strategy
     * @return the matching strategy with the smallest priority value, or the default
     *         strategy when none matches
     */
    public CrawlStrategy getStrategy(String url) {
        List<CrawlStrategy> matching = findMatching(url);
        if (matching.isEmpty()) {
            logger.debug("未找到匹配策略,使用默认策略: {}", url);
            return defaultStrategy;
        }
        if (matching.size() > 1) {
            logger.debug("找到多个匹配策略,选择优先级最高的: {}", matching.get(0).getClass().getSimpleName());
        }
        return matching.get(0);
    }

    /**
     * Adds a strategy to the registry.
     *
     * @param strategy the strategy to register
     * @throws NullPointerException if {@code strategy} is {@code null}; the registry is
     *                              left unchanged in that case
     */
    public void register(CrawlStrategy strategy) {
        // Validate before mutating so a bad argument cannot leave a null entry in the list.
        if (strategy == null) {
            throw new NullPointerException("strategy must not be null");
        }
        strategies.add(strategy);
        logger.info("注册新策略: {}", strategy.getClass().getSimpleName());
    }

    /**
     * Replaces the strategy returned when no registered strategy matches.
     *
     * @param defaultStrategy the new fallback strategy
     * @throws NullPointerException if {@code defaultStrategy} is {@code null}; the current
     *                              fallback is kept in that case
     */
    public void setDefaultStrategy(CrawlStrategy defaultStrategy) {
        if (defaultStrategy == null) {
            throw new NullPointerException("defaultStrategy must not be null");
        }
        this.defaultStrategy = defaultStrategy;
        logger.info("设置默认策略: {}", defaultStrategy.getClass().getSimpleName());
    }

    /**
     * Lists every registered strategy that supports the URL.
     *
     * @param url the URL to test; {@code null} yields an empty list
     * @return a new list of matching strategies ordered by ascending priority value
     */
    public List<CrawlStrategy> getMatchingStrategies(String url) {
        return findMatching(url);
    }

    /** Collects the registered strategies supporting {@code url}, sorted by priority value. */
    private List<CrawlStrategy> findMatching(String url) {
        List<CrawlStrategy> matching = new ArrayList<>();
        if (url == null) {
            return matching;
        }
        for (CrawlStrategy s : strategies) {
            if (s.supports(url)) {
                matching.add(s);
            }
        }
        // List.sort is stable, so equal priorities keep their registration order.
        matching.sort(Comparator.comparingInt(CrawlStrategy::getPriority));
        return matching;
    }

    /**
     * Fallback strategy: supports every URL and produces a single article carrying the
     * document title, or nothing when the title is blank.
     */
    private static class DefaultStrategy implements CrawlStrategy {
        @Override
        public List<Article> parse(String url, org.jsoup.nodes.Document doc) throws ParseException {
            List<Article> articles = new ArrayList<>();
            String title = doc.title();
            if (title != null && !title.isBlank()) {
                articles.add(new Article(title.trim(), url, ""));
            }
            return articles;
        }

        @Override
        public boolean supports(String url) {
            return true;
        }

        /** @return the largest int value, so any other matching strategy sorts first */
        @Override
        public int getPriority() {
            return Integer.MAX_VALUE;
        }
    }
}

42
W11/src/main/java/com/example/datacollect/view/ConsoleView.java

@ -0,0 +1,42 @@
package com.example.datacollect.view;
import com.example.datacollect.model.Article;
import java.util.List;
import java.util.Scanner;
/**
 * Console front end: reads commands from standard input and prints coloured messages
 * and article listings to standard output.
 */
public class ConsoleView {
    private static final String ANSI_RESET = "\u001B[0m";
    private static final String ANSI_GREEN = "\u001B[32m";
    private static final String ANSI_RED = "\u001B[31m";
    private static final String ANSI_BLUE = "\u001B[34m";
    private final Scanner scanner = new Scanner(System.in);

    /**
     * Prints the {@code "> "} prompt and reads the next input line.
     *
     * @return the line typed by the user
     */
    public String readLine() {
        System.out.print("> ");
        String line = scanner.nextLine();
        return line;
    }

    /** Prints {@code msg} in green. */
    public void printSuccess(String msg) {
        printColored(ANSI_GREEN, msg);
    }

    /** Prints {@code msg} in red. */
    public void printError(String msg) {
        printColored(ANSI_RED, msg);
    }

    /** Prints {@code msg} in blue. */
    public void printInfo(String msg) {
        printColored(ANSI_BLUE, msg);
    }

    /**
     * Prints a numbered list of articles (title and URL), or a hint when the list is empty.
     *
     * @param articles the articles to show
     */
    public void display(List<Article> articles) {
        if (articles.isEmpty()) {
            printInfo("暂无文章,请先执行 crawl。");
            return;
        }
        int position = 0;
        for (Article article : articles) {
            position++;
            System.out.println(position + ". " + article.getTitle() + " | " + article.getUrl());
        }
    }

    /** Writes {@code msg} wrapped in the given colour code followed by the reset code. */
    private void printColored(String color, String msg) {
        System.out.println(color + msg + ANSI_RESET);
    }
}

31
W11/src/main/resources/logback.xml

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: logs go to the console and to a daily-rolling file. -->
<configuration>
<!-- Directory (relative to the working directory) and base file name used by the FILE appender. -->
<property name="LOG_PATH" value="./logs"/>
<property name="APP_NAME" value="crawler"/>
<!-- Console output, UTF-8 encoded. -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- File output: the active file is ${LOG_PATH}/${APP_NAME}.log; the date pattern below rolls it over daily. -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_PATH}/${APP_NAME}.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log</fileNamePattern>
<!-- Keep at most 30 archived files; older ones are deleted. -->
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- Application packages log at DEBUG; everything else uses the root level (INFO). -->
<logger name="com.example.datacollect" level="DEBUG"/>
<root level="INFO">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
</root>
</configuration>

31
W11/target/classes/logback.xml

@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Logback configuration: logs go to the console and to a daily-rolling file. -->
<!-- NOTE(review): this copy sits under target/classes, which looks like build output of
     src/main/resources/logback.xml; confirm whether it should be version-controlled. -->
<configuration>
<!-- Directory (relative to the working directory) and base file name used by the FILE appender. -->
<property name="LOG_PATH" value="./logs"/>
<property name="APP_NAME" value="crawler"/>
<!-- Console output, UTF-8 encoded. -->
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- File output: the active file is ${LOG_PATH}/${APP_NAME}.log; the date pattern below rolls it over daily. -->
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
<file>${LOG_PATH}/${APP_NAME}.log</file>
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
<fileNamePattern>${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log</fileNamePattern>
<!-- Keep at most 30 archived files; older ones are deleted. -->
<maxHistory>30</maxHistory>
</rollingPolicy>
<encoder>
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
<charset>UTF-8</charset>
</encoder>
</appender>
<!-- Application packages log at DEBUG; everything else uses the root level (INFO). -->
<logger name="com.example.datacollect" level="DEBUG"/>
<root level="INFO">
<appender-ref ref="CONSOLE"/>
<appender-ref ref="FILE"/>
</root>
</configuration>

3
W11/target/maven-archiver/pom.properties

@ -0,0 +1,3 @@
artifactId=W9
groupId=com.example
version=0.1.0

28
W11/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst

@ -0,0 +1,28 @@
com\example\datacollect\command\ListCommand.class
com\example\datacollect\command\CrawlCommand.class
com\example\datacollect\view\ConsoleView.class
com\example\datacollect\strategy\NewsStrategy.class
com\example\datacollect\command\Command.class
com\example\datacollect\exception\CrawlerException.class
com\example\datacollect\exception\NetworkException.class
com\example\datacollect\command\AnalyzeCommand.class
com\example\datacollect\strategy\Map\TransportStrategy.class
com\example\datacollect\strategy\CrawlStrategy.class
com\example\datacollect\model\Article.class
com\example\datacollect\strategy\BlogStrategy.class
com\example\datacollect\strategy\StrategyFactory$DefaultStrategy.class
com\example\datacollect\strategy\Map\JsonNode.class
com\example\datacollect\repository\ArticleRepository.class
com\example\datacollect\strategy\Map\BusStrategy.class
com\example\datacollect\Main.class
com\example\datacollect\command\ExitCommand.class
com\example\datacollect\command\HelpCommand.class
com\example\datacollect\command\HistoryCommand.class
com\example\datacollect\controller\CrawlerController.class
com\example\datacollect\strategy\Map\MapPlatform.class
com\example\datacollect\strategy\Map\DrivingStrategy.class
com\example\datacollect\strategy\StrategyFactory.class
com\example\datacollect\strategy\HnuNewsStrategy.class
com\example\datacollect\exception\ParseException.class
com\example\datacollect\model\RouteInfo.class
com\example\datacollect\strategy\Map\AmapPlatform.class

27
W11/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst

@ -0,0 +1,27 @@
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HistoryCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\AnalyzeCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\NetworkException.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\BusStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\CrawlCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ListCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\DrivingStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\TransportStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\JsonNode.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\Main.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\AmapPlatform.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HelpCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\Article.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\view\ConsoleView.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\CrawlerException.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\RouteInfo.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\MapPlatform.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ExitCommand.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\repository\ArticleRepository.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\Command.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\ParseException.java
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\controller\CrawlerController.java
Loading…
Cancel
Save