45 changed files with 1381 additions and 0 deletions
@ -0,0 +1,4 @@ |
|||
*.jar |
|||
*.jar |
|||
*.class |
|||
*.log |
|||
@ -0,0 +1,10 @@ |
|||
# 默认忽略的文件 |
|||
/shelf/ |
|||
/workspace.xml |
|||
# 已忽略包含查询文件的默认文件夹 |
|||
/queries/ |
|||
# Datasource local storage ignored files |
|||
/dataSources/ |
|||
/dataSources.local.xml |
|||
# 基于编辑器的 HTTP 客户端请求 |
|||
/httpRequests/ |
|||
@ -0,0 +1 @@ |
|||
Command.java |
|||
@ -0,0 +1,13 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="CompilerConfiguration"> |
|||
<annotationProcessing> |
|||
<profile name="Maven default annotation processors profile" enabled="true"> |
|||
<sourceOutputDir name="target/generated-sources/annotations" /> |
|||
<sourceTestOutputDir name="target/generated-test-sources/test-annotations" /> |
|||
<outputRelativeToContentRoot value="true" /> |
|||
<module name="W9" /> |
|||
</profile> |
|||
</annotationProcessing> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,20 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="RemoteRepositoriesConfiguration"> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Central Repository" /> |
|||
<option name="url" value="https://repo.maven.apache.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="central" /> |
|||
<option name="name" value="Maven Central repository" /> |
|||
<option name="url" value="https://repo1.maven.org/maven2" /> |
|||
</remote-repository> |
|||
<remote-repository> |
|||
<option name="id" value="jboss.community" /> |
|||
<option name="name" value="JBoss Community repository" /> |
|||
<option name="url" value="https://repository.jboss.org/nexus/content/repositories/public/" /> |
|||
</remote-repository> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,12 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ExternalStorageConfigurationManager" enabled="true" /> |
|||
<component name="MavenProjectsManager"> |
|||
<option name="originalFiles"> |
|||
<list> |
|||
<option value="$PROJECT_DIR$/pom.xml" /> |
|||
</list> |
|||
</option> |
|||
</component> |
|||
<component name="ProjectRootManager" version="2" languageLevel="JDK_25" default="true" project-jdk-name="25" project-jdk-type="JavaSDK" /> |
|||
</project> |
|||
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="VcsDirectoryMappings"> |
|||
<mapping directory="$PROJECT_DIR$/.." vcs="Git" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,71 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
|
|||
<groupId>com.example</groupId> |
|||
<artifactId>W9</artifactId> |
|||
<version>0.1.0</version> |
|||
|
|||
<properties>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
    <!-- The Java sources contain non-ASCII string literals; pin the encoding so
         compilation does not depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
</properties>
|||
|
|||
<!-- 新增:所有依赖必须放在这里 --> |
|||
<dependencies> |
|||
<!-- Jsoup 爬虫依赖 --> |
|||
<dependency> |
|||
<groupId>org.jsoup</groupId> |
|||
<artifactId>jsoup</artifactId> |
|||
<version>1.17.2</version> |
|||
</dependency> |
|||
<!-- Jackson JSON 解析依赖 --> |
|||
<dependency> |
|||
<groupId>com.fasterxml.jackson.core</groupId> |
|||
<artifactId>jackson-databind</artifactId> |
|||
<version>2.15.2</version> |
|||
</dependency> |
|||
<!-- Logback 日志框架依赖 --> |
|||
<dependency> |
|||
<groupId>ch.qos.logback</groupId> |
|||
<artifactId>logback-classic</artifactId> |
|||
<version>1.4.11</version> |
|||
</dependency> |
|||
</dependencies> |
|||
|
|||
<build> |
|||
<plugins> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-compiler-plugin</artifactId> |
|||
<version>3.8.1</version> |
|||
</plugin> |
|||
<plugin> |
|||
<groupId>org.apache.maven.plugins</groupId> |
|||
<artifactId>maven-assembly-plugin</artifactId> |
|||
<version>3.3.0</version> |
|||
<configuration> |
|||
<archive> |
|||
<manifest> |
|||
<mainClass>com.example.datacollect.Main</mainClass> |
|||
</manifest> |
|||
</archive> |
|||
<descriptorRefs> |
|||
<descriptorRef>jar-with-dependencies</descriptorRef> |
|||
</descriptorRefs> |
|||
</configuration> |
|||
<executions> |
|||
<execution> |
|||
<id>make-assembly</id> |
|||
<phase>package</phase> |
|||
<goals> |
|||
<goal>single</goal> |
|||
</goals> |
|||
</execution> |
|||
</executions> |
|||
</plugin> |
|||
</plugins> |
|||
</build> |
|||
</project> |
|||
@ -0,0 +1,10 @@ |
|||
# 默认忽略的文件 |
|||
/shelf/ |
|||
/workspace.xml |
|||
# 已忽略包含查询文件的默认文件夹 |
|||
/queries/ |
|||
# Datasource local storage ignored files |
|||
/dataSources/ |
|||
/dataSources.local.xml |
|||
# 基于编辑器的 HTTP 客户端请求 |
|||
/httpRequests/ |
|||
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ProjectRootManager" version="2" languageLevel="JDK_25" default="true" project-jdk-name="25" project-jdk-type="JavaSDK"> |
|||
<output url="file://$PROJECT_DIR$/out" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,8 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="ProjectModuleManager"> |
|||
<modules> |
|||
<module fileurl="file://$PROJECT_DIR$/java-cli.iml" filepath="$PROJECT_DIR$/java-cli.iml" /> |
|||
</modules> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,6 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project version="4"> |
|||
<component name="VcsDirectoryMappings"> |
|||
<mapping directory="$PROJECT_DIR$/../.." vcs="Git" /> |
|||
</component> |
|||
</project> |
|||
@ -0,0 +1,11 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<module type="JAVA_MODULE" version="4"> |
|||
<component name="NewModuleRootManager" inherit-compiler-output="true"> |
|||
<exclude-output /> |
|||
<content url="file://$MODULE_DIR$"> |
|||
<sourceFolder url="file://$MODULE_DIR$/main/java" isTestSource="false" /> |
|||
</content> |
|||
<orderEntry type="inheritedJdk" /> |
|||
<orderEntry type="sourceFolder" forTests="false" /> |
|||
</component> |
|||
</module> |
|||
@ -0,0 +1,26 @@ |
|||
package com.example.datacollect; |
|||
|
|||
import com.example.datacollect.controller.CrawlerController; |
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class Main { |
|||
private static final Logger logger = LoggerFactory.getLogger(Main.class); |
|||
|
|||
public static void main(String[] args) { |
|||
logger.info("应用启动"); |
|||
ConsoleView view = new ConsoleView(); |
|||
List<Article> articles = new ArrayList<>(); |
|||
CrawlerController controller = new CrawlerController(view, articles); |
|||
|
|||
view.printSuccess("Welcome to CLI Crawler (W11)! Type help for commands."); |
|||
logger.info("CLI Crawler 启动成功"); |
|||
while (true) { |
|||
controller.handle(view.readLine()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,135 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.strategy.CrawlStrategy; |
|||
import com.example.datacollect.strategy.StrategyFactory; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class AnalyzeCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); |
|||
private final ConsoleView view; |
|||
private final StrategyFactory strategyFactory; |
|||
private static final String URL_PATTERN = |
|||
"^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$"; |
|||
|
|||
public AnalyzeCommand(ConsoleView view) { |
|||
this.view = view; |
|||
this.strategyFactory = new StrategyFactory(); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "analyze"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
if (args.length < 1) { |
|||
view.printError("Usage: analyze <url>"); |
|||
return; |
|||
} |
|||
|
|||
String url = args[0]; |
|||
|
|||
if (!isValidUrl(url)) { |
|||
view.printError("无效的URL格式:" + url); |
|||
view.printInfo("请输入正确的URL,例如:https://www.example.com"); |
|||
return; |
|||
} |
|||
|
|||
view.printInfo("正在分析:" + url); |
|||
logger.info("开始分析URL: {}", url); |
|||
|
|||
try { |
|||
Document doc = Jsoup.connect(url) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
|||
.timeout(15000) |
|||
.get(); |
|||
|
|||
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
|||
if (strategy == null) { |
|||
view.printError("未找到匹配的解析策略"); |
|||
logger.warn("未找到匹配的解析策略: {}", url); |
|||
return; |
|||
} |
|||
|
|||
view.printInfo("匹配策略:" + strategy.getClass().getSimpleName()); |
|||
logger.debug("匹配策略: {}", strategy.getClass().getSimpleName()); |
|||
|
|||
List<Article> parsedArticles = strategy.parse(url, doc); |
|||
|
|||
view.printSuccess("分析完成!"); |
|||
view.printInfo("📊 统计信息:"); |
|||
view.printInfo(" - 解析到文章数量:" + parsedArticles.size()); |
|||
logger.info("分析完成,解析到 {} 篇文章", parsedArticles.size()); |
|||
|
|||
if (!parsedArticles.isEmpty()) { |
|||
int totalContentLength = 0; |
|||
int titlesWithContent = 0; |
|||
int titlesWithoutContent = 0; |
|||
|
|||
for (Article article : parsedArticles) { |
|||
if (article.getContent() != null && !article.getContent().isBlank()) { |
|||
totalContentLength += article.getContent().length(); |
|||
titlesWithContent++; |
|||
} else { |
|||
titlesWithoutContent++; |
|||
} |
|||
} |
|||
|
|||
view.printInfo(" - 包含内容的文章:" + titlesWithContent); |
|||
view.printInfo(" - 仅标题的文章:" + titlesWithoutContent); |
|||
view.printInfo(" - 总内容长度:" + totalContentLength + " 字符"); |
|||
|
|||
if (titlesWithContent > 0) { |
|||
int avgLength = totalContentLength / titlesWithContent; |
|||
view.printInfo(" - 平均内容长度:" + avgLength + " 字符"); |
|||
} |
|||
|
|||
view.printInfo("\n📝 文章标题列表:"); |
|||
for (int i = 0; i < Math.min(parsedArticles.size(), 10); i++) { |
|||
Article article = parsedArticles.get(i); |
|||
String title = article.getTitle(); |
|||
if (title.length() > 40) { |
|||
title = title.substring(0, 37) + "..."; |
|||
} |
|||
view.printInfo(" " + (i + 1) + ". " + title); |
|||
} |
|||
|
|||
if (parsedArticles.size() > 10) { |
|||
view.printInfo(" ... 还有 " + (parsedArticles.size() - 10) + " 篇文章"); |
|||
} |
|||
} |
|||
|
|||
view.printInfo("\n💡 提示:分析结果未保存,如需保存请使用 crawl 命令"); |
|||
|
|||
} catch (IOException e) { |
|||
view.printError("分析失败:" + e.getMessage()); |
|||
logger.error("分析失败: {} - {}", url, e.getMessage(), e); |
|||
} catch (ParseException e) { |
|||
view.printError("解析失败:" + e.getMessage()); |
|||
logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "analyze <url> - 分析URL内容(不保存),输出统计信息"; |
|||
} |
|||
|
|||
private boolean isValidUrl(String url) { |
|||
if (url == null || url.isBlank()) { |
|||
return false; |
|||
} |
|||
return Pattern.matches(URL_PATTERN, url); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import java.util.List; |
|||
|
|||
public interface Command { |
|||
String getName(); |
|||
void execute(String[] args, List<Article> articles); |
|||
|
|||
String getDescription(); |
|||
} |
|||
@ -0,0 +1,119 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.exception.NetworkException; |
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.strategy.CrawlStrategy; |
|||
import com.example.datacollect.strategy.StrategyFactory; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class CrawlCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
|||
private final ConsoleView view; |
|||
private final StrategyFactory strategyFactory; |
|||
private static final String URL_PATTERN = |
|||
"^(https?://)?([\\w-]+\\.)+[\\w-]+(/[\\w-./?%&=]*)?$"; |
|||
private static final int MAX_RETRY = 3; |
|||
private static final long RETRY_DELAY_MS = 2000; |
|||
|
|||
public CrawlCommand(ConsoleView view) { |
|||
this.view = view; |
|||
this.strategyFactory = new StrategyFactory(); |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "crawl"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
if (args.length < 1) { |
|||
view.printError("Usage: crawl <url>"); |
|||
return; |
|||
} |
|||
|
|||
String url = args[0]; |
|||
|
|||
if (!isValidUrl(url)) { |
|||
view.printError("无效的URL格式:" + url); |
|||
view.printInfo("请输入正确的URL,例如:https://www.example.com"); |
|||
return; |
|||
} |
|||
|
|||
view.printInfo("正在爬取:" + url); |
|||
logger.info("开始爬取URL: {}", url); |
|||
|
|||
try { |
|||
Document doc = fetchWithRetry(url); |
|||
List<Article> parsedArticles = parseWithStrategy(url, doc); |
|||
|
|||
for (Article article : parsedArticles) { |
|||
articles.add(article); |
|||
} |
|||
|
|||
view.printSuccess("爬取成功!共获取 " + parsedArticles.size() + " 篇文章"); |
|||
view.printInfo("输入 list 查看"); |
|||
logger.info("爬取完成,获取 {} 篇文章", parsedArticles.size()); |
|||
|
|||
} catch (NetworkException e) { |
|||
view.printError("网络请求失败:" + e.getMessage()); |
|||
logger.error("网络请求失败: {} - {}", e.getUrl(), e.getMessage(), e); |
|||
} catch (ParseException e) { |
|||
view.printError("解析失败:" + e.getMessage()); |
|||
logger.error("解析失败: {} - {}", e.getUrl(), e.getMessage(), e); |
|||
} |
|||
} |
|||
|
|||
private Document fetchWithRetry(String url) throws NetworkException { |
|||
int retryCount = 0; |
|||
IOException lastException = null; |
|||
|
|||
while (retryCount < MAX_RETRY) { |
|||
try { |
|||
return Jsoup.connect(url) |
|||
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") |
|||
.timeout(15000) |
|||
.get(); |
|||
} catch (IOException e) { |
|||
lastException = e; |
|||
retryCount++; |
|||
if (retryCount < MAX_RETRY) { |
|||
view.printInfo("重试第 " + retryCount + " 次..."); |
|||
try { |
|||
Thread.sleep(RETRY_DELAY_MS); |
|||
} catch (InterruptedException ie) { |
|||
Thread.currentThread().interrupt(); |
|||
throw new NetworkException("请求被中断", url, ie); |
|||
} |
|||
} |
|||
} |
|||
} |
|||
|
|||
throw new NetworkException("网络请求失败,已重试 " + MAX_RETRY + " 次: " + lastException.getMessage(), url, lastException); |
|||
} |
|||
|
|||
private List<Article> parseWithStrategy(String url, Document doc) throws ParseException { |
|||
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
|||
return strategy.parse(url, doc); |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "crawl <url> - 爬取指定URL的文章"; |
|||
} |
|||
|
|||
private boolean isValidUrl(String url) { |
|||
if (url == null || url.isBlank()) { |
|||
return false; |
|||
} |
|||
return Pattern.matches(URL_PATTERN, url); |
|||
} |
|||
} |
|||
@ -0,0 +1,33 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.List; |
|||
|
|||
public class ExitCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); |
|||
private final ConsoleView view; |
|||
|
|||
public ExitCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "exit"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
view.printSuccess("Bye!"); |
|||
logger.info("应用退出"); |
|||
System.exit(0); |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "exit - 退出程序"; |
|||
} |
|||
} |
|||
@ -0,0 +1,39 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.List; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); |
|||
private final ConsoleView view; |
|||
|
|||
public HelpCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
logger.debug("显示帮助信息"); |
|||
view.printInfo("Commands:"); |
|||
view.printInfo(" crawl <url> - 爬取指定URL的文章并保存"); |
|||
view.printInfo(" analyze <url> - 分析URL内容(不保存),输出统计信息"); |
|||
view.printInfo(" list - 列出已保存的文章"); |
|||
view.printInfo(" history - 查看命令历史"); |
|||
view.printInfo(" help - 显示帮助信息"); |
|||
view.printInfo(" exit - 退出程序"); |
|||
view.printInfo("Aliases: h=help, c=crawl, l=list, hi=history, e=exit, a=analyze"); |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "显示帮助信息"; |
|||
} |
|||
} |
|||
@ -0,0 +1,46 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class HistoryCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(HistoryCommand.class); |
|||
private static final List<String> commandHistory = new ArrayList<>(); |
|||
private final ConsoleView view; |
|||
|
|||
public HistoryCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "history"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
if (commandHistory.isEmpty()) { |
|||
view.printInfo("📜 还没有输入过任何命令哦~"); |
|||
return; |
|||
} |
|||
logger.debug("显示命令历史,共 {} 条", commandHistory.size()); |
|||
view.printInfo("📜 你的历史命令列表:"); |
|||
for (int i = 0; i < commandHistory.size(); i++) { |
|||
view.printInfo((i + 1) + ". " + commandHistory.get(i)); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "history - 查看你输入过的所有命令"; |
|||
} |
|||
|
|||
public static void record(String commandLine) { |
|||
commandHistory.add(commandLine); |
|||
logger.debug("记录命令: {}", commandLine); |
|||
} |
|||
} |
|||
@ -0,0 +1,32 @@ |
|||
package com.example.datacollect.command; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.List; |
|||
|
|||
public class ListCommand implements Command { |
|||
private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); |
|||
private final ConsoleView view; |
|||
|
|||
public ListCommand(ConsoleView view) { |
|||
this.view = view; |
|||
} |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "list"; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args, List<Article> articles) { |
|||
logger.debug("显示文章列表,共 {} 篇", articles.size()); |
|||
view.display(articles); |
|||
} |
|||
|
|||
@Override |
|||
public String getDescription() { |
|||
return "list - 显示所有已爬取的文章"; |
|||
} |
|||
} |
|||
@ -0,0 +1,65 @@ |
|||
package com.example.datacollect.controller; |
|||
|
|||
import com.example.datacollect.command.*; |
|||
import com.example.datacollect.model.Article; |
|||
import com.example.datacollect.view.ConsoleView; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.HashMap; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
public class CrawlerController { |
|||
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
|||
private final Map<String, Command> commands = new HashMap<>(); |
|||
private final ConsoleView view; |
|||
private final List<Article> articles; |
|||
private final Map<String, String> aliasMap = new HashMap<>(); |
|||
|
|||
public CrawlerController(ConsoleView view, List<Article> articles) { |
|||
this.view = view; |
|||
this.articles = articles; |
|||
|
|||
register(new HelpCommand(view)); |
|||
register(new ListCommand(view)); |
|||
register(new CrawlCommand(view)); |
|||
register(new ExitCommand(view)); |
|||
register(new HistoryCommand(view)); |
|||
register(new AnalyzeCommand(view)); |
|||
|
|||
aliasMap.put("h", "help"); |
|||
aliasMap.put("c", "crawl"); |
|||
aliasMap.put("l", "list"); |
|||
aliasMap.put("hi", "history"); |
|||
aliasMap.put("e", "exit"); |
|||
aliasMap.put("a", "analyze"); |
|||
} |
|||
|
|||
private void register(Command command) { |
|||
commands.put(command.getName(), command); |
|||
} |
|||
|
|||
public void handle(String input) { |
|||
if (input == null || input.isBlank()) { |
|||
return; |
|||
} |
|||
|
|||
HistoryCommand.record(input); |
|||
|
|||
String[] parts = input.split("\\s+", 2); |
|||
String commandName = parts[0]; |
|||
String[] args = parts.length > 1 ? new String[]{parts[1]} : new String[0]; |
|||
|
|||
if (aliasMap.containsKey(commandName)) { |
|||
commandName = aliasMap.get(commandName); |
|||
} |
|||
|
|||
if (commands.containsKey(commandName)) { |
|||
logger.debug("执行命令: {} with args: {}", commandName, args.length > 0 ? args[0] : "无参数"); |
|||
commands.get(commandName).execute(args, articles); |
|||
} else { |
|||
view.printError("未知命令/别名:" + commandName + ",输入 help 查看所有命令"); |
|||
logger.warn("未知命令: {}", commandName); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.example.datacollect.exception; |
|||
|
|||
/**
 * Checked base exception for crawler failures.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     */
    public CrawlerException(final String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception
     */
    public CrawlerException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
|||
@ -0,0 +1,19 @@ |
|||
package com.example.datacollect.exception; |
|||
|
|||
public class NetworkException extends CrawlerException { |
|||
private final String url; |
|||
|
|||
public NetworkException(String message, String url) { |
|||
super(message); |
|||
this.url = url; |
|||
} |
|||
|
|||
public NetworkException(String message, String url, Throwable cause) { |
|||
super(message, cause); |
|||
this.url = url; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package com.example.datacollect.exception; |
|||
|
|||
public class ParseException extends CrawlerException { |
|||
private final String url; |
|||
|
|||
public ParseException(String message, String url) { |
|||
super(message); |
|||
this.url = url; |
|||
} |
|||
|
|||
public ParseException(String message, String url, Throwable cause) { |
|||
super(message, cause); |
|||
this.url = url; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
} |
|||
@ -0,0 +1,77 @@ |
|||
package com.example.datacollect.model; |
|||
|
|||
public class Article { |
|||
private String title; |
|||
private String url; |
|||
private String content; |
|||
private String author; |
|||
private String publishDate; |
|||
|
|||
// 修正后的构造方法:参数 → 成员变量
|
|||
public Article(String title, String url, String content) { |
|||
this.title = title; |
|||
this.url = url; |
|||
this.content = content; |
|||
this.author = ""; |
|||
this.publishDate = ""; |
|||
} |
|||
|
|||
public Article(String title, String url, String content, String author, String publishDate) { |
|||
this.title = title; |
|||
this.url = url; |
|||
this.content = content; |
|||
this.author = author; |
|||
this.publishDate = publishDate; |
|||
} |
|||
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public void setTitle(String title) { |
|||
this.title = title; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
|
|||
public void setUrl(String url) { |
|||
this.url = url; |
|||
} |
|||
|
|||
public String getContent() { |
|||
return content; |
|||
} |
|||
|
|||
public void setContent(String content) { |
|||
this.content = content; |
|||
} |
|||
|
|||
public String getAuthor() { |
|||
return author; |
|||
} |
|||
|
|||
public void setAuthor(String author) { |
|||
this.author = author; |
|||
} |
|||
|
|||
public String getPublishDate() { |
|||
return publishDate; |
|||
} |
|||
|
|||
public void setPublishDate(String publishDate) { |
|||
this.publishDate = publishDate; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "Article{" + |
|||
"title='" + title + '\'' + |
|||
", author='" + author + '\'' + |
|||
", publishDate='" + publishDate + '\'' + |
|||
", url='" + url + '\'' + |
|||
", content='" + content + '\'' + |
|||
'}'; |
|||
} |
|||
} |
|||
@ -0,0 +1,29 @@ |
|||
package com.example.datacollect.model; |
|||
|
|||
/**
 * Immutable holder for the basic data of one route: destination city, map
 * type, transport type, distance and time.
 */
public class RouteInfo {
    /** Destination city. */
    private final String city;
    /** Map type (Amap / Baidu). */
    private final String mapType;
    /** Transport type (driving / public transit). */
    private final String transportType;
    /** Distance in kilometres. */
    private final double distance;
    /** Time in hours. */
    private final double time;

    /**
     * Initialises every field.
     *
     * @param city          destination city
     * @param mapType       map type
     * @param transportType transport type
     * @param distance      distance in kilometres
     * @param time          time in hours
     */
    public RouteInfo(String city, String mapType, String transportType, double distance, double time) {
        this.city = city;
        this.mapType = mapType;
        this.transportType = transportType;
        this.distance = distance;
        this.time = time;
    }

    /** @return the destination city */
    public String getCity() {
        return this.city;
    }

    /** @return the map type */
    public String getMapType() {
        return this.mapType;
    }

    /** @return the transport type */
    public String getTransportType() {
        return this.transportType;
    }

    /** @return the distance in kilometres */
    public double getDistance() {
        return this.distance;
    }

    /** @return the time in hours */
    public double getTime() {
        return this.time;
    }
}
|||
@ -0,0 +1,72 @@ |
|||
package com.example.datacollect.repository; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.ArrayList; |
|||
import java.util.Collections; |
|||
import java.util.List; |
|||
|
|||
public class ArticleRepository { |
|||
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
|||
private final List<Article> articles = new ArrayList<>(); |
|||
|
|||
public void add(Article article) { |
|||
if (article == null) { |
|||
logger.warn("尝试添加空文章对象"); |
|||
throw new IllegalArgumentException("Article cannot be null"); |
|||
} |
|||
if (article.getTitle() == null || article.getTitle().isBlank()) { |
|||
logger.warn("尝试添加标题为空的文章: {}", article.getUrl()); |
|||
throw new IllegalArgumentException("Article title cannot be null or blank"); |
|||
} |
|||
if (article.getUrl() == null || article.getUrl().isBlank()) { |
|||
logger.warn("尝试添加URL为空的文章: {}", article.getTitle()); |
|||
throw new IllegalArgumentException("Article URL cannot be null or blank"); |
|||
} |
|||
articles.add(article); |
|||
logger.debug("添加文章: {}", article.getTitle()); |
|||
} |
|||
|
|||
public List<Article> getAll() { |
|||
logger.debug("获取所有文章,共 {} 篇", articles.size()); |
|||
return Collections.unmodifiableList(articles); |
|||
} |
|||
|
|||
public int size() { |
|||
return articles.size(); |
|||
} |
|||
|
|||
public void clear() { |
|||
int count = articles.size(); |
|||
articles.clear(); |
|||
logger.info("清空所有文章,共 {} 篇", count); |
|||
} |
|||
|
|||
public void addAll(List<Article> articleList) { |
|||
if (articleList == null) { |
|||
logger.warn("尝试添加空的文章列表"); |
|||
throw new IllegalArgumentException("Article list cannot be null"); |
|||
} |
|||
if (articleList.isEmpty()) { |
|||
logger.debug("添加空的文章列表"); |
|||
return; |
|||
} |
|||
for (Article article : articleList) { |
|||
add(article); |
|||
} |
|||
logger.info("批量添加文章,共 {} 篇", articleList.size()); |
|||
} |
|||
|
|||
public Article getByIndex(int index) { |
|||
if (index < 0 || index >= articles.size()) { |
|||
logger.warn("无效的索引: {},列表大小: {}", index, articles.size()); |
|||
throw new IndexOutOfBoundsException("Index: " + index + ", Size: " + articles.size()); |
|||
} |
|||
return articles.get(index); |
|||
} |
|||
|
|||
public boolean isEmpty() { |
|||
return articles.isEmpty(); |
|||
} |
|||
} |
|||
@ -0,0 +1,26 @@ |
|||
package com.example.datacollect.strategy; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class BlogStrategy implements CrawlStrategy { |
|||
@Override |
|||
public boolean supports(String url) { |
|||
return url.contains("blog.example.com"); |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> parse(String url, Document doc) throws ParseException { |
|||
List<Article> articles = new ArrayList<>(); |
|||
Elements titles = doc.select(".post-title"); |
|||
for (Element e : titles) { |
|||
articles.add(new Article(e.text(), url, "")); |
|||
} |
|||
return articles; |
|||
} |
|||
} |
|||
@ -0,0 +1,14 @@ |
|||
package com.example.datacollect.strategy; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import org.jsoup.nodes.Document; |
|||
import java.util.List; |
|||
|
|||
public interface CrawlStrategy { |
|||
List<Article> parse(String url, Document doc) throws ParseException; |
|||
boolean supports(String url); |
|||
default int getPriority() { |
|||
return 100; |
|||
} |
|||
} |
|||
@ -0,0 +1,50 @@ |
|||
package com.example.datacollect.strategy; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class HnuNewsStrategy implements CrawlStrategy { |
|||
@Override |
|||
public boolean supports(String url) { |
|||
return url.contains("news.hnu.edu.cn"); |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> parse(String url, Document doc) throws ParseException { |
|||
List<Article> articles = new ArrayList<>(); |
|||
Elements listItems = doc.select("ul.list11 li"); |
|||
|
|||
for (Element li : listItems) { |
|||
Element link = li.selectFirst("a"); |
|||
if (link == null) continue; |
|||
|
|||
String articleUrl = link.attr("href"); |
|||
if (!articleUrl.startsWith("http")) { |
|||
articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", ""); |
|||
} |
|||
|
|||
String title = ""; |
|||
Element titleEl = link.selectFirst("h4.l2.h4s2"); |
|||
if (titleEl != null) { |
|||
title = titleEl.text().trim(); |
|||
} |
|||
|
|||
String content = ""; |
|||
Element contentEl = link.selectFirst("p.l3.ps3"); |
|||
if (contentEl != null) { |
|||
content = contentEl.text().trim(); |
|||
} |
|||
|
|||
if (!title.isEmpty()) { |
|||
articles.add(new Article(title, articleUrl, content)); |
|||
} |
|||
} |
|||
|
|||
return articles; |
|||
} |
|||
} |
|||
@ -0,0 +1,40 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
import com.example.datacollect.model.RouteInfo; |
|||
import com.fasterxml.jackson.databind.JsonNode; |
|||
import com.fasterxml.jackson.databind.ObjectMapper; |
|||
import java.io.IOException; |
|||
|
|||
public class AmapPlatform implements MapPlatform { |
|||
private final ObjectMapper mapper = new ObjectMapper(); |
|||
|
|||
@Override |
|||
public String getName() { |
|||
return "高德地图"; |
|||
} |
|||
|
|||
@Override |
|||
public String getBaseUrl() { |
|||
return "https://restapi.amap.com/v3/direction/"; |
|||
} |
|||
|
|||
@Override |
|||
public String getApiKeyParam() { |
|||
return "key"; |
|||
} |
|||
|
|||
@Override |
|||
public RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException { |
|||
JsonNode root = mapper.readTree(responseBody); |
|||
|
|||
if (!root.get("status").asText().equals("1")) { |
|||
throw new IOException("API错误: " + root.get("info").asText()); |
|||
} |
|||
|
|||
JsonNode path = root.get("route").get("paths").get(0); |
|||
double distance = path.get("distance").asInt() / 1000.0; |
|||
double time = path.get("duration").asInt() / 3600.0; |
|||
|
|||
return new RouteInfo(city, getName(), transportType, distance, time); |
|||
} |
|||
} |
|||
@ -0,0 +1,19 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
public class BusStrategy implements TransportStrategy { |
|||
@Override |
|||
public String getPath() { |
|||
return "transit/integrated"; |
|||
} |
|||
|
|||
@Override |
|||
public Map<String, String> getParams() { |
|||
Map<String, String> params = new HashMap<>(); |
|||
params.put("city", "长沙"); |
|||
params.put("strategy", "0"); |
|||
return params; |
|||
} |
|||
} |
|||
@ -0,0 +1,18 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
import java.util.HashMap; |
|||
import java.util.Map; |
|||
|
|||
public class DrivingStrategy implements TransportStrategy { |
|||
@Override |
|||
public String getPath() { |
|||
return "driving"; |
|||
} |
|||
|
|||
@Override |
|||
public Map<String, String> getParams() { |
|||
Map<String, String> params = new HashMap<>(); |
|||
params.put("strategy", "0"); |
|||
return params; |
|||
} |
|||
} |
|||
@ -0,0 +1,5 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
/**
 * Empty placeholder type: it declares no fields, constructors or methods.
 *
 * <p>NOTE(review): this class shares its simple name with
 * {@code com.fasterxml.jackson.databind.JsonNode}, which {@code AmapPlatform}
 * in this package imports explicitly. It looks like an accidental stub —
 * confirm nothing references it before removing it.</p>
 */
public class JsonNode {
}
|||
@ -0,0 +1,11 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
import com.example.datacollect.model.RouteInfo; |
|||
import java.io.IOException; |
|||
|
|||
public interface MapPlatform { |
|||
String getName(); |
|||
String getBaseUrl(); |
|||
String getApiKeyParam(); |
|||
RouteInfo parseResponse(String responseBody, String city, String transportType) throws IOException; |
|||
} |
|||
@ -0,0 +1,9 @@ |
|||
package com.example.datacollect.strategy.Map; |
|||
|
|||
import java.util.Map; |
|||
|
|||
/**
 * Describes how a route request differs per transport mode (for example
 * driving versus public transit).
 */
public interface TransportStrategy {

    /**
     * @return the request parameters that are specific to this transport mode
     */
    Map<String, String> getParams();

    /**
     * @return the endpoint path segment for this transport mode, such as
     *         {@code "driving"}; presumably appended to
     *         {@code MapPlatform.getBaseUrl()} — confirm against the caller
     */
    String getPath();
}
|||
|
|||
@ -0,0 +1,26 @@ |
|||
package com.example.datacollect.strategy; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class NewsStrategy implements CrawlStrategy { |
|||
@Override |
|||
public boolean supports(String url) { |
|||
return url.contains("news.example.com"); |
|||
} |
|||
|
|||
@Override |
|||
public List<Article> parse(String url, Document doc) throws ParseException { |
|||
List<Article> articles = new ArrayList<>(); |
|||
Elements items = doc.select(".article-headline"); |
|||
for (Element e : items) { |
|||
articles.add(new Article(e.text(), url, "")); |
|||
} |
|||
return articles; |
|||
} |
|||
} |
|||
@ -0,0 +1,90 @@ |
|||
package com.example.datacollect.strategy; |
|||
|
|||
import com.example.datacollect.exception.ParseException; |
|||
import com.example.datacollect.model.Article; |
|||
import org.slf4j.Logger; |
|||
import org.slf4j.LoggerFactory; |
|||
import java.util.ArrayList; |
|||
import java.util.Comparator; |
|||
import java.util.List; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class StrategyFactory { |
|||
private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); |
|||
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
|||
private CrawlStrategy defaultStrategy; |
|||
|
|||
public StrategyFactory() { |
|||
strategies.add(new HnuNewsStrategy()); |
|||
strategies.add(new BlogStrategy()); |
|||
strategies.add(new NewsStrategy()); |
|||
this.defaultStrategy = new DefaultStrategy(); |
|||
logger.info("策略工厂初始化,注册了 {} 个策略", strategies.size()); |
|||
} |
|||
|
|||
public CrawlStrategy getStrategy(String url) { |
|||
List<CrawlStrategy> matchingStrategies = new ArrayList<>(); |
|||
for (CrawlStrategy s : strategies) { |
|||
if (s.supports(url)) { |
|||
matchingStrategies.add(s); |
|||
} |
|||
} |
|||
|
|||
if (matchingStrategies.isEmpty()) { |
|||
logger.debug("未找到匹配策略,使用默认策略: {}", url); |
|||
return defaultStrategy; |
|||
} |
|||
|
|||
if (matchingStrategies.size() > 1) { |
|||
matchingStrategies.sort(Comparator.comparingInt(CrawlStrategy::getPriority)); |
|||
logger.debug("找到多个匹配策略,选择优先级最高的: {}", matchingStrategies.get(0).getClass().getSimpleName()); |
|||
} |
|||
|
|||
return matchingStrategies.get(0); |
|||
} |
|||
|
|||
public void register(CrawlStrategy strategy) { |
|||
strategies.add(strategy); |
|||
logger.info("注册新策略: {}", strategy.getClass().getSimpleName()); |
|||
} |
|||
|
|||
public void setDefaultStrategy(CrawlStrategy defaultStrategy) { |
|||
this.defaultStrategy = defaultStrategy; |
|||
logger.info("设置默认策略: {}", defaultStrategy.getClass().getSimpleName()); |
|||
} |
|||
|
|||
public List<CrawlStrategy> getMatchingStrategies(String url) { |
|||
List<CrawlStrategy> matching = new ArrayList<>(); |
|||
for (CrawlStrategy s : strategies) { |
|||
if (s.supports(url)) { |
|||
matching.add(s); |
|||
} |
|||
} |
|||
matching.sort(Comparator.comparingInt(CrawlStrategy::getPriority)); |
|||
return matching; |
|||
} |
|||
|
|||
private static class DefaultStrategy implements CrawlStrategy { |
|||
private static final Pattern TITLE_PATTERN = Pattern.compile("<title[^>]*>([^<]+)</title>", Pattern.CASE_INSENSITIVE); |
|||
|
|||
@Override |
|||
public List<Article> parse(String url, org.jsoup.nodes.Document doc) throws ParseException { |
|||
List<Article> articles = new ArrayList<>(); |
|||
String title = doc.title(); |
|||
if (title != null && !title.isBlank()) { |
|||
articles.add(new Article(title.trim(), url, "")); |
|||
} |
|||
return articles; |
|||
} |
|||
|
|||
@Override |
|||
public boolean supports(String url) { |
|||
return true; |
|||
} |
|||
|
|||
@Override |
|||
public int getPriority() { |
|||
return Integer.MAX_VALUE; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,42 @@ |
|||
package com.example.datacollect.view; |
|||
|
|||
import com.example.datacollect.model.Article; |
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
public class ConsoleView { |
|||
private static final String ANSI_RESET = "\u001B[0m"; |
|||
private static final String ANSI_GREEN = "\u001B[32m"; |
|||
private static final String ANSI_RED = "\u001B[31m"; |
|||
private static final String ANSI_BLUE = "\u001B[34m"; |
|||
|
|||
private final Scanner scanner = new Scanner(System.in); |
|||
|
|||
public String readLine() { |
|||
System.out.print("> "); |
|||
return scanner.nextLine(); |
|||
} |
|||
|
|||
public void printSuccess(String msg) { |
|||
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void printError(String msg) { |
|||
System.out.println(ANSI_RED + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void printInfo(String msg) { |
|||
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
|||
} |
|||
|
|||
public void display(List<Article> articles) { |
|||
if (articles.isEmpty()) { |
|||
printInfo("暂无文章,请先执行 crawl。"); |
|||
return; |
|||
} |
|||
for (int i = 0; i < articles.size(); i++) { |
|||
Article a = articles.get(i); |
|||
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,31 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>
    <!-- Shared settings: log directory, base file name and the line layout used by both appenders. -->
    <property name="LOG_PATH" value="./logs"/>
    <property name="APP_NAME" value="crawler"/>
    <property name="LOG_PATTERN"
              value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n"/>

    <!-- Console output. -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!-- File output, rolled once per day; 30 days of history are kept. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_PATH}/${APP_NAME}.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!-- Application packages log at DEBUG; everything else inherits INFO from the root. -->
    <logger name="com.example.datacollect" level="DEBUG"/>

    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
    </root>
</configuration>
|||
@ -0,0 +1,31 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<configuration>
    <!-- Shared settings: log directory, base file name and the line layout used by both appenders. -->
    <property name="LOG_PATH" value="./logs"/>
    <property name="APP_NAME" value="crawler"/>
    <property name="LOG_PATTERN"
              value="%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n"/>

    <!-- Console output. -->
    <appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender">
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!-- File output, rolled once per day; 30 days of history are kept. -->
    <appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender">
        <file>${LOG_PATH}/${APP_NAME}.log</file>
        <rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy">
            <fileNamePattern>${LOG_PATH}/${APP_NAME}.%d{yyyy-MM-dd}.log</fileNamePattern>
            <maxHistory>30</maxHistory>
        </rollingPolicy>
        <encoder>
            <pattern>${LOG_PATTERN}</pattern>
            <charset>UTF-8</charset>
        </encoder>
    </appender>

    <!-- Application packages log at DEBUG; everything else inherits INFO from the root. -->
    <logger name="com.example.datacollect" level="DEBUG"/>

    <root level="INFO">
        <appender-ref ref="CONSOLE"/>
        <appender-ref ref="FILE"/>
    </root>
</configuration>
|||
@ -0,0 +1,3 @@ |
|||
artifactId=W9 |
|||
groupId=com.example |
|||
version=0.1.0 |
|||
@ -0,0 +1,28 @@ |
|||
com\example\datacollect\command\ListCommand.class |
|||
com\example\datacollect\command\CrawlCommand.class |
|||
com\example\datacollect\view\ConsoleView.class |
|||
com\example\datacollect\strategy\NewsStrategy.class |
|||
com\example\datacollect\command\Command.class |
|||
com\example\datacollect\exception\CrawlerException.class |
|||
com\example\datacollect\exception\NetworkException.class |
|||
com\example\datacollect\command\AnalyzeCommand.class |
|||
com\example\datacollect\strategy\Map\TransportStrategy.class |
|||
com\example\datacollect\strategy\CrawlStrategy.class |
|||
com\example\datacollect\model\Article.class |
|||
com\example\datacollect\strategy\BlogStrategy.class |
|||
com\example\datacollect\strategy\StrategyFactory$DefaultStrategy.class |
|||
com\example\datacollect\strategy\Map\JsonNode.class |
|||
com\example\datacollect\repository\ArticleRepository.class |
|||
com\example\datacollect\strategy\Map\BusStrategy.class |
|||
com\example\datacollect\Main.class |
|||
com\example\datacollect\command\ExitCommand.class |
|||
com\example\datacollect\command\HelpCommand.class |
|||
com\example\datacollect\command\HistoryCommand.class |
|||
com\example\datacollect\controller\CrawlerController.class |
|||
com\example\datacollect\strategy\Map\MapPlatform.class |
|||
com\example\datacollect\strategy\Map\DrivingStrategy.class |
|||
com\example\datacollect\strategy\StrategyFactory.class |
|||
com\example\datacollect\strategy\HnuNewsStrategy.class |
|||
com\example\datacollect\exception\ParseException.class |
|||
com\example\datacollect\model\RouteInfo.class |
|||
com\example\datacollect\strategy\Map\AmapPlatform.class |
|||
@ -0,0 +1,27 @@ |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HistoryCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\AnalyzeCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\NetworkException.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\BusStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\CrawlCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ListCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\DrivingStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\TransportStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\JsonNode.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\Main.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\AmapPlatform.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\HelpCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\StrategyFactory.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\Article.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\view\ConsoleView.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\CrawlerException.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\model\RouteInfo.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\Map\MapPlatform.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\ExitCommand.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\repository\ArticleRepository.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\command\Command.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\exception\ParseException.java |
|||
C:\Users\taro blue\java作业\W11\src\main\java\com\example\datacollect\controller\CrawlerController.java |
|||
Loading…
Reference in new issue