31 changed files with 1209 additions and 0 deletions
Binary file not shown.
@ -0,0 +1,4 @@ |
|||||
|
*.jar |
||||
|
*.jar |
||||
|
*.class |
||||
|
*.log |
||||
@ -0,0 +1,62 @@ |
|||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 https://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.example</groupId>
  <artifactId>datacollect-cli</artifactId>
  <version>0.1.0</version>

  <properties>
    <maven.compiler.source>11</maven.compiler.source>
    <maven.compiler.target>11</maven.compiler.target>
    <!-- The sources contain non-ASCII string literals; pin the encoding so the
         build does not silently depend on the platform default charset. -->
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <project.reporting.outputEncoding>UTF-8</project.reporting.outputEncoding>
  </properties>

  <dependencies>
    <!-- HTML fetching and parsing -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.17.2</version>
    </dependency>
    <!-- Logging facade and its backend -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>2.0.9</version>
    </dependency>
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.4.14</version>
    </dependency>
  </dependencies>

  <build>
    <plugins>
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>3.8.1</version>
      </plugin>
      <!-- Builds an executable jar-with-dependencies during the package phase. -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-assembly-plugin</artifactId>
        <version>3.3.0</version>
        <configuration>
          <archive>
            <manifest>
              <mainClass>com.example.datacollect.Main</mainClass>
            </manifest>
          </archive>
          <descriptorRefs>
            <descriptorRef>jar-with-dependencies</descriptorRef>
          </descriptorRefs>
        </configuration>
        <executions>
          <execution>
            <id>make-assembly</id>
            <phase>package</phase>
            <goals>
              <goal>single</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>
</project>
||||
@ -0,0 +1,41 @@ |
|||||
|
package com.example.datacollect; |
||||
|
|
||||
|
import com.example.datacollect.controller.CrawlerController; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
/*- 添加 logger 成员 |
||||
|
- 记录启动日志 |
||||
|
- 添加全局异常处理 */ |
||||
|
public class Main { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(Main.class); |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
try { |
||||
|
logger.info("Starting CLI Crawler application"); |
||||
|
|
||||
|
ConsoleView view = new ConsoleView(); |
||||
|
ArticleRepository repository = new ArticleRepository(); |
||||
|
StrategyFactory strategyFactory = new StrategyFactory(); |
||||
|
CrawlerController controller = new CrawlerController(view, repository, strategyFactory); |
||||
|
|
||||
|
view.printSuccess("Welcome to CLI Crawler (w10_3)! Type help for commands."); |
||||
|
logger.info("Application initialized successfully"); |
||||
|
|
||||
|
while (true) { |
||||
|
try { |
||||
|
controller.handle(view.readLine()); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Error: " + e.getMessage()); |
||||
|
logger.error("Error in main loop: {}", e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Fatal error in application: {}", e.getMessage(), e); |
||||
|
System.err.println("Fatal error: " + e.getMessage()); |
||||
|
System.exit(1); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,103 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.exception.NetworkException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.util.RetryUtils; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.List; |
||||
|
import java.util.concurrent.Callable; |
||||
|
|
||||
|
public class AnalyzeCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(AnalyzeCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public AnalyzeCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "analyze"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: analyze <url>"); |
||||
|
logger.warn("Invalid command: missing URL argument"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
logger.info("Analyze command executed for URL: {}", url); |
||||
|
|
||||
|
try { |
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
logger.error("No strategy found for URL: {}", url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
Callable<Document> fetchTask = () -> { |
||||
|
logger.debug("Fetching document from: {}", url); |
||||
|
try { |
||||
|
return Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0") |
||||
|
.timeout(5000) |
||||
|
.get(); |
||||
|
} catch (IOException e) { |
||||
|
throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
Document doc = RetryUtils.executeWithRetry(fetchTask); |
||||
|
logger.info("Successfully fetched document from: {}", url); |
||||
|
|
||||
|
List<Article> articles = strategy.parse(url, doc); |
||||
|
logger.info("Parsed {} articles for analysis", articles.size()); |
||||
|
|
||||
|
int total = articles.size(); |
||||
|
int totalTitleLen = 0; |
||||
|
int totalContentLen = 0; |
||||
|
|
||||
|
for (Article a : articles) { |
||||
|
totalTitleLen += a.getTitle() == null ? 0 : a.getTitle().length(); |
||||
|
totalContentLen += a.getContent() == null ? 0 : a.getContent().length(); |
||||
|
} |
||||
|
|
||||
|
view.printInfo("===== 分析统计结果 ====="); |
||||
|
view.printInfo("文章总数:" + total + " 篇"); |
||||
|
view.printInfo("标题总长度:" + totalTitleLen); |
||||
|
view.printInfo("内容总长度:" + totalContentLen); |
||||
|
if (total > 0) { |
||||
|
view.printInfo("平均标题长度:" + (totalTitleLen / total)); |
||||
|
view.printInfo("平均内容长度:" + (totalContentLen / total)); |
||||
|
} |
||||
|
view.printInfo("======================"); |
||||
|
view.printSuccess("分析完成(数据未保存)"); |
||||
|
|
||||
|
logger.info("Analysis completed: {} articles analyzed", total); |
||||
|
} catch (NetworkException e) { |
||||
|
view.printError("Network error: " + e.getMessage()); |
||||
|
logger.error("Network error while analyzing {}: {}", url, e.getMessage(), e); |
||||
|
} catch (ParseException e) { |
||||
|
view.printError("Parse error: " + e.getMessage()); |
||||
|
logger.error("Parse error while analyzing {}: {}", url, e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("分析失败:" + e.getMessage()); |
||||
|
logger.error("Unexpected error while analyzing {}: {}", url, e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,8 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
|
||||
|
public interface Command { |
||||
|
String getName(); |
||||
|
void execute(String[] args, ArticleRepository repository); |
||||
|
} |
||||
@ -0,0 +1,87 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.exception.NetworkException; |
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.CrawlStrategy; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.util.RetryUtils; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.jsoup.Jsoup; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
import java.io.IOException; |
||||
|
import java.util.concurrent.Callable; |
||||
|
|
||||
|
public class CrawlCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
private final StrategyFactory strategyFactory; |
||||
|
|
||||
|
public CrawlCommand(ConsoleView view, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.strategyFactory = strategyFactory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "crawl"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
if (args.length < 2) { |
||||
|
view.printError("Usage: crawl <url>"); |
||||
|
logger.warn("Invalid command: missing URL argument"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[1]; |
||||
|
logger.info("Crawl started for: {}", url); |
||||
|
|
||||
|
CrawlStrategy strategy = strategyFactory.getStrategy(url); |
||||
|
if (strategy == null) { |
||||
|
view.printError("No strategy found for: " + url); |
||||
|
logger.error("No strategy found for URL: {}", url); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
view.printInfo("Crawling: " + url); |
||||
|
|
||||
|
Callable<Document> fetchTask = () -> { |
||||
|
logger.debug("Fetching document from: {}", url); |
||||
|
try { |
||||
|
return Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") |
||||
|
.timeout(10000) |
||||
|
.get(); |
||||
|
} catch (IOException e) { |
||||
|
throw new NetworkException("Failed to connect to " + url + ": " + e.getMessage(), e); |
||||
|
} |
||||
|
}; |
||||
|
|
||||
|
Document doc = RetryUtils.executeWithRetry(fetchTask); |
||||
|
logger.info("Successfully fetched document from: {}", url); |
||||
|
|
||||
|
var articles = strategy.parse(url, doc); |
||||
|
logger.info("Parsed {} articles", articles.size()); |
||||
|
|
||||
|
repository.addAll(articles); |
||||
|
logger.info("Successfully added {} articles to repository", articles.size()); |
||||
|
|
||||
|
view.printSuccess("Crawled " + articles.size() + " articles."); |
||||
|
logger.info("Successfully crawled {} articles from {}", articles.size(), url); |
||||
|
} catch (NetworkException e) { |
||||
|
view.printError("Network error: " + e.getMessage()); |
||||
|
logger.error("Network error while crawling {}: {}", url, e.getMessage(), e); |
||||
|
} catch (ParseException e) { |
||||
|
view.printError("Parse error: " + e.getMessage()); |
||||
|
logger.error("Parse error while crawling {}: {}", url, e.getMessage(), e); |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Failed to crawl: " + e.getMessage()); |
||||
|
logger.error("Unexpected error while crawling {}: {}", url, e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ExitCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ExitCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ExitCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.info("Exit command executed, shutting down"); |
||||
|
view.printSuccess("Bye!"); |
||||
|
System.exit(0);/*退出程序 */ |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class HelpCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HelpCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public HelpCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.info("Help command executed"); |
||||
|
view.printInfo("Commands: crawl <url>, list, help, exit, analyze"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,26 @@ |
|||||
|
package com.example.datacollect.command; |
||||
|
|
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
|
||||
|
public class ListCommand implements Command { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ListCommand.class); |
||||
|
private final ConsoleView view; |
||||
|
|
||||
|
public ListCommand(ConsoleView view) { |
||||
|
this.view = view; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "list"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, ArticleRepository repository) { |
||||
|
logger.info("List command executed, showing {} articles", repository.size()); |
||||
|
view.display(repository.getAll()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,64 @@ |
|||||
|
package com.example.datacollect.controller; |
||||
|
|
||||
|
import com.example.datacollect.command.AnalyzeCommand; |
||||
|
import com.example.datacollect.command.Command; |
||||
|
import com.example.datacollect.command.CrawlCommand; |
||||
|
import com.example.datacollect.command.ExitCommand; |
||||
|
import com.example.datacollect.command.HelpCommand; |
||||
|
import com.example.datacollect.command.ListCommand; |
||||
|
import com.example.datacollect.repository.ArticleRepository; |
||||
|
import com.example.datacollect.strategy.StrategyFactory; |
||||
|
import com.example.datacollect.view.ConsoleView; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CrawlerController { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(CrawlerController.class); |
||||
|
private final Map<String, Command> commands = new HashMap<>(); |
||||
|
private final ConsoleView view; |
||||
|
private final ArticleRepository repository; |
||||
|
|
||||
|
public CrawlerController(ConsoleView view, ArticleRepository repository, StrategyFactory strategyFactory) { |
||||
|
this.view = view; |
||||
|
this.repository = repository; |
||||
|
register(new HelpCommand(view)); |
||||
|
register(new ListCommand(view)); |
||||
|
register(new CrawlCommand(view, strategyFactory)); |
||||
|
register(new ExitCommand(view)); |
||||
|
register(new AnalyzeCommand(view, strategyFactory)); |
||||
|
logger.info("CrawlerController initialized with {} commands", commands.size()); |
||||
|
} |
||||
|
|
||||
|
private void register(Command command) { |
||||
|
commands.put(command.getName(), command); |
||||
|
logger.debug("Registered command: {}", command.getName()); |
||||
|
} |
||||
|
|
||||
|
public void handle(String input) {/* 处理用户输入 */ |
||||
|
String text = input == null ? "" : input.trim();/* 处理空输入 */ |
||||
|
if (text.isEmpty()) { |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
String[] args = text.split("\\s+");/* 解析命令行参数 */ |
||||
|
String cmdName = args[0].toLowerCase();/* 提取命令名称并转换为小写 */ |
||||
|
|
||||
|
logger.debug("Processing command: {}", cmdName); |
||||
|
|
||||
|
Command command = commands.get(cmdName);/* 获取命令对象 */ |
||||
|
if (command == null) { |
||||
|
view.printError("Unknown command: " + cmdName); |
||||
|
logger.warn("Unknown command attempted: {}", cmdName); |
||||
|
return; |
||||
|
} |
||||
|
|
||||
|
try { |
||||
|
command.execute(args, repository);/* 执行命令 */ |
||||
|
} catch (Exception e) { |
||||
|
view.printError("Command execution failed: " + e.getMessage()); |
||||
|
logger.error("Error executing command {}: {}", cmdName, e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,10 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Checked base type for failures raised while crawling.
 */
public class CrawlerException extends Exception {

    /**
     * @param message description of the failure
     * @param cause   underlying exception that triggered it
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }
}
||||
@ -0,0 +1,10 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class NetworkException extends CrawlerException { |
||||
|
public NetworkException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
public NetworkException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,10 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
public class ParseException extends CrawlerException { |
||||
|
public ParseException(String message) { |
||||
|
super(message); |
||||
|
} |
||||
|
public ParseException(String message, Throwable cause) { |
||||
|
super(message, cause); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,10 @@ |
|||||
|
package com.example.datacollect.exception; |
||||
|
|
||||
|
/**
 * Unchecked exception for a malformed URL.
 */
public class UrlFormatException extends RuntimeException {

    /**
     * @param message description of the problem
     * @param cause   underlying exception that triggered it
     */
    public UrlFormatException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param message description of the problem
     */
    public UrlFormatException(String message) {
        super(message);
    }
}
||||
@ -0,0 +1,72 @@ |
|||||
|
package com.example.datacollect.model; |
||||
|
/**
 * A crawled article: a title, the URL it came from and an optional text body.
 *
 * <p>Title and URL are normalised (trimmed) first and validated afterwards, so surrounding
 * whitespace never causes a rejection. Content is optional and is truncated, never rejected.
 */
public class Article {
    /** Longest accepted title, measured after trimming. */
    private static final int MAX_TITLE_LENGTH = 500;
    /** Longest stored content; anything beyond is cut off. */
    private static final int MAX_CONTENT_LENGTH = 10000;

    private String title;
    private String url;
    private String content;

    /**
     * @param title   non-blank title of at most 500 characters after trimming
     * @param url     non-blank URL starting with {@code http://} or {@code https://}
     * @param content article text; {@code null} is stored as the empty string
     * @throws IllegalArgumentException if the title or URL is invalid
     */
    public Article(String title, String url, String content) {
        setTitle(title);
        setUrl(url);
        setContent(content);
    }

    public String getTitle() {
        return title;
    }

    /**
     * Stores the trimmed title.
     *
     * @throws IllegalArgumentException if {@code title} is null, blank or, once trimmed,
     *                                  longer than 500 characters
     */
    public void setTitle(String title) {
        if (title == null) {
            throw new IllegalArgumentException("Title cannot be null");
        }
        // Validate the value that is actually stored, not the padded input.
        String normalized = title.trim();
        if (normalized.isEmpty()) {
            throw new IllegalArgumentException("Title cannot be empty");
        }
        if (normalized.length() > MAX_TITLE_LENGTH) {
            throw new IllegalArgumentException("Title cannot exceed 500 characters");
        }
        this.title = normalized;
    }

    public String getUrl() {
        return url;
    }

    /**
     * Stores the trimmed URL.
     *
     * @throws IllegalArgumentException if {@code url} is null, blank or, once trimmed, does
     *                                  not start with {@code http://} or {@code https://}
     */
    public void setUrl(String url) {
        if (url == null) {
            throw new IllegalArgumentException("URL cannot be null");
        }
        String normalized = url.trim();
        if (normalized.isEmpty()) {
            throw new IllegalArgumentException("URL cannot be empty");
        }
        if (!normalized.startsWith("http://") && !normalized.startsWith("https://")) {
            throw new IllegalArgumentException("URL must start with http:// or https://");
        }
        this.url = normalized;
    }

    public String getContent() {
        return content;
    }

    /**
     * Stores the content, replacing {@code null} with the empty string and keeping at most
     * the first 10000 characters.
     */
    public void setContent(String content) {
        if (content == null) {
            this.content = "";
            return;
        }
        if (content.length() <= MAX_CONTENT_LENGTH) {
            this.content = content;
            return;
        }
        int end = MAX_CONTENT_LENGTH;
        // Do not cut a surrogate pair in half; drop the whole code point instead.
        if (Character.isHighSurrogate(content.charAt(end - 1))
                && Character.isLowSurrogate(content.charAt(end))) {
            end--;
        }
        this.content = content.substring(0, end);
    }

    /** Returns the title and URL; the content is deliberately left out. */
    @Override
    public String toString() {
        return "Article{"
                + "title='" + title + '\''
                + ", url='" + url + '\''
                + '}';
    }
}
||||
@ -0,0 +1,113 @@ |
|||||
|
package com.example.datacollect.repository; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.HashSet; |
||||
|
import java.util.List; |
||||
|
import java.util.Set; |
||||
|
/* 文章仓库 |
||||
|
- 添加 logger 成员 |
||||
|
- 增强 add() 方法的防御检查 |
||||
|
- 增强 addALL() 方法的防御检查 |
||||
|
- 添加空值检查、重复检查、长度验证 |
||||
|
- 记录操作日志*/ |
||||
|
public class ArticleRepository { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ArticleRepository.class); |
||||
|
private static final int MAX_TITLE_LENGTH = 500;/* 最大标题长度 */ |
||||
|
private static final int MAX_CONTENT_LENGTH = 10000;/* 最大内容长度 */ |
||||
|
|
||||
|
private final List<Article> articles = new ArrayList<>(); |
||||
|
private final Set<String> urlSet = new HashSet<>(); |
||||
|
|
||||
|
public void add(Article article) { |
||||
|
if (article == null) { |
||||
|
logger.error("Attempted to add null article"); |
||||
|
throw new IllegalArgumentException("Article cannot be null"); |
||||
|
} |
||||
|
|
||||
|
String title = article.getTitle(); |
||||
|
String url = article.getUrl(); |
||||
|
String content = article.getContent(); |
||||
|
|
||||
|
if (title == null || title.trim().isEmpty()) { |
||||
|
logger.warn("Attempted to add article with empty title"); |
||||
|
throw new IllegalArgumentException("Article title cannot be null or empty"); |
||||
|
} |
||||
|
|
||||
|
if (url == null || url.trim().isEmpty()) { |
||||
|
logger.warn("Attempted to add article with empty URL"); |
||||
|
throw new IllegalArgumentException("Article URL cannot be null or empty"); |
||||
|
} |
||||
|
|
||||
|
if (title.length() > MAX_TITLE_LENGTH) { |
||||
|
logger.warn("Article title too long: {} characters (max: {})", title.length(), MAX_TITLE_LENGTH); |
||||
|
throw new IllegalArgumentException("Article title exceeds maximum length of " + MAX_TITLE_LENGTH); |
||||
|
} |
||||
|
|
||||
|
if (content != null && content.length() > MAX_CONTENT_LENGTH) { |
||||
|
logger.warn("Article content too long: {} characters (max: {})", content.length(), MAX_CONTENT_LENGTH); |
||||
|
content = content.substring(0, MAX_CONTENT_LENGTH); |
||||
|
} |
||||
|
|
||||
|
if (!url.startsWith("http://") && !url.startsWith("https://")) { |
||||
|
logger.warn("Invalid URL format: {}", url); |
||||
|
throw new IllegalArgumentException("Article URL must start with http:// or https://"); |
||||
|
} |
||||
|
|
||||
|
if (urlSet.contains(url)) { |
||||
|
logger.warn("Duplicate article URL detected: {}", url); |
||||
|
return;/* 跳过重复文章 */ |
||||
|
} |
||||
|
|
||||
|
Article validatedArticle = new Article(title.trim(), url.trim(), content != null ? content.trim() : "");/* 创建验证后的文章 */ |
||||
|
articles.add(validatedArticle);/* 添加文章到列表 */ |
||||
|
urlSet.add(url);/* 添加URL到集合 */ |
||||
|
logger.debug("Added article: {}", title);/* 记录添加日志 */ |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article> articleList) { |
||||
|
if (articleList == null) { |
||||
|
logger.error("Attempted to add null article list"); |
||||
|
throw new IllegalArgumentException("Article list cannot be null"); |
||||
|
} |
||||
|
|
||||
|
int successCount = 0;/* 成功添加的文章数量 */ |
||||
|
int skipCount = 0;/* 跳过的无效文章数量 */ |
||||
|
|
||||
|
for (Article article : articleList) { |
||||
|
if (article != null) { |
||||
|
try { |
||||
|
add(article); |
||||
|
successCount++; |
||||
|
} catch (IllegalArgumentException e) { |
||||
|
logger.warn("Skipped invalid article: {}", e.getMessage()); |
||||
|
skipCount++; |
||||
|
} |
||||
|
} else { |
||||
|
logger.warn("Skipped null article in list"); |
||||
|
skipCount++; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.info("Added {} articles, skipped {} invalid articles", successCount, skipCount); |
||||
|
} |
||||
|
|
||||
|
public List<Article> getAll() { |
||||
|
logger.debug("Retrieving all articles, total: {}", articles.size()); |
||||
|
return Collections.unmodifiableList(articles);/* 返回不可修改的列表 */ |
||||
|
} |
||||
|
|
||||
|
public int size() { |
||||
|
return articles.size();/* 返回文章数量 */ |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
int count = articles.size();/* 记录当前文章数量 */ |
||||
|
articles.clear(); |
||||
|
urlSet.clear(); |
||||
|
logger.info("Cleared repository, removed {} articles", count); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class BlogStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("blog.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements titles = doc.select(".post-title"); |
||||
|
for (Element e : titles) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,11 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public interface CrawlStrategy { |
||||
|
List<Article> parse(String url, Document doc) throws ParseException; |
||||
|
boolean supports(String url); |
||||
|
} |
||||
@ -0,0 +1,77 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
/* HNU News 策略 |
||||
|
- 添加 logger 成员 |
||||
|
- 添加异常处理 |
||||
|
- 实现防御性编程 */ |
||||
|
public class HnuNewsStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(HnuNewsStrategy.class); |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.hnu.edu.cn");/* 支持 HNU News 网站 */ |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
logger.info("Starting to parse HNU News: {}", url); |
||||
|
List<Article> articles = new ArrayList<>();/* 存储储解析后的文章 */ |
||||
|
|
||||
|
try { |
||||
|
Elements listItems = doc.select("ul.list11 li");/* 选择文章列表项 */ |
||||
|
logger.debug("Found {} list items", listItems.size());/* 记录找到的列表项数量 */ |
||||
|
|
||||
|
for (Element li : listItems) { |
||||
|
try { |
||||
|
Element link = li.selectFirst("a");/* 选择列表项中的链接 */ |
||||
|
if (link == null) { |
||||
|
logger.warn("No link found in list item");/* 记录未找到链接 */ |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
String articleUrl = link.attr("href");/* 获取链接的 href 属性值 */ |
||||
|
if (!articleUrl.startsWith("http")) { |
||||
|
articleUrl = "https://news.hnu.edu.cn" + articleUrl.replace("..", "");/* 补全相对路径 */ |
||||
|
} |
||||
|
|
||||
|
String title = "";/* 存储文章标题 */ |
||||
|
Element titleEl = link.selectFirst("h4.l2.h4s2");/* 选择标题元素 */ |
||||
|
if (titleEl != null) { |
||||
|
title = titleEl.text().trim();/* 提取标题文本并移除首尾空格 */ |
||||
|
} |
||||
|
|
||||
|
String content = "";/* 存储文章内容 */ |
||||
|
Element contentEl = link.selectFirst("p.l3.ps3");/* 选择内容元素 */ |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim();/* 提取内容文本并移除首尾空格 */ |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty()) { |
||||
|
Article article = new Article(title, articleUrl, content);/* 创建文章对象 */ |
||||
|
articles.add(article);/* 将文章添加到列表 */ |
||||
|
} else { |
||||
|
logger.warn("Empty title found, skipping article"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Error parsing individual article: {}", e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.info("Successfully parsed {} articles from HNU News", articles.size()); |
||||
|
return articles; |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Failed to parse HNU News page: {}", e.getMessage(), e); |
||||
|
throw new ParseException("Failed to parse HNU News: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,25 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class NewsStrategy implements CrawlStrategy { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("news.example.com"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) { |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
Elements items = doc.select(".article-headline"); |
||||
|
for (Element e : items) { |
||||
|
articles.add(new Article(e.text(), url, "")); |
||||
|
} |
||||
|
return articles; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,83 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
/* 人民网策略类 */ |
||||
|
public class PeopleStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(PeopleStrategy.class); |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("people.com.cn");/* 检查URL是否包含people.com.cn */ |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
logger.info("Starting to parse People's Daily News: {}", url); |
||||
|
List<Article> articles = new ArrayList<>();/* 初始化文章列表 */ |
||||
|
|
||||
|
try { |
||||
|
Elements newsItems = doc.select("div.w1000, div.news-item, li.list_item");/* 选择新闻容器 */ |
||||
|
logger.debug("Found {} news containers", newsItems.size()); |
||||
|
|
||||
|
if (newsItems.isEmpty()) { |
||||
|
newsItems = doc.select("a[href*='/n1/']");/* 选择替代选择器 */ |
||||
|
logger.debug("Trying alternative selector, found {} items", newsItems.size()); |
||||
|
} |
||||
|
|
||||
|
for (Element item : newsItems) { |
||||
|
try { |
||||
|
Element link = item.selectFirst("a");/* 选择链接元素 */ |
||||
|
if (link == null) { |
||||
|
link = item.tagName().equals("a") ? item : null;/* 检查是否为链接元素 */ |
||||
|
} |
||||
|
|
||||
|
if (link == null) { |
||||
|
logger.warn("No link found in news item"); |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
String articleUrl = link.attr("href");/* 获取链接URL */ |
||||
|
if (!articleUrl.startsWith("http")) {/* 检查是否为绝对URL */ |
||||
|
if (articleUrl.startsWith("/")) { |
||||
|
articleUrl = "https://www.people.com.cn" + articleUrl; |
||||
|
} else { |
||||
|
articleUrl = "https://www.people.com.cn/" + articleUrl; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
String title = link.text().trim();/* 获取标题文本 */ |
||||
|
|
||||
|
String content = "";/* 初始化内容文本 */ |
||||
|
Element contentEl = item.selectFirst("p, div.ed, div.summary");/* 选择内容元素 */ |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim();/* 获取内容文本 */ |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty() && title.length() > 5) { |
||||
|
Article article = new Article(title, articleUrl, content);/* 创建文章对象 */ |
||||
|
articles.add(article);/* 添加文章到列表 */ |
||||
|
logger.debug("Parsed article: {}", title);/* 记录解析文章 */ |
||||
|
} else { |
||||
|
logger.warn("Invalid title found, skipping article");/* 记录无效标题 */ |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Error parsing individual article: {}", e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.info("Successfully parsed {} articles from People's Daily News", articles.size()); |
||||
|
return articles; |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Failed to parse People's Daily News page: {}", e.getMessage(), e); |
||||
|
throw new ParseException("Failed to parse People's Daily News: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,36 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(StrategyFactory.class); |
||||
|
private final List<CrawlStrategy> strategies = new ArrayList<>(); |
||||
|
|
||||
|
public StrategyFactory() { |
||||
|
strategies.add(new HnuNewsStrategy()); |
||||
|
strategies.add(new YouthStrategy()); |
||||
|
strategies.add(new PeopleStrategy()); |
||||
|
strategies.add(new BlogStrategy()); |
||||
|
strategies.add(new NewsStrategy()); |
||||
|
logger.info("Initialized StrategyFactory with {} strategies", strategies.size()); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy getStrategy(String url) { |
||||
|
for (CrawlStrategy s : strategies) { |
||||
|
if (s.supports(url)) { |
||||
|
logger.debug("Found strategy {} for URL: {}", s.getClass().getSimpleName(), url); |
||||
|
return s; |
||||
|
} |
||||
|
} |
||||
|
logger.warn("No strategy found for URL: {}", url); |
||||
|
return null; |
||||
|
} |
||||
|
|
||||
|
public void register(CrawlStrategy strategy) { |
||||
|
strategies.add(strategy); |
||||
|
logger.info("Registered new strategy: {}", strategy.getClass().getSimpleName()); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,87 @@ |
|||||
|
package com.example.datacollect.strategy; |
||||
|
|
||||
|
import com.example.datacollect.exception.ParseException; |
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.jsoup.nodes.Document; |
||||
|
import org.jsoup.nodes.Element; |
||||
|
import org.jsoup.select.Elements; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
/* 青年网新闻解析策略*/ |
||||
|
public class YouthStrategy implements CrawlStrategy { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(YouthStrategy.class); |
||||
|
|
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url.contains("youth.cn");/* 检查URL是否包含青年网域名 */ |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Article> parse(String url, Document doc) throws ParseException { |
||||
|
logger.info("Starting to parse Youth News: {}", url); |
||||
|
List<Article> articles = new ArrayList<>(); |
||||
|
|
||||
|
try { |
||||
|
Elements newsItems = doc.select("div.news-item, div.article-item, li.news-list-item");/* 选择新闻项元素 */ |
||||
|
logger.debug("Found {} news items", newsItems.size()); |
||||
|
|
||||
|
if (newsItems.isEmpty()) { |
||||
|
newsItems = doc.select("a[href*='/n1/']");/* 选择替代选择器 */ |
||||
|
logger.debug("Trying alternative selector, found {} items", newsItems.size()); |
||||
|
} |
||||
|
|
||||
|
for (Element item : newsItems) { |
||||
|
try { |
||||
|
Element link = item.selectFirst("a");/* 选择链接元素 */ |
||||
|
if (link == null) { |
||||
|
link = item.tagName().equals("a") ? item : null;/* 检查是否为链接元素 */ |
||||
|
} |
||||
|
|
||||
|
if (link == null) { |
||||
|
logger.warn("No link found in news item"); |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
String articleUrl = link.attr("href");/* 获取链接URL */ |
||||
|
|
||||
|
if (!articleUrl.startsWith("http")) {/* 检查URL是否为绝对URL */ |
||||
|
if (articleUrl.startsWith("/")) { |
||||
|
articleUrl = "https://www.youth.cn" + articleUrl; |
||||
|
} else { |
||||
|
articleUrl = "https://www.youth.cn/" + articleUrl; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
String title = link.text().trim();/* 获取链接文本 */ |
||||
|
if (title.isEmpty()) {/* 检查标题是否为空 */ |
||||
|
continue; |
||||
|
} |
||||
|
|
||||
|
String content = "";/* 初始化内容为空字符串 */ |
||||
|
Element contentEl = item.selectFirst("p.summary, p.desc, div.brief");/* 选择摘要元素 */ |
||||
|
if (contentEl != null) { |
||||
|
content = contentEl.text().trim();/* 获取摘要文本 */ |
||||
|
} |
||||
|
|
||||
|
if (!title.isEmpty() && title.length() > 5) { |
||||
|
Article article = new Article(title, articleUrl, content); |
||||
|
articles.add(article); |
||||
|
logger.debug("Parsed article: {}", title); |
||||
|
} else { |
||||
|
logger.warn("Invalid title found, skipping article"); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Error parsing individual article: {}", e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.info("Successfully parsed {} articles from Youth News", articles.size()); |
||||
|
return articles; |
||||
|
} catch (Exception e) { |
||||
|
logger.error("Failed to parse Youth News page: {}", e.getMessage(), e); |
||||
|
throw new ParseException("Failed to parse Youth News: " + e.getMessage(), e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,51 @@ |
|||||
|
package com.example.datacollect.util; |
||||
|
|
||||
|
import com.example.datacollect.exception.NetworkException; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.concurrent.Callable; |
||||
|
|
||||
|
public class RetryUtils { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(RetryUtils.class); |
||||
|
|
||||
|
private static final int DEFAULT_MAX_RETRIES = 3; |
||||
|
private static final long DEFAULT_RETRY_BASE_DELAY_MS = 500; |
||||
|
|
||||
|
public static <T> T executeWithRetry(Callable<T> task) throws Exception { |
||||
|
return executeWithRetry(task, DEFAULT_MAX_RETRIES, DEFAULT_RETRY_BASE_DELAY_MS); |
||||
|
} |
||||
|
|
||||
|
public static <T> T executeWithRetry(Callable<T> task, int maxRetries, long baseDelayMs) throws Exception { |
||||
|
Exception lastException = null; |
||||
|
|
||||
|
for (int attempt = 0; attempt <= maxRetries; attempt++) { |
||||
|
try { |
||||
|
if (attempt > 0) { |
||||
|
long waitTime = (long) (baseDelayMs * Math.pow(2, attempt - 1)); |
||||
|
logger.info("Retry attempt {}/{} for task, waiting {} ms", attempt, maxRetries, waitTime); |
||||
|
Thread.sleep(waitTime); |
||||
|
} |
||||
|
|
||||
|
return task.call(); |
||||
|
} catch (Exception e) { |
||||
|
lastException = e; |
||||
|
|
||||
|
if (e instanceof NetworkException) { |
||||
|
logger.warn("Network error on attempt {}: {}", attempt, e.getMessage()); |
||||
|
|
||||
|
if (attempt < maxRetries) { |
||||
|
long nextWaitTime = (long) (baseDelayMs * Math.pow(2, attempt)); |
||||
|
logger.info("Will retry in {} ms...", nextWaitTime); |
||||
|
continue; |
||||
|
} |
||||
|
} else { |
||||
|
logger.error("Non-retryable error: {}", e.getMessage()); |
||||
|
throw e; |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
logger.error("All {} retry attempts failed", maxRetries + 1); |
||||
|
throw lastException; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,46 @@ |
|||||
|
package com.example.datacollect.view; |
||||
|
|
||||
|
import com.example.datacollect.model.Article; |
||||
|
import org.slf4j.Logger; |
||||
|
import org.slf4j.LoggerFactory; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class ConsoleView { |
||||
|
private static final Logger logger = LoggerFactory.getLogger(ConsoleView.class); |
||||
|
private static final String ANSI_RESET = "\u001B[0m"; |
||||
|
private static final String ANSI_GREEN = "\u001B[32m"; |
||||
|
private static final String ANSI_RED = "\u001B[31m"; |
||||
|
private static final String ANSI_BLUE = "\u001B[34m"; |
||||
|
|
||||
|
private final Scanner scanner = new Scanner(System.in); |
||||
|
|
||||
|
public String readLine() { |
||||
|
System.out.print("> "); |
||||
|
String input = scanner.nextLine(); |
||||
|
return input;/* 返回用户输入 */ |
||||
|
} |
||||
|
|
||||
|
public void printSuccess(String msg) { |
||||
|
System.out.println(ANSI_GREEN + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printError(String msg) { |
||||
|
System.out.println(ANSI_RED + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void printInfo(String msg) { |
||||
|
System.out.println(ANSI_BLUE + msg + ANSI_RESET); |
||||
|
} |
||||
|
|
||||
|
public void display(List<Article> articles) { |
||||
|
if (articles.isEmpty()) { |
||||
|
printInfo("暂无文章,请先执行 crawl。"); |
||||
|
return; |
||||
|
} |
||||
|
for (int i = 0; i < articles.size(); i++) { |
||||
|
Article a = articles.get(i); |
||||
|
System.out.println((i + 1) + ". " + a.getTitle() + " | " + a.getUrl()); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,24 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<configuration> |
||||
|
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<file>logs/crawler.log</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern> |
||||
|
<maxHistory>30</maxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="INFO"> |
||||
|
<appender-ref ref="CONSOLE" /> |
||||
|
<appender-ref ref="FILE" /> |
||||
|
</root> |
||||
|
</configuration> |
||||
@ -0,0 +1,24 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<configuration> |
||||
|
<appender name="CONSOLE" class="ch.qos.logback.core.ConsoleAppender"> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<appender name="FILE" class="ch.qos.logback.core.rolling.RollingFileAppender"> |
||||
|
<file>logs/crawler.log</file> |
||||
|
<rollingPolicy class="ch.qos.logback.core.rolling.TimeBasedRollingPolicy"> |
||||
|
<fileNamePattern>logs/crawler.%d{yyyy-MM-dd}.log</fileNamePattern> |
||||
|
<maxHistory>30</maxHistory> |
||||
|
</rollingPolicy> |
||||
|
<encoder> |
||||
|
<pattern>%d{yyyy-MM-dd HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern> |
||||
|
</encoder> |
||||
|
</appender> |
||||
|
|
||||
|
<root level="INFO"> |
||||
|
<appender-ref ref="CONSOLE" /> |
||||
|
<appender-ref ref="FILE" /> |
||||
|
</root> |
||||
|
</configuration> |
||||
@ -0,0 +1,3 @@ |
|||||
|
artifactId=datacollect-cli |
||||
|
groupId=com.example |
||||
|
version=0.1.0 |
||||
@ -0,0 +1,22 @@ |
|||||
|
com\example\datacollect\command\ListCommand.class |
||||
|
com\example\datacollect\strategy\PeopleStrategy.class |
||||
|
com\example\datacollect\command\CrawlCommand.class |
||||
|
com\example\datacollect\strategy\BlogStrategy.class |
||||
|
com\example\datacollect\repository\ArticleRepository.class |
||||
|
com\example\datacollect\Main.class |
||||
|
com\example\datacollect\view\ConsoleView.class |
||||
|
com\example\datacollect\command\ExitCommand.class |
||||
|
com\example\datacollect\command\HelpCommand.class |
||||
|
com\example\datacollect\util\RetryUtils.class |
||||
|
com\example\datacollect\strategy\NewsStrategy.class |
||||
|
com\example\datacollect\command\Command.class |
||||
|
com\example\datacollect\controller\CrawlerController.class |
||||
|
com\example\datacollect\exception\CrawlerException.class |
||||
|
com\example\datacollect\exception\NetworkException.class |
||||
|
com\example\datacollect\command\AnalyzeCommand.class |
||||
|
com\example\datacollect\strategy\StrategyFactory.class |
||||
|
com\example\datacollect\strategy\HnuNewsStrategy.class |
||||
|
com\example\datacollect\strategy\YouthStrategy.class |
||||
|
com\example\datacollect\exception\ParseException.class |
||||
|
com\example\datacollect\strategy\CrawlStrategy.class |
||||
|
com\example\datacollect\model\Article.class |
||||
@ -0,0 +1,22 @@ |
|||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\NewsStrategy.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\controller\CrawlerController.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\repository\ArticleRepository.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\HnuNewsStrategy.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\ExitCommand.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\Command.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\Main.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\CrawlCommand.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\NetworkException.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\StrategyFactory.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\BlogStrategy.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\util\RetryUtils.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\HelpCommand.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\CrawlerException.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\exception\ParseException.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\model\Article.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\view\ConsoleView.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\AnalyzeCommand.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\YouthStrategy.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\command\ListCommand.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\CrawlStrategy.java |
||||
|
C:\Users\27687\Desktop\java-cli\src\main\java\com\example\datacollect\strategy\PeopleStrategy.java |
||||
Loading…
Reference in new issue