14 changed files with 326 additions and 0 deletions
@ -0,0 +1,39 @@ |
|||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class AnalyzeCommand11 implements Command11 { |
||||
|
private static final Logger logger = Logger.getLogger(AnalyzeCommand11.class.getName()); |
||||
|
private final StrategyFactory11 factory; |
||||
|
|
||||
|
public AnalyzeCommand11(StrategyFactory11 factory) { |
||||
|
this.factory = factory; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "analyze"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article11> articles) { |
||||
|
if (args == null || args.length == 0) { |
||||
|
logger.warning("analyze 命令未传入URL参数"); |
||||
|
System.out.println("用法:analyze 网址"); |
||||
|
return; |
||||
|
} |
||||
|
String url = args[0]; |
||||
|
logger.info("开始分析链接:" + url); |
||||
|
CrawlStrategy11 strategy = factory.getMatchStrategy(url); |
||||
|
|
||||
|
System.out.println("===== 链接分析结果 ====="); |
||||
|
if (strategy != null) { |
||||
|
logger.info("链接 " + url + " 匹配到对应爬取策略"); |
||||
|
System.out.println("链接:" + url); |
||||
|
System.out.println("状态:支持解析 ✅"); |
||||
|
} else { |
||||
|
logger.warning("链接 " + url + " 未匹配到任何策略"); |
||||
|
System.out.println("链接:" + url); |
||||
|
System.out.println("状态:不支持解析 ❌"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
/**
 * Immutable value holder for a crawled article.
 *
 * <p>All fields are assigned once in the constructor and exposed through getters only.
 * {@code author} and {@code publishDate} are {@code null} when the three-argument
 * constructor is used.
 */
public class Article11 {

    private final String title;
    private final String url;
    private final String content;
    private final String author;
    private final String publishDate;

    /**
     * Creates an article without author or publish date (both are left {@code null}).
     *
     * @param title   article title
     * @param url     URL the article was obtained from
     * @param content article body text
     */
    public Article11(String title, String url, String content) {
        // Delegate so the field assignments live in exactly one place.
        this(title, url, content, null, null);
    }

    /**
     * Creates a fully populated article.
     *
     * @param title       article title
     * @param url         URL the article was obtained from
     * @param content     article body text
     * @param author      author name, may be {@code null}
     * @param publishDate publish date as text, may be {@code null}
     */
    public Article11(String title, String url, String content, String author, String publishDate) {
        this.title = title;
        this.url = url;
        this.content = content;
        this.author = author;
        this.publishDate = publishDate;
    }

    /** @return the article title */
    public String getTitle() {
        return title;
    }

    /** @return the article URL */
    public String getUrl() {
        return url;
    }

    /** @return the article body text */
    public String getContent() {
        return content;
    }

    /** @return the author, or {@code null} if none was supplied */
    public String getAuthor() {
        return author;
    }

    /** @return the publish date text, or {@code null} if none was supplied */
    public String getPublishDate() {
        return publishDate;
    }
}
||||
@ -0,0 +1,48 @@ |
|||||
|
import java.util.ArrayList; |
||||
|
import java.util.Collections; |
||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class ArticleRepository11 { |
||||
|
private static final Logger logger = Logger.getLogger(ArticleRepository11.class.getName()); |
||||
|
private final List<Article11> articles = new ArrayList<>(); |
||||
|
|
||||
|
public void add(Article11 article) { |
||||
|
if (article == null) { |
||||
|
logger.severe("添加失败:文章对象为 null"); |
||||
|
throw new IllegalArgumentException("文章对象不能为空"); |
||||
|
} |
||||
|
articles.add(article); |
||||
|
logger.info("成功添加文章:" + article.getTitle()); |
||||
|
} |
||||
|
|
||||
|
public void addAll(List<Article11> newArticles) { |
||||
|
if (newArticles == null) { |
||||
|
logger.severe("批量添加失败:传入列表为 null"); |
||||
|
throw new IllegalArgumentException("文章列表不能为空"); |
||||
|
} |
||||
|
int successCount = 0; |
||||
|
for (Article11 article : newArticles) { |
||||
|
if (article == null) { |
||||
|
logger.warning("列表中存在空对象,已跳过"); |
||||
|
continue; |
||||
|
} |
||||
|
articles.add(article); |
||||
|
successCount++; |
||||
|
} |
||||
|
logger.info("批量添加完成,共成功添加 " + successCount + " 篇文章"); |
||||
|
} |
||||
|
|
||||
|
public List<Article11> getAll() { |
||||
|
return Collections.unmodifiableList(articles); |
||||
|
} |
||||
|
|
||||
|
public int size() { |
||||
|
return articles.size(); |
||||
|
} |
||||
|
|
||||
|
public void clear() { |
||||
|
logger.info("清空文章集合,原有数量:" + articles.size()); |
||||
|
articles.clear(); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
import java.util.List; |
||||
|
|
||||
|
public interface Command11 { |
||||
|
String getName(); |
||||
|
void execute(String[] args, List<Article11> articles); |
||||
|
} |
||||
@ -0,0 +1,14 @@ |
|||||
|
import java.util.HashMap; |
||||
|
import java.util.Map; |
||||
|
|
||||
|
public class CommandRegistry11 { |
||||
|
private final Map<String, Command11> commandMap = new HashMap<>(); |
||||
|
|
||||
|
public void registerCommand(Command11 command) { |
||||
|
commandMap.put(command.getName(), command); |
||||
|
} |
||||
|
|
||||
|
public Command11 getCommand(String name) { |
||||
|
return commandMap.get(name); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,4 @@ |
|||||
|
public interface CrawlStrategy11 { |
||||
|
boolean supports(String url); |
||||
|
Article11 crawl(String url) throws ParseException11, NetworkException11; |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
/**
 * Base checked exception for crawler failures.
 *
 * <p>Mirrors the four standard {@link Exception} constructors.
 */
public class CrawlerException11 extends Exception {

    // Exception is Serializable; pin the version id instead of relying on the
    // compiler-dependent default computed from the class shape.
    private static final long serialVersionUID = 1L;

    /** Creates an exception with no message and no cause. */
    public CrawlerException11() {
        super();
    }

    /**
     * @param message detail message
     */
    public CrawlerException11(String message) {
        super(message);
    }

    /**
     * @param message detail message
     * @param cause   underlying cause, may be {@code null}
     */
    public CrawlerException11(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * @param cause underlying cause, may be {@code null}
     */
    public CrawlerException11(Throwable cause) {
        super(cause);
    }
}
||||
@ -0,0 +1,18 @@ |
|||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class ExitCommand11 implements Command11 { |
||||
|
private static final Logger logger = Logger.getLogger(ExitCommand11.class.getName()); |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "exit"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article11> articles) { |
||||
|
logger.info("用户执行 exit 命令,程序即将退出"); |
||||
|
System.out.println("程序已安全退出!"); |
||||
|
System.exit(0); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,21 @@ |
|||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class HelpCommand11 implements Command11 { |
||||
|
private static final Logger logger = Logger.getLogger(HelpCommand11.class.getName()); |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "help"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article11> articles) { |
||||
|
logger.info("用户执行 help 命令"); |
||||
|
System.out.println("===== W11 帮助菜单 ====="); |
||||
|
System.out.println("help 查看帮助"); |
||||
|
System.out.println("history 查看命令历史"); |
||||
|
System.out.println("analyze 分析URL链接"); |
||||
|
System.out.println("exit 退出程序"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,31 @@ |
|||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class HistoryCommand11 implements Command11 { |
||||
|
private static final Logger logger = Logger.getLogger(HistoryCommand11.class.getName()); |
||||
|
private final List<String> commandHistory = new ArrayList<>(); |
||||
|
|
||||
|
@Override |
||||
|
public String getName() { |
||||
|
return "history"; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args, List<Article11> articles) { |
||||
|
if (commandHistory.isEmpty()) { |
||||
|
logger.info("暂无命令历史记录"); |
||||
|
System.out.println("暂无命令历史记录"); |
||||
|
return; |
||||
|
} |
||||
|
logger.info("用户查看命令历史"); |
||||
|
System.out.println("===== 命令历史 ====="); |
||||
|
for (int i = 0; i < commandHistory.size(); i++) { |
||||
|
System.out.println((i + 1) + ". " + commandHistory.get(i)); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
public void addCommand(String cmd) { |
||||
|
commandHistory.add(cmd); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,81 @@ |
|||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
import java.util.logging.Logger; |
||||
|
|
||||
|
public class Main11 { |
||||
|
private static final Logger logger = Logger.getLogger(Main11.class.getName()); |
||||
|
private static final int MAX_RETRY_TIMES = 3; |
||||
|
|
||||
|
public static void main(String[] args) { |
||||
|
StrategyFactory11 strategyFactory = new StrategyFactory11(); |
||||
|
CommandRegistry11 cmdRegistry = new CommandRegistry11(); |
||||
|
HistoryCommand11 historyCmd = new HistoryCommand11(); |
||||
|
ArticleRepository11 repository = new ArticleRepository11(); |
||||
|
|
||||
|
// 模拟爬取策略 + 失败重试逻辑
|
||||
|
CrawlStrategy11 blogStrategy = new CrawlStrategy11() { |
||||
|
@Override |
||||
|
public boolean supports(String url) { |
||||
|
return url != null && url.contains("blog"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public Article11 crawl(String url) throws ParseException11, NetworkException11 { |
||||
|
for (int i = 1; i <= MAX_RETRY_TIMES; i++) { |
||||
|
try { |
||||
|
double random = Math.random(); |
||||
|
if (random < 0.3) { |
||||
|
throw new NetworkException11("网络连接超时"); |
||||
|
} |
||||
|
if (random < 0.2) { |
||||
|
throw new ParseException11("页面内容解析失败"); |
||||
|
} |
||||
|
return new Article11("博客文章", url, "正文内容", "博主", "2026-05-30"); |
||||
|
} catch (NetworkException11 e) { |
||||
|
logger.warning("第 " + i + " 次爬取异常:" + e.getMessage()); |
||||
|
if (i == MAX_RETRY_TIMES) { |
||||
|
throw new NetworkException11("已重试" + MAX_RETRY_TIMES + "次,任务终止", e); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
}; |
||||
|
strategyFactory.registerStrategy(blogStrategy); |
||||
|
|
||||
|
cmdRegistry.registerCommand(new HelpCommand11()); |
||||
|
cmdRegistry.registerCommand(historyCmd); |
||||
|
cmdRegistry.registerCommand(new AnalyzeCommand11(strategyFactory)); |
||||
|
cmdRegistry.registerCommand(new ExitCommand11()); |
||||
|
|
||||
|
List<Article11> articleData = new ArrayList<>(); |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
logger.info("W11 爬虫程序启动成功"); |
||||
|
System.out.println("===== W11 爬虫程序启动 ====="); |
||||
|
|
||||
|
while (true) { |
||||
|
System.out.print("请输入命令:"); |
||||
|
String input = scanner.nextLine().trim(); |
||||
|
historyCmd.addCommand(input); |
||||
|
logger.info("用户输入命令:" + input); |
||||
|
|
||||
|
String[] splitArr = input.split(" ", 2); |
||||
|
String cmdName = splitArr[0]; |
||||
|
Command11 command = cmdRegistry.getCommand(cmdName); |
||||
|
|
||||
|
if (command != null) { |
||||
|
String[] params = splitArr.length > 1 ? new String[]{splitArr[1]} : new String[0]; |
||||
|
try { |
||||
|
command.execute(params, articleData); |
||||
|
} catch (Exception e) { |
||||
|
logger.severe("命令执行发生异常:" + e.getMessage()); |
||||
|
System.out.println("命令执行出错,请查看日志!"); |
||||
|
} |
||||
|
} else { |
||||
|
logger.warning("未知命令:" + cmdName); |
||||
|
System.out.println("未知命令,请输入 help 查看帮助"); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
public class NetworkException11 extends CrawlerException11 { |
||||
|
public NetworkException11() { super(); } |
||||
|
public NetworkException11(String message) { super(message); } |
||||
|
public NetworkException11(String message, Throwable cause) { super(message, cause); } |
||||
|
public NetworkException11(Throwable cause) { super(cause); } |
||||
|
} |
||||
@ -0,0 +1,6 @@ |
|||||
|
public class ParseException11 extends CrawlerException11 { |
||||
|
public ParseException11() { super(); } |
||||
|
public ParseException11(String message) { super(message); } |
||||
|
public ParseException11(String message, Throwable cause) { super(message, cause); } |
||||
|
public ParseException11(Throwable cause) { super(cause); } |
||||
|
} |
||||
@ -0,0 +1,19 @@ |
|||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class StrategyFactory11 { |
||||
|
private final List<CrawlStrategy11> strategyList = new ArrayList<>(); |
||||
|
|
||||
|
public void registerStrategy(CrawlStrategy11 strategy) { |
||||
|
strategyList.add(strategy); |
||||
|
} |
||||
|
|
||||
|
public CrawlStrategy11 getMatchStrategy(String url) { |
||||
|
for (CrawlStrategy11 s : strategyList) { |
||||
|
if (s.supports(url)) { |
||||
|
return s; |
||||
|
} |
||||
|
} |
||||
|
return null; |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue