8 changed files with 242 additions and 0 deletions
@ -0,0 +1,39 @@ |
|||
target/ |
|||
!.mvn/wrapper/maven-wrapper.jar |
|||
!**/src/main/**/target/ |
|||
!**/src/test/**/target/ |
|||
.kotlin |
|||
|
|||
### IntelliJ IDEA ### |
|||
.idea/modules.xml |
|||
.idea/jarRepositories.xml |
|||
.idea/compiler.xml |
|||
.idea/libraries/ |
|||
*.iws |
|||
*.iml |
|||
*.ipr |
|||
|
|||
### Eclipse ### |
|||
.apt_generated |
|||
.classpath |
|||
.factorypath |
|||
.project |
|||
.settings |
|||
.springBeans |
|||
.sts4-cache |
|||
|
|||
### NetBeans ### |
|||
/nbproject/private/ |
|||
/nbbuild/ |
|||
/dist/ |
|||
/nbdist/ |
|||
/.nb-gradle/ |
|||
build/ |
|||
!**/src/main/**/build/ |
|||
!**/src/test/**/build/ |
|||
|
|||
### VS Code ### |
|||
.vscode/ |
|||
|
|||
### Mac OS ### |
|||
.DS_Store |
|||
@ -0,0 +1,26 @@ |
|||
package org.example.model; |
|||
|
|||
public class Article { |
|||
private String title; |
|||
private String url; |
|||
|
|||
// 构造方法
|
|||
public Article(String title, String url) { |
|||
this.title = title; |
|||
this.url = url; |
|||
} |
|||
|
|||
// Getter 方法 (非常重要,SearchCommand 里要用到)
|
|||
public String getTitle() { |
|||
return title; |
|||
} |
|||
|
|||
public String getUrl() { |
|||
return url; |
|||
} |
|||
|
|||
@Override |
|||
public String toString() { |
|||
return "标题: " + title + "\n链接: " + url; |
|||
} |
|||
} |
|||
@ -0,0 +1,7 @@ |
|||
package org.example.command; |
|||
|
|||
/**
 * Contract for an executable command. Implementations provide both an
 * argument-taking and an argument-free entry point.
 */
public interface Command {

    /**
     * Runs the command with the supplied arguments.
     *
     * @param args arguments for this invocation
     */
    void execute(String[] args);

    /**
     * Runs the command without any arguments.
     */
    void execute();
}
|||
@ -0,0 +1,12 @@ |
|||
package org.example.exception; |
|||
|
|||
/**
 * Custom unchecked exception used by the crawler code.
 */
public class CrawlerException extends RuntimeException {

    /**
     * Creates an exception with a message and the underlying cause.
     *
     * @param message description of the failure
     * @param cause   the exception that triggered this one
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception with a message only.
     *
     * @param message description of the failure
     */
    public CrawlerException(String message) {
        super(message);
    }
}
|||
@ -0,0 +1,53 @@ |
|||
package org.example.service; |
|||
|
|||
import org.example.exception.CrawlerException; |
|||
import org.example.model.Article; |
|||
import org.example.strategy.BaiduStrategy; |
|||
import org.example.strategy.BingStrategy; |
|||
import org.example.strategy.CrawlerStrategy; |
|||
import org.example.strategy.CsdnStrategy; |
|||
|
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class CrawlerService { |
|||
|
|||
/** |
|||
* 聚合搜索:同时调用所有策略进行搜索 |
|||
*/ |
|||
public List<Article> search(String keyword) throws CrawlerException { |
|||
if (keyword == null || keyword.trim().isEmpty()) { |
|||
throw new CrawlerException("关键词不能为空!"); |
|||
} |
|||
|
|||
System.out.println("Service层: 正在调度所有爬虫策略..."); |
|||
|
|||
List<CrawlerStrategy> strategies = new ArrayList<>(); |
|||
strategies.add(new BaiduStrategy()); |
|||
strategies.add(new BingStrategy()); |
|||
strategies.add(new CsdnStrategy()); |
|||
|
|||
List<Article> allResults = new ArrayList<>(); |
|||
|
|||
for (CrawlerStrategy strategy : strategies) { |
|||
try { |
|||
System.out.println("正在爬取 [" + strategy.getName() + "] ..."); |
|||
|
|||
// 调用具体策略的 crawl 方法
|
|||
List<Article> results = strategy.crawl(keyword); |
|||
|
|||
// 将当前策略的结果添加到总列表中
|
|||
allResults.addAll(results); |
|||
|
|||
System.out.println("[" + strategy.getName() + "] 爬取完成,获取到 " + results.size() + " 条数据。"); |
|||
|
|||
} catch (Exception e) { |
|||
// 捕获单个策略的异常,防止因为一个网站挂了导致整个程序崩溃
|
|||
System.err.println("警告: [" + strategy.getName() + "] 爬取失败: " + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
System.out.println("搜索结束,共获取到 " + allResults.size() + " 条结果。"); |
|||
return allResults; |
|||
} |
|||
} |
|||
@ -0,0 +1,23 @@ |
|||
package org.example; |
|||
|
|||
import org.example.command.Command; |
|||
import org.example.command.SearchCommand; |
|||
import org.example.service.CrawlerService; |
|||
import java.util.Scanner; |
|||
|
|||
public class Main { |
|||
public static void main(String[] args) { |
|||
System.out.println("简易搜索引擎启动"); |
|||
|
|||
// 1. 初始化 Service
|
|||
CrawlerService service = new CrawlerService(); |
|||
|
|||
// 2. 初始化 Command (注入 Service)
|
|||
Command searchCmd = new SearchCommand(service); |
|||
|
|||
// 3. 执行命令
|
|||
searchCmd.execute(); |
|||
|
|||
System.out.println("程序结束"); |
|||
} |
|||
} |
|||
@ -0,0 +1,55 @@ |
|||
package org.example.command; |
|||
|
|||
import org.example.exception.CrawlerException; |
|||
import org.example.model.Article; |
|||
import org.example.service.CrawlerService; |
|||
|
|||
import java.util.List; |
|||
import java.util.Scanner; |
|||
|
|||
/** |
|||
* 处理搜索指令的具体命令类 |
|||
*/ |
|||
public class SearchCommand implements Command { |
|||
|
|||
private final CrawlerService crawlerService; |
|||
|
|||
// 构造方法注入 Service
|
|||
public SearchCommand(CrawlerService crawlerService) { |
|||
this.crawlerService = crawlerService; |
|||
} |
|||
|
|||
@Override |
|||
public void execute() { |
|||
Scanner scanner = new Scanner(System.in); |
|||
try { |
|||
System.out.println("请输入要搜索的关键词:"); |
|||
String keyword = scanner.nextLine(); |
|||
|
|||
System.out.println("正在爬取数据,请稍候..."); |
|||
|
|||
// 调用 Service 层的搜索方法
|
|||
List<Article> results = crawlerService.search(keyword); |
|||
|
|||
if (results.isEmpty()) { |
|||
System.out.println("未找到相关结果。"); |
|||
} else { |
|||
System.out.println("共找到 " + results.size() + " 条结果:"); |
|||
for (Article article : results) { |
|||
System.out.println(article); |
|||
} |
|||
} |
|||
|
|||
} catch (CrawlerException e) { |
|||
// 捕获我们在 Service 层抛出的自定义异常
|
|||
System.err.println("搜索失败:" + e.getMessage()); |
|||
} catch (Exception e) { |
|||
System.err.println("发生未知错误:" + e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
|
|||
} |
|||
} |
|||
@ -0,0 +1,27 @@ |
|||
<?xml version="1.0" encoding="UTF-8"?> |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" |
|||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
|||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
|||
<modelVersion>4.0.0</modelVersion> |
|||
|
|||
<groupId>org.example</groupId> |
|||
<artifactId>Homework</artifactId> |
|||
<version>1.0-SNAPSHOT</version> |
|||
|
|||
<properties> |
|||
<maven.compiler.source>25</maven.compiler.source> |
|||
<maven.compiler.target>25</maven.compiler.target> |
|||
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
|||
</properties> |
|||
<dependencies> |
|||
<!-- Jsoup: 用于解析 HTML 网页 --> |
|||
<dependency> |
|||
<groupId>org.jsoup</groupId> |
|||
<artifactId>jsoup</artifactId> |
|||
<version>1.15.3</version> |
|||
</dependency> |
|||
|
|||
<!-- HttpClient: 用于发送网络请求 (可选,Jsoup自带简单的连接功能,但HttpClient更强大) --> |
|||
<!-- 这里先只加 Jsoup 简化起步,如果后面需要复杂的并发爬取再加这个 --> |
|||
</dependencies> |
|||
</project> |
|||
Loading…
Reference in new issue