8 changed files with 242 additions and 0 deletions
@ -0,0 +1,39 @@ |
|||||
|
target/ |
||||
|
!.mvn/wrapper/maven-wrapper.jar |
||||
|
!**/src/main/**/target/ |
||||
|
!**/src/test/**/target/ |
||||
|
.kotlin |
||||
|
|
||||
|
### IntelliJ IDEA ### |
||||
|
.idea/modules.xml |
||||
|
.idea/jarRepositories.xml |
||||
|
.idea/compiler.xml |
||||
|
.idea/libraries/ |
||||
|
*.iws |
||||
|
*.iml |
||||
|
*.ipr |
||||
|
|
||||
|
### Eclipse ### |
||||
|
.apt_generated |
||||
|
.classpath |
||||
|
.factorypath |
||||
|
.project |
||||
|
.settings |
||||
|
.springBeans |
||||
|
.sts4-cache |
||||
|
|
||||
|
### NetBeans ### |
||||
|
/nbproject/private/ |
||||
|
/nbbuild/ |
||||
|
/dist/ |
||||
|
/nbdist/ |
||||
|
/.nb-gradle/ |
||||
|
build/ |
||||
|
!**/src/main/**/build/ |
||||
|
!**/src/test/**/build/ |
||||
|
|
||||
|
### VS Code ### |
||||
|
.vscode/ |
||||
|
|
||||
|
### Mac OS ### |
||||
|
.DS_Store |
||||
@ -0,0 +1,26 @@ |
|||||
|
package org.example.model; |
||||
|
|
||||
|
public class Article { |
||||
|
private String title; |
||||
|
private String url; |
||||
|
|
||||
|
// 构造方法
|
||||
|
public Article(String title, String url) { |
||||
|
this.title = title; |
||||
|
this.url = url; |
||||
|
} |
||||
|
|
||||
|
// Getter 方法 (非常重要,SearchCommand 里要用到)
|
||||
|
public String getTitle() { |
||||
|
return title; |
||||
|
} |
||||
|
|
||||
|
public String getUrl() { |
||||
|
return url; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public String toString() { |
||||
|
return "标题: " + title + "\n链接: " + url; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,7 @@ |
|||||
|
package org.example.command; |
||||
|
|
||||
|
/**
 * Contract for an executable command.
 *
 * <p>Implementations must provide both entry points: one that takes no input and one that
 * receives an argument array.
 */
public interface Command {

    /** Runs the command without any caller-supplied arguments. */
    void execute();

    /**
     * Runs the command with caller-supplied arguments.
     *
     * @param args arguments for this invocation; their meaning is defined by the implementation
     */
    void execute(String[] args);
}
||||
@ -0,0 +1,12 @@ |
|||||
|
package org.example.exception; |
||||
|
|
||||
|
/**
 * Custom unchecked exception used by the crawler code to report failures.
 */
public class CrawlerException extends RuntimeException {

    /**
     * Creates an exception that wraps an underlying failure.
     *
     * @param message description of what went wrong
     * @param cause   the underlying failure, retrievable through {@link #getCause()}
     */
    public CrawlerException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception that carries only a message.
     *
     * @param message description of what went wrong
     */
    public CrawlerException(String message) {
        super(message);
    }
}
||||
@ -0,0 +1,53 @@ |
|||||
|
package org.example.service; |
||||
|
|
||||
|
import org.example.exception.CrawlerException; |
||||
|
import org.example.model.Article; |
||||
|
import org.example.strategy.BaiduStrategy; |
||||
|
import org.example.strategy.BingStrategy; |
||||
|
import org.example.strategy.CrawlerStrategy; |
||||
|
import org.example.strategy.CsdnStrategy; |
||||
|
|
||||
|
import java.util.ArrayList; |
||||
|
import java.util.List; |
||||
|
|
||||
|
public class CrawlerService { |
||||
|
|
||||
|
/** |
||||
|
* 聚合搜索:同时调用所有策略进行搜索 |
||||
|
*/ |
||||
|
public List<Article> search(String keyword) throws CrawlerException { |
||||
|
if (keyword == null || keyword.trim().isEmpty()) { |
||||
|
throw new CrawlerException("关键词不能为空!"); |
||||
|
} |
||||
|
|
||||
|
System.out.println("Service层: 正在调度所有爬虫策略..."); |
||||
|
|
||||
|
List<CrawlerStrategy> strategies = new ArrayList<>(); |
||||
|
strategies.add(new BaiduStrategy()); |
||||
|
strategies.add(new BingStrategy()); |
||||
|
strategies.add(new CsdnStrategy()); |
||||
|
|
||||
|
List<Article> allResults = new ArrayList<>(); |
||||
|
|
||||
|
for (CrawlerStrategy strategy : strategies) { |
||||
|
try { |
||||
|
System.out.println("正在爬取 [" + strategy.getName() + "] ..."); |
||||
|
|
||||
|
// 调用具体策略的 crawl 方法
|
||||
|
List<Article> results = strategy.crawl(keyword); |
||||
|
|
||||
|
// 将当前策略的结果添加到总列表中
|
||||
|
allResults.addAll(results); |
||||
|
|
||||
|
System.out.println("[" + strategy.getName() + "] 爬取完成,获取到 " + results.size() + " 条数据。"); |
||||
|
|
||||
|
} catch (Exception e) { |
||||
|
// 捕获单个策略的异常,防止因为一个网站挂了导致整个程序崩溃
|
||||
|
System.err.println("警告: [" + strategy.getName() + "] 爬取失败: " + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
System.out.println("搜索结束,共获取到 " + allResults.size() + " 条结果。"); |
||||
|
return allResults; |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,23 @@ |
|||||
|
package org.example; |
||||
|
|
||||
|
import org.example.command.Command; |
||||
|
import org.example.command.SearchCommand; |
||||
|
import org.example.service.CrawlerService; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
public class Main { |
||||
|
public static void main(String[] args) { |
||||
|
System.out.println("简易搜索引擎启动"); |
||||
|
|
||||
|
// 1. 初始化 Service
|
||||
|
CrawlerService service = new CrawlerService(); |
||||
|
|
||||
|
// 2. 初始化 Command (注入 Service)
|
||||
|
Command searchCmd = new SearchCommand(service); |
||||
|
|
||||
|
// 3. 执行命令
|
||||
|
searchCmd.execute(); |
||||
|
|
||||
|
System.out.println("程序结束"); |
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,55 @@ |
|||||
|
package org.example.command; |
||||
|
|
||||
|
import org.example.exception.CrawlerException; |
||||
|
import org.example.model.Article; |
||||
|
import org.example.service.CrawlerService; |
||||
|
|
||||
|
import java.util.List; |
||||
|
import java.util.Scanner; |
||||
|
|
||||
|
/** |
||||
|
* 处理搜索指令的具体命令类 |
||||
|
*/ |
||||
|
public class SearchCommand implements Command { |
||||
|
|
||||
|
private final CrawlerService crawlerService; |
||||
|
|
||||
|
// 构造方法注入 Service
|
||||
|
public SearchCommand(CrawlerService crawlerService) { |
||||
|
this.crawlerService = crawlerService; |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute() { |
||||
|
Scanner scanner = new Scanner(System.in); |
||||
|
try { |
||||
|
System.out.println("请输入要搜索的关键词:"); |
||||
|
String keyword = scanner.nextLine(); |
||||
|
|
||||
|
System.out.println("正在爬取数据,请稍候..."); |
||||
|
|
||||
|
// 调用 Service 层的搜索方法
|
||||
|
List<Article> results = crawlerService.search(keyword); |
||||
|
|
||||
|
if (results.isEmpty()) { |
||||
|
System.out.println("未找到相关结果。"); |
||||
|
} else { |
||||
|
System.out.println("共找到 " + results.size() + " 条结果:"); |
||||
|
for (Article article : results) { |
||||
|
System.out.println(article); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
} catch (CrawlerException e) { |
||||
|
// 捕获我们在 Service 层抛出的自定义异常
|
||||
|
System.err.println("搜索失败:" + e.getMessage()); |
||||
|
} catch (Exception e) { |
||||
|
System.err.println("发生未知错误:" + e.getMessage()); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public void execute(String[] args) { |
||||
|
|
||||
|
} |
||||
|
} |
||||
@ -0,0 +1,27 @@ |
|||||
|
<?xml version="1.0" encoding="UTF-8"?> |
||||
|
<project xmlns="http://maven.apache.org/POM/4.0.0" |
||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" |
||||
|
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> |
||||
|
<modelVersion>4.0.0</modelVersion> |
||||
|
|
||||
|
<groupId>org.example</groupId> |
||||
|
<artifactId>Homework</artifactId> |
||||
|
<version>1.0-SNAPSHOT</version> |
||||
|
|
||||
|
<properties> |
||||
|
<maven.compiler.source>25</maven.compiler.source> |
||||
|
<maven.compiler.target>25</maven.compiler.target> |
||||
|
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding> |
||||
|
</properties> |
||||
|
<dependencies> |
||||
|
<!-- Jsoup: used to parse HTML pages --> |
||||
|
<dependency> |
||||
|
<groupId>org.jsoup</groupId> |
||||
|
<artifactId>jsoup</artifactId> |
||||
|
<version>1.15.3</version> |
||||
|
</dependency> |
||||
|
|
||||
|
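<!-- HttpClient: for sending network requests (optional; Jsoup ships with simple connection support, but HttpClient is more capable) --> |
||||
|
<!-- Only Jsoup is added for now to keep the initial setup simple; add HttpClient later if more complex concurrent crawling is needed --> |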
||||
|
</dependencies> |
||||
|
</project> |
||||
Loading…
Reference in new issue