1 changed files with 209 additions and 0 deletions
@ -0,0 +1,209 @@ |
|||
import java.util.*; |
|||
import java.util.concurrent.ExecutorService; |
|||
import java.util.concurrent.Executors; |
|||
import java.util.concurrent.TimeUnit; |
|||
|
|||
/** |
|||
* 爬虫演示程序 |
|||
* 展示如何使用多态和继承实现不同的爬虫功能 |
|||
*/ |
|||
public class CrawlerDemo { |
|||
public static void main(String[] args) { |
|||
try { |
|||
System.out.println("=== AI爬虫程序演示 ==="); |
|||
|
|||
// 使用多态:创建基类引用,指向子类对象
|
|||
WebCrawler crawler; |
|||
|
|||
// 选择使用模拟爬虫(避免真实爬取的风险)
|
|||
// 如果需要真实爬取,将下面的注释取消,并注释掉模拟爬虫的创建
|
|||
crawler = new MockCrawler(); |
|||
// crawler = new BilibiliCrawler();
|
|||
|
|||
// 1. 爬取数据(多态调用)
|
|||
crawler.crawl(); |
|||
|
|||
// 2. 处理数据
|
|||
crawler.processData(); |
|||
|
|||
// 3. 展示结果
|
|||
crawler.displayResults(); |
|||
|
|||
System.out.println("\n=== 演示完成 ==="); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
} |
|||
|
|||
/**
 * Abstract base class for web crawlers.
 *
 * <p>Subclasses supply {@link #crawl()} to fill {@link #data}; this class
 * provides the shared keyword counting ({@link #processData()}) and console
 * reporting ({@link #displayResults()}).
 */
abstract class WebCrawler {

    /** Keywords whose presence in the collected items is counted. */
    private static final List<String> KEYWORDS = Arrays.asList(
            "游戏", "宠物", "剧情", "画面", "童年", "回忆", "活动", "技能");

    /** Raw text items collected by {@link #crawl()}. */
    protected List<String> data = new ArrayList<>();

    /** Maps each matched keyword to the number of items in {@link #data} containing it. */
    protected Map<String, Integer> results = new HashMap<>();

    /**
     * Collects raw items into {@link #data}. Every subclass must implement this.
     *
     * @throws Exception if the subclass fails to collect its data
     */
    public abstract void crawl() throws Exception;

    /**
     * Counts, for each keyword, how many items in {@link #data} contain it.
     *
     * <p>An item contributes at most one to a keyword's count, no matter how
     * often the keyword occurs inside that item. Keywords that match no item
     * are absent from {@link #results}.
     *
     * <p>The counts are rebuilt from scratch on every call, so calling this
     * method repeatedly (for example after another {@link #crawl()}) never
     * double-counts items that were already tallied.
     */
    public void processData() {
        System.out.println("开始处理数据...");

        // Start from a clean slate so the counts always reflect the current data.
        results.clear();

        for (String item : data) {
            for (String keyword : KEYWORDS) {
                if (item.contains(keyword)) {
                    results.merge(keyword, 1, Integer::sum);
                }
            }
        }

        System.out.println("数据处理完成");
    }

    /**
     * Prints the keyword counts in descending order, first as a plain list and
     * then as a simple text bar chart (one block character per counted item).
     */
    public void displayResults() {
        System.out.println("\n=== 爬取结果分析 ===");

        // Highest count first. Integer.compare avoids the overflow-prone
        // "subtract to compare" idiom.
        List<Map.Entry<String, Integer>> sortedResults = new ArrayList<>(results.entrySet());
        sortedResults.sort((a, b) -> Integer.compare(b.getValue(), a.getValue()));

        for (Map.Entry<String, Integer> entry : sortedResults) {
            System.out.println(entry.getKey() + ": " + entry.getValue() + "次");
        }

        System.out.println("\n=== 关键词频率分布 ===");
        for (Map.Entry<String, Integer> entry : sortedResults) {
            int count = entry.getValue();

            // Build the whole bar first so each keyword is emitted with one print call.
            StringBuilder bar = new StringBuilder(count);
            for (int i = 0; i < count; i++) {
                bar.append('█');
            }
            System.out.println(entry.getKey() + ": " + bar + " (" + count + ")");
        }
    }
}
|||
|
|||
/** |
|||
* 子类:B站爬虫 |
|||
* 实现真实的B站数据爬取 |
|||
*/ |
|||
class BilibiliCrawler extends WebCrawler { |
|||
@Override |
|||
public void crawl() throws Exception { |
|||
System.out.println("开始爬取B站数据..."); |
|||
|
|||
// 模拟爬取过程
|
|||
// 实际项目中,这里会实现真实的HTTP请求和数据解析
|
|||
|
|||
// 模拟视频列表
|
|||
List<String> videos = Arrays.asList( |
|||
"BV1xx411c7mW", "BV2xx411c7mX", "BV3xx411c7mY", "BV4xx411c7mZ", "BV5xx411c7mA" |
|||
); |
|||
|
|||
System.out.println("找到 " + videos.size() + " 个视频"); |
|||
|
|||
// 使用线程池提高爬取效率
|
|||
ExecutorService executor = Executors.newFixedThreadPool(3); |
|||
|
|||
for (String video : videos) { |
|||
executor.execute(() -> { |
|||
try { |
|||
// 模拟爬取单个视频的评论
|
|||
List<String> comments = getVideoComments(video); |
|||
synchronized (data) { |
|||
data.addAll(comments); |
|||
} |
|||
// 模拟网络延迟
|
|||
Thread.sleep(500); |
|||
} catch (InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
}); |
|||
} |
|||
|
|||
executor.shutdown(); |
|||
executor.awaitTermination(1, TimeUnit.MINUTES); |
|||
|
|||
System.out.println("爬取完成,共获取 " + data.size() + " 条数据"); |
|||
} |
|||
|
|||
/** |
|||
* 获取视频评论(模拟) |
|||
*/ |
|||
private List<String> getVideoComments(String videoId) { |
|||
// 模拟评论数据
|
|||
return Arrays.asList( |
|||
"洛克王国真的很好玩,童年回忆啊", |
|||
"希望洛克王国能出更多新宠物", |
|||
"洛克王国的剧情很精彩", |
|||
"洛克王国的画面越来越好了", |
|||
"洛克王国是我最喜欢的游戏之一", |
|||
"洛克王国的宠物设计很有创意", |
|||
"洛克王国的活动很丰富", |
|||
"洛克王国的音乐很好听", |
|||
"洛克王国陪伴了我的童年", |
|||
"希望洛克王国能一直更新下去" |
|||
); |
|||
} |
|||
} |
|||
|
|||
/** |
|||
* 子类:模拟爬虫 |
|||
* 使用模拟数据,避免真实爬取的风险 |
|||
*/ |
|||
class MockCrawler extends WebCrawler { |
|||
@Override |
|||
public void crawl() throws Exception { |
|||
System.out.println("开始爬取模拟数据..."); |
|||
|
|||
// 模拟数据
|
|||
data.addAll(Arrays.asList( |
|||
"洛克王国真的很好玩,童年回忆啊", |
|||
"希望洛克王国能出更多新宠物", |
|||
"洛克王国的剧情很精彩", |
|||
"洛克王国的画面越来越好了", |
|||
"洛克王国是我最喜欢的游戏之一", |
|||
"洛克王国的宠物设计很有创意", |
|||
"洛克王国的活动很丰富", |
|||
"洛克王国的音乐很好听", |
|||
"洛克王国陪伴了我的童年", |
|||
"希望洛克王国能一直更新下去", |
|||
"洛克王国的技能系统很有趣", |
|||
"洛克王国的画面效果很棒", |
|||
"童年的回忆,洛克王国", |
|||
"洛克王国的活动很多,很有趣", |
|||
"希望洛克王国能出更多新的宠物", |
|||
"洛克王国的剧情很吸引人", |
|||
"洛克王国的游戏玩法很丰富", |
|||
"洛克王国的画面比以前好了很多", |
|||
"洛克王国陪伴了我整个童年", |
|||
"希望洛克王国能一直保持更新" |
|||
)); |
|||
|
|||
System.out.println("爬取完成,共获取 " + data.size() + " 条数据"); |
|||
} |
|||
} |
|||
Loading…
Reference in new issue