21 changed files with 721 additions and 0 deletions
@ -0,0 +1,40 @@ |
|||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>org.example</groupId>
  <artifactId>crawl_project_extension</artifactId>
  <version>1.0-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>crawl_project</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- The sources use lambdas, streams and try-with-resources, so the
         language level is pinned to Java 8 instead of relying on the
         maven-compiler-plugin default, which differs between plugin versions. -->
    <maven.compiler.source>1.8</maven.compiler.source>
    <maven.compiler.target>1.8</maven.compiler.target>
  </properties>

  <dependencies>
    <!-- Test-only: AppTest is written against the JUnit 3 TestCase API. -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>3.8.1</version>
      <scope>test</scope>
    </dependency>
    <!-- HTML fetching and parsing for the crawler. -->
    <dependency>
      <groupId>org.jsoup</groupId>
      <artifactId>jsoup</artifactId>
      <version>1.17.2</version>
    </dependency>
    <dependency>
      <groupId>com.opencsv</groupId>
      <artifactId>opencsv</artifactId>
      <version>5.9</version>
    </dependency>
    <!-- Chart rendering and PNG export. -->
    <dependency>
      <groupId>org.knowm.xchart</groupId>
      <artifactId>xchart</artifactId>
      <version>3.8.7</version>
    </dependency>
  </dependencies>
</project>
|||
@ -0,0 +1,127 @@ |
|||
package com.example; |
|||
import org.knowm.xchart.*; |
|||
import org.knowm.xchart.style.Styler; |
|||
import java.awt.*; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.Map.Entry; |
|||
public class ChartGenerator { |
|||
|
|||
// 1. 绘制【年份电影数量 - 柱状图】
|
|||
public static void saveBarChart(List<Movie> movies) { |
|||
Map<Integer, Long> yearMap = movies.stream() |
|||
.filter(m -> m.getYear() > 1980) |
|||
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); |
|||
|
|||
List<Entry<Integer, Long>> sortedList = new ArrayList<>(yearMap.entrySet()); |
|||
sortedList.sort(Entry.comparingByKey()); |
|||
|
|||
if (sortedList.size() > 15) { |
|||
sortedList = sortedList.subList(0, 15); |
|||
} |
|||
|
|||
List<String> xData = new ArrayList<>(); |
|||
List<Long> yData = new ArrayList<>(); |
|||
for (Entry<Integer, Long> entry : sortedList) { |
|||
xData.add(entry.getKey().toString()); |
|||
yData.add(entry.getValue()); |
|||
} |
|||
|
|||
CategoryChart chart = new CategoryChartBuilder() |
|||
.width(1000) |
|||
.height(600) |
|||
.title("豆瓣Top250 - 各年份电影数量柱状图") |
|||
.xAxisTitle("年份") |
|||
.yAxisTitle("电影数量") |
|||
.theme(Styler.ChartTheme.Matlab) |
|||
.build(); |
|||
|
|||
chart.getStyler().setLegendVisible(false); |
|||
chart.getStyler().setLabelsVisible(true); |
|||
chart.getStyler().setXAxisLabelRotation(45); |
|||
chart.getStyler().setChartBackgroundColor(Color.WHITE); |
|||
|
|||
chart.addSeries("电影数量", xData, yData); |
|||
|
|||
try { |
|||
BitmapEncoder.saveBitmap(chart, "./年份电影数量_柱状图", BitmapEncoder.BitmapFormat.PNG); |
|||
System.out.println("✅ 柱状图已保存:年份电影数量_柱状图.png"); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
// 2. 绘制【评分趋势 - 折线图】
|
|||
public static void saveLineChart(List<Movie> movies) { |
|||
Map<Integer, Double> avgRatingMap = movies.stream() |
|||
.filter(m -> m.getYear() > 1980) |
|||
.collect(Collectors.groupingBy(Movie::getYear, Collectors.averagingDouble(Movie::getRating))); |
|||
|
|||
List<Entry<Integer, Double>> sortedList = new ArrayList<>(avgRatingMap.entrySet()); |
|||
sortedList.sort(Entry.comparingByKey()); |
|||
|
|||
if (sortedList.size() > 15) { |
|||
sortedList = sortedList.subList(0, 15); |
|||
} |
|||
|
|||
// ✅ 修复:X轴使用数字类型 Integer,不再用字符串
|
|||
List<Integer> xData = new ArrayList<>(); |
|||
List<Double> yData = new ArrayList<>(); |
|||
for (Entry<Integer, Double> entry : sortedList) { |
|||
xData.add(entry.getKey()); |
|||
yData.add(entry.getValue()); |
|||
} |
|||
|
|||
XYChart chart = new XYChartBuilder() |
|||
.width(1000) |
|||
.height(600) |
|||
.title("豆瓣Top250 - 历年平均评分趋势") |
|||
.xAxisTitle("年份") |
|||
.yAxisTitle("平均评分") |
|||
.theme(Styler.ChartTheme.Matlab) |
|||
.build(); |
|||
|
|||
chart.getStyler().setMarkerSize(6); |
|||
chart.getStyler().setChartBackgroundColor(Color.WHITE); |
|||
chart.addSeries("平均评分", xData, yData); |
|||
|
|||
try { |
|||
BitmapEncoder.saveBitmap(chart, "./历年平均评分_折线图", BitmapEncoder.BitmapFormat.PNG); |
|||
System.out.println("✅ 折线图已保存!"); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
|
|||
// 3. 绘制【高分电影占比 - 饼图】
|
|||
public static void savePieChart(List<Movie> movies) { |
|||
long gao = movies.stream().filter(m -> m.getRating() >= 9.5).count(); |
|||
long zhong = movies.stream().filter(m -> m.getRating() >= 9.0 && m.getRating() < 9.5).count(); |
|||
long di = movies.stream().filter(m -> m.getRating() < 9.0).count(); |
|||
|
|||
PieChart chart = new PieChartBuilder() |
|||
.width(700) |
|||
.height(700) |
|||
.title("豆瓣Top250 - 评分分布饼图") |
|||
.theme(Styler.ChartTheme.Matlab) |
|||
.build(); |
|||
|
|||
chart.addSeries("9.5分及以上", gao); |
|||
chart.addSeries("9.0-9.5分", zhong); |
|||
chart.addSeries("9.0分以下", di); |
|||
|
|||
chart.getStyler().setChartBackgroundColor(Color.WHITE); |
|||
chart.getStyler().setLegendVisible(true); |
|||
|
|||
try { |
|||
BitmapEncoder.saveBitmap(chart, "./评分分布_饼图", BitmapEncoder.BitmapFormat.PNG); |
|||
System.out.println("✅ 饼图已保存:评分分布_饼图.png"); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,37 @@ |
|||
package com.example; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.util.List; |
|||
public class CsvExporter{ |
|||
public static void exportToCsv(List<Movie> movies, String filePath) { |
|||
try (FileWriter writer = new FileWriter(filePath)) { |
|||
// 1. 表头:确保顺序是【电影名称,导演,上映年份,豆瓣评分,评价人数】
|
|||
writer.write("电影名称,导演,上映年份,豆瓣评分,评价人数\n"); |
|||
|
|||
// 2. 写入数据:字段顺序必须和表头完全对应!
|
|||
for (Movie movie : movies) { |
|||
String line = String.format("%s,%s,%d,%.1f,%d\n", |
|||
escapeCsv(movie.getTitle()), // 1.电影名称
|
|||
escapeCsv(movie.getDirector()), // 2.导演
|
|||
movie.getYear(), // 3.上映年份
|
|||
movie.getRating(), // 4.豆瓣评分
|
|||
movie.getReviewCount() // 5.评价人数(这里之前写反了!)
|
|||
); |
|||
writer.write(line); |
|||
} |
|||
System.out.println("\nCSV文件导出成功!路径:" + filePath); |
|||
System.out.println("提示:评价人数在第5列,已显示真实数据!"); |
|||
} catch (IOException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
} |
|||
// CSV 特殊字符转义(避免逗号/引号导致格式错乱)
|
|||
private static String escapeCsv(String value) { |
|||
if (value == null) return ""; |
|||
// 包含逗号、引号或换行时,用双引号包裹
|
|||
if (value.contains(",") || value.contains("\"") || value.contains("\n")) { |
|||
return "\"" + value.replace("\"", "\"\"") + "\""; |
|||
} |
|||
return value; |
|||
} |
|||
} |
|||
@ -0,0 +1,37 @@ |
|||
package com.example; |
|||
import com.example.MovieAnalyzer; |
|||
|
|||
import java.util.List; |
|||
import java.util.Map; |
|||
import java.util.stream.Collectors; |
|||
public class DataAnalyzer implements MovieAnalyzer { |
|||
|
|||
@Override |
|||
public void analyzeByDimension(List<Movie> movies) { |
|||
System.out.println("\n===== 评分最高Top10电影 ====="); |
|||
movies.stream() |
|||
.sorted((m1, m2) -> Double.compare(m2.getRating(), m1.getRating())) |
|||
.limit(10) |
|||
.forEach(m -> System.out.printf("%-25s 评分: %.1f 年份: %d%n", |
|||
m.getTitle(), m.getRating(), m.getYear())); |
|||
System.out.println("\n===== 各年份电影数量统计 ====="); |
|||
Map<Integer, Long> countByYear = movies.stream() |
|||
.filter(m -> m.getYear() != 0) |
|||
.collect(Collectors.groupingBy(Movie::getYear, Collectors.counting())); |
|||
|
|||
// 按年份排序输出
|
|||
countByYear.entrySet().stream() |
|||
.sorted(Map.Entry.comparingByKey()) |
|||
.forEach(entry -> |
|||
System.out.printf("年份: %-4d 数量: %d 部%n", entry.getKey(), entry.getValue())); |
|||
} |
|||
|
|||
// 统计总数据
|
|||
@Override |
|||
public void analyzeTotal(List<Movie> movies){ |
|||
System.out.println("\n===== 数据总览 ====="); |
|||
System.out.println("电影总数:" + movies.size()); |
|||
double avgRating = movies.stream().mapToDouble(Movie::getRating).average().orElse(0); |
|||
System.out.printf("平均评分:%.2f%n", avgRating); |
|||
} |
|||
} |
|||
@ -0,0 +1,100 @@ |
|||
package com.example; |
|||
import com.example.MovieCrawler; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
public class DoubanCrawler implements MovieCrawler { |
|||
// 编译年份正则(提取4位数字年份)
|
|||
private static final Pattern YEAR_PATTERN = Pattern.compile("(\\d{4})"); |
|||
@Override |
|||
public List<Movie> crawl() { |
|||
List<Movie> movies = new ArrayList<>(); |
|||
String baseUrl = "https://movie.douban.com/top250?start="; |
|||
|
|||
try { |
|||
// 10页,每页25条
|
|||
for (int i = 0; i < 250; i += 25) { |
|||
String url = baseUrl + i; |
|||
System.out.println("正在爬取:" + url); |
|||
|
|||
Document doc = Jsoup.connect(url) |
|||
.header("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36") |
|||
.timeout(8000) |
|||
.get(); |
|||
|
|||
Elements items = doc.select(".item"); |
|||
for (Element item : items) { |
|||
Movie movie = new Movie(); |
|||
|
|||
// 1. 电影名
|
|||
movie.setTitle(item.select(".title").first().text()); |
|||
|
|||
// 2. 评分
|
|||
movie.setRating(Double.parseDouble(item.select(".rating_num").text())); |
|||
|
|||
// 3. 评价人数
|
|||
int reviewCount = 0; |
|||
String allText = item.text(); // 直接拿整个区块的文字
|
|||
Pattern pattern = Pattern.compile("(\\d+)人评价"); |
|||
Matcher matcher = pattern.matcher(allText); |
|||
if (matcher.find()) { |
|||
reviewCount = Integer.parseInt(matcher.group(1)); |
|||
} |
|||
movie.setReviewCount(reviewCount); |
|||
movie.setReviewCount(reviewCount); |
|||
// 4. 电影信息(导演 + 年份)
|
|||
String info = item.select(".bd p").first().text(); |
|||
|
|||
// 清洗导演
|
|||
movie.setDirector(cleanDirector(info)); |
|||
// 清洗年份
|
|||
movie.setYear(cleanYear(info)); |
|||
|
|||
movies.add(movie); |
|||
} |
|||
|
|||
// 文明爬虫,随机延迟
|
|||
Thread.sleep((long) (Math.random() * 2000 + 1000)); |
|||
} |
|||
System.out.println("爬取完成!共获取 " + movies.size() + " 部电影"); |
|||
} catch (IOException | InterruptedException e) { |
|||
e.printStackTrace(); |
|||
} |
|||
return movies; |
|||
} |
|||
// 实现接口方法:返回爬虫名称
|
|||
@Override |
|||
public String getCrawlerName(){ |
|||
return "豆瓣top250"; |
|||
} |
|||
/** |
|||
* 清洗导演信息 |
|||
*/ |
|||
private String cleanDirector(String info) { |
|||
if (info.contains("导演:")) { |
|||
int start = info.indexOf("导演:") + 3; |
|||
int end = info.indexOf(" ", start + 2); |
|||
if (end == -1) end = info.length(); |
|||
return info.substring(start, end).trim(); |
|||
} |
|||
return "未知"; |
|||
} |
|||
|
|||
/** |
|||
* 正则提取年份 |
|||
*/ |
|||
private int cleanYear(String info) { |
|||
Matcher matcher = YEAR_PATTERN.matcher(info); |
|||
if (matcher.find()) { |
|||
return Integer.parseInt(matcher.group(1)); |
|||
} |
|||
return 0; |
|||
} |
|||
} |
|||
|
|||
@ -0,0 +1,22 @@ |
|||
package com.example; |
|||
import java.util.List; |
|||
public class Main { |
|||
public static void main(String[] args) { |
|||
// 1. 爬取数据
|
|||
MovieCrawler crawler = new DoubanCrawler(); |
|||
List<Movie> movies = crawler.crawl(); |
|||
System.out.println("测试:第一部电影评价人数=" + movies.get(0).getReviewCount()); |
|||
// 2. 数据分析
|
|||
MovieAnalyzer analyzer = new DataAnalyzer(); |
|||
analyzer.analyzeTotal(movies); |
|||
analyzer.analyzeByDimension(movies); |
|||
|
|||
// 3. 导出CSV
|
|||
CsvExporter.exportToCsv(movies, "douban_top250.csv"); |
|||
// 🔥 生成图表(自动保存 3 张 PNG)
|
|||
// ==========================================
|
|||
ChartGenerator.saveBarChart(movies); // 柱状图
|
|||
ChartGenerator.saveLineChart(movies); // 折线图
|
|||
ChartGenerator.savePieChart(movies); // 饼图
|
|||
} |
|||
} |
|||
@ -0,0 +1,75 @@ |
|||
package com.example; |
|||
|
|||
/**
 * Mutable data holder for one movie: title, director, release year,
 * rating and number of reviews.
 */
public class Movie {

    /** Movie title. */
    private String title;

    /** Director name. */
    private String director;

    /** Release year. */
    private int year;

    /** Rating score. */
    private double rating;

    /** Number of reviews. */
    private int reviewCount;

    /** Creates a movie with all fields at their Java default values. */
    public Movie() {
    }

    /**
     * Creates a fully populated movie.
     *
     * @param title       movie title
     * @param director    director name
     * @param year        release year
     * @param rating      rating score
     * @param reviewCount number of reviews
     */
    public Movie(String title, String director, int year, double rating, int reviewCount) {
        this.title = title;
        this.director = director;
        this.year = year;
        this.rating = rating;
        this.reviewCount = reviewCount;
    }

    /** @return the movie title */
    public String getTitle() {
        return this.title;
    }

    /** @param title the movie title */
    public void setTitle(String title) {
        this.title = title;
    }

    /** @return the director name */
    public String getDirector() {
        return this.director;
    }

    /** @param director the director name */
    public void setDirector(String director) {
        this.director = director;
    }

    /** @return the release year */
    public int getYear() {
        return this.year;
    }

    /** @param year the release year */
    public void setYear(int year) {
        this.year = year;
    }

    /** @return the rating score */
    public double getRating() {
        return this.rating;
    }

    /** @param rating the rating score */
    public void setRating(double rating) {
        this.rating = rating;
    }

    /** @return the number of reviews */
    public int getReviewCount() {
        return this.reviewCount;
    }

    /** @param reviewCount the number of reviews */
    public void setReviewCount(int reviewCount) {
        this.reviewCount = reviewCount;
    }

    /**
     * Renders all five fields in a single-line, labelled form.
     *
     * @return the textual representation of this movie
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder("Movie{");
        text.append("片名='").append(title).append('\'');
        text.append(", 导演='").append(director).append('\'');
        text.append(", 年份=").append(year);
        text.append(", 评分=").append(rating);
        text.append(", 评价人数=").append(reviewCount);
        text.append('}');
        return text.toString();
    }
}
|||
|
|||
@ -0,0 +1,8 @@ |
|||
package com.example; |
|||
import java.util.List; |
|||
public interface MovieAnalyzer { |
|||
// 总览分析
|
|||
void analyzeTotal(List<Movie> movies); |
|||
// 按维度分析(TopN、年份等)
|
|||
void analyzeByDimension(List<Movie> movies); |
|||
} |
|||
@ -0,0 +1,8 @@ |
|||
package com.example; |
|||
import java.util.List; |
|||
public interface MovieCrawler { |
|||
// 爬取电影列表
|
|||
List<Movie> crawl(); |
|||
// 获取爬虫名称(如"豆瓣Top250"、"IMDB Top100")
|
|||
String getCrawlerName(); |
|||
} |
|||
@ -0,0 +1,13 @@ |
|||
package org.example; |
|||
|
|||
/**
 * Placeholder application that prints a fixed greeting.
 */
public class App {

    /** Text printed by {@link #main(String[])}. */
    private static final String GREETING = "Hello World!";

    /**
     * Prints the greeting followed by a line break to standard output.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.out.println(GREETING);
    }
}
|||
@ -0,0 +1,38 @@ |
|||
package org.example; |
|||
|
|||
import junit.framework.Test; |
|||
import junit.framework.TestCase; |
|||
import junit.framework.TestSuite; |
|||
|
|||
/** |
|||
* Unit test for simple App. |
|||
*/ |
|||
public class AppTest |
|||
extends TestCase |
|||
{ |
|||
/** |
|||
* Create the test case |
|||
* |
|||
* @param testName name of the test case |
|||
*/ |
|||
public AppTest( String testName ) |
|||
{ |
|||
super( testName ); |
|||
} |
|||
|
|||
/** |
|||
* @return the suite of tests being tested |
|||
*/ |
|||
public static Test suite() |
|||
{ |
|||
return new TestSuite( AppTest.class ); |
|||
} |
|||
|
|||
/** |
|||
* Rigourous Test :-) |
|||
*/ |
|||
public void testApp() |
|||
{ |
|||
assertTrue( true ); |
|||
} |
|||
} |
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -0,0 +1,216 @@ |
|||
# Java 面向对象程序设计实验报告 |
|||
## 主题:基于豆瓣电影 TOP250 数据爬取与分析系统的**接口与多态扩展** |
|||
|
|||
## 一、实验目的 |
|||
1. 深入理解 Java **接口(Interface)** 的定义、作用与使用场景。 |
|||
2. 掌握 **多态(Polymorphism)** 的实现原理与代码编写方式。 |
|||
3. 学会使用 **抽象类** 实现代码复用,优化程序结构。 |
|||
4. 在已有的豆瓣电影 TOP250 爬取项目基础上,**通过接口与多态进行程序扩展**。 |
|||
5. 培养面向接口编程的思想,提高代码的**可扩展性、可维护性**。 |
|||
|
|||
## 二、实验环境 |
|||
- 开发工具:IntelliJ IDEA |
|||
- 开发语言:Java 8 |
|||
- 第三方库:Jsoup(网页爬取)、XChart(图表绘制) |
|||
- 运行系统:Windows 10 |
|||
|
|||
## 三、实验内容与需求 |
|||
1. 在原有豆瓣电影 TOP250 爬取代码基础上,抽取行为,定义**接口**。 |
|||
2. 使用**接口 + 实现类**的方式完成爬取、分析模块设计。 |
|||
3. 通过**多态**特性,实现“更换爬虫不改动主逻辑”的扩展效果。 |
|||
4. 使用**抽象类**封装通用代码,减少重复。 |
|||
5. 完成数据爬取、数据分析、CSV 导出、图片保存功能。 |
|||
|
|||
## 四、核心知识点 |
|||
### 1. 接口 |
|||
- 用于定义**方法规范**,只声明方法,不实现逻辑。 |
|||
- 本实验设计两个核心接口: |
|||
- `MovieCrawler`:电影爬取接口 |
|||
- `MovieAnalyzer`:电影分析接口 |
|||
|
|||
### 2. 多态 |
|||
- **父接口引用指向子类对象**。 |
|||
- 相同接口,不同实现类,表现出不同行为。 |
|||
- 扩展新功能时,**不修改原有代码,只新增实现类**。 |
|||
|
|||
### 3. 抽象类 |
|||
- 用于提取公共代码,提供通用逻辑。 |
|||
- 可以包含抽象方法,强制子类实现。 |
|||
|
|||
### 4. 扩展性 |
|||
- 新增爬虫(如 IMDB、猫眼)只需新增实现类,主程序几乎不变。 |
|||
|
|||
## 五、系统架构设计 |
|||
``` |
|||
MovieCrawler(接口:爬取规范) |
|||
↑ |
|||
AbstractMovieCrawler(抽象类:通用爬取逻辑) |
|||
↑ |
|||
DoubanCrawler(子类:豆瓣爬虫实现) |
|||
|
|||
MovieAnalyzer(接口:分析规范) |
|||
↑ |
|||
MovieAnalyzerImpl(实现类:数据分析实现) |
|||
``` |
|||
|
|||
## 六、核心代码实现 |
|||
|
|||
### 1. 电影实体类 Movie.java |
|||
```java |
|||
public class Movie { |
|||
private String title; // 电影名 |
|||
private String director; // 导演 |
|||
private int year; // 年份 |
|||
private double rating; // 评分 |
|||
private int reviewCount; // 评价人数 |
|||
|
|||
// getter & setter |
|||
public String getTitle() { return title; } |
|||
public void setTitle(String title) { this.title = title; } |
|||
public String getDirector() { return director; } |
|||
public void setDirector(String director) { this.director = director; } |
|||
public int getYear() { return year; } |
|||
public void setYear(int year) { this.year = year; } |
|||
public double getRating() { return rating; } |
|||
public void setRating(double rating) { this.rating = rating; } |
|||
public int getReviewCount() { return reviewCount; } |
|||
public void setReviewCount(int reviewCount) { this.reviewCount = reviewCount; } |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 2. 接口一:MovieCrawler.java(爬取接口) |
|||
```java |
|||
import java.util.List; |
|||
|
|||
public interface MovieCrawler { |
|||
// 爬取电影数据 |
|||
List<Movie> crawl(); |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 3. 抽象类:AbstractMovieCrawler.java |
|||
```java |
|||
public abstract class AbstractMovieCrawler implements MovieCrawler { |
|||
// 通用打印方法 |
|||
protected void log(String msg) { |
|||
System.out.println("[日志] " + msg); |
|||
} |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 4. 实现类:DoubanCrawler.java(豆瓣爬虫) |
|||
```java |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.select.Elements; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class DoubanCrawler extends AbstractMovieCrawler { |
|||
@Override |
|||
public List<Movie> crawl() { |
|||
List<Movie> movies = new ArrayList<>(); |
|||
String url = "https://movie.douban.com/top250"; |
|||
try { |
|||
Document doc = Jsoup.connect(url).userAgent("Mozilla/5.0").get(); |
|||
Elements items = doc.select(".item"); |
|||
|
|||
items.forEach(item -> { |
|||
Movie m = new Movie(); |
|||
m.setTitle(item.select(".title").first().text()); |
|||
m.setRating(Double.parseDouble(item.select(".rating_num").text())); |
|||
movies.add(m); |
|||
}); |
|||
log("豆瓣爬取完成"); |
|||
} catch (Exception e) { |
|||
e.printStackTrace(); |
|||
} |
|||
return movies; |
|||
} |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 5. 接口二:MovieAnalyzer.java(分析接口) |
|||
```java |
|||
import java.util.List; |
|||
|
|||
public interface MovieAnalyzer { |
|||
void analyze(List<Movie> movies); |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 6. 实现类:MovieAnalyzerImpl.java |
|||
```java |
|||
import java.util.List; |
|||
|
|||
public class MovieAnalyzerImpl implements MovieAnalyzer { |
|||
@Override |
|||
public void analyze(List<Movie> movies) { |
|||
System.out.println("===== 数据分析 ====="); |
|||
System.out.println("电影总数:" + movies.size()); |
|||
double avg = movies.stream().mapToDouble(Movie::getRating).average().orElse(0); |
|||
System.out.println("平均评分:" + avg); |
|||
} |
|||
} |
|||
``` |
|||
|
|||
--- |
|||
|
|||
### 7. 主程序(多态体现) |
|||
```java |
|||
import java.util.List; |
|||
|
|||
public class Main { |
|||
public static void main(String[] args) { |
|||
// ====================== |
|||
// 多态:接口指向实现类 |
|||
// ====================== |
|||
MovieCrawler crawler = new DoubanCrawler(); |
|||
MovieAnalyzer analyzer = new MovieAnalyzerImpl(); |
|||
|
|||
// 爬取 & 分析 |
|||
List<Movie> movies = crawler.crawl(); |
|||
analyzer.analyze(movies); |
|||
} |
|||
} |
|||
``` |
|||
|
|||
## 七、接口与多态扩展说明 |
|||
1. **如果需要新增其他网站爬虫**: |
|||
- 新建 `ImdbCrawler` 实现 `MovieCrawler` |
|||
- 主程序只需修改: |
|||
```java |
|||
MovieCrawler crawler = new ImdbCrawler(); |
|||
``` |
|||
- 其他代码完全不用改动。 |
|||
|
|||
2. **多态优势**: |
|||
- 易于扩展 |
|||
- 降低耦合 |
|||
- 符合面向对象设计原则 |
|||
|
|||
## 八、实验结果 |
|||
1. 成功爬取豆瓣电影 TOP250 数据。 |
|||
2. 成功输出电影总数、平均评分。 |
|||
3. 成功使用接口、抽象类、多态完成程序设计。 |
|||
4. 程序结构清晰,具备良好扩展能力。 |
|||
|
|||
## 九、实验总结 |
|||
1. 掌握了**接口**用于定义规范,**抽象类**用于复用代码。 |
|||
2. 理解了**多态**就是“同一接口,不同实现”。 |
|||
3. 学会了在实际项目中使用面向对象思想优化代码结构。 |
|||
4. 扩展新功能只需新增实现类,不改动原有代码,体现了良好的可扩展性。 |
|||
|
|||
--- |
|||
Loading…
Reference in new issue