Browse Source

实验五

main
故春 1 month ago
parent
commit
07dc6f7bc2
  1. 13
      实验五/AnimatedMovie.java
  2. 10
      实验五/CrawlStrategy.java
  3. 27
      实验五/CrawlerContext.java
  4. 46
      实验五/DoubanStrategy.java
  5. 13
      实验五/DouyinMovie.java
  6. 45
      实验五/DouyinStrategy.java
  7. 21
      实验五/Main.java
  8. 23
      实验五/Movie.java
  9. 3
      实验五/MoviePlayable.java
  10. 13
      实验五/TheatreMovie.java
  11. 13
      实验五/XiaohongshuMovie.java
  12. 45
      实验五/XiaohongshuStrategy.java

13
实验五/AnimatedMovie.java

@ -0,0 +1,13 @@
/**
 * A {@link Movie} that additionally records the studio that produced it.
 *
 * <p>The studio is fixed at construction time and never changes afterwards.
 */
public class AnimatedMovie extends Movie {
    /** Producing studio; printed verbatim by {@link #play()}. */
    private final String studio;

    /**
     * Creates an animated movie.
     *
     * @param title  title, forwarded to the {@link Movie} constructor
     * @param year   release year, forwarded to the {@link Movie} constructor
     * @param rating rating, forwarded to the {@link Movie} constructor
     * @param genre  genre, forwarded to the {@link Movie} constructor
     * @param studio name of the producing studio
     */
    public AnimatedMovie(String title, int year, double rating, String genre, String studio) {
        super(title, year, rating, genre);
        this.studio = studio;
    }

    /** Prints a single line containing the title and the studio to standard output. */
    @Override
    public void play() {
        System.out.println("动画电影播放:" + getTitle() + ",制作公司:" + studio);
    }
}

10
实验五/CrawlStrategy.java

@ -0,0 +1,10 @@
import org.jsoup.nodes.Element;
import java.util.List;
/**
 * Strategy interface: every site-specific crawler must implement these two methods.
 */
public interface CrawlStrategy {

    /**
     * Crawls movies from the site this strategy targets.
     *
     * @param limit upper bound on the number of movies the caller wants
     * @return the crawled movies
     */
    List<Movie> crawl(int limit);

    /**
     * Parses a single HTML element into a movie.
     *
     * @param element the element that describes one movie
     * @return the parsed movie
     */
    Movie parseMovie(Element element);
}

27
实验五/CrawlerContext.java

@ -0,0 +1,27 @@
import java.util.ArrayList;
import java.util.List;
/**
 * Context of the strategy pattern: owns the registered {@link CrawlStrategy}
 * instances and runs them one after another.
 */
public class CrawlerContext {
    /** Registered strategies, in insertion order (several sites can be crawled in one run). */
    private final List<CrawlStrategy> strategies = new ArrayList<>();

    /**
     * Registers one more crawl strategy.
     *
     * @param strategy the strategy to add; must not be {@code null}
     * @throws NullPointerException if {@code strategy} is {@code null}; failing here
     *         instead of later inside {@link #executeAllCrawl(int)} points at the
     *         caller that actually made the mistake
     */
    public void addStrategy(CrawlStrategy strategy) {
        if (strategy == null) {
            throw new NullPointerException("strategy");
        }
        strategies.add(strategy);
    }

    /**
     * Runs every registered strategy and concatenates their results in
     * registration order.
     *
     * @param limit passed unchanged to each strategy's {@code crawl} call, so the
     *              combined list may hold up to {@code limit} entries per strategy
     * @return a new mutable list with all crawled movies; never {@code null}
     */
    public List<Movie> executeAllCrawl(int limit) {
        List<Movie> result = new ArrayList<>();
        for (CrawlStrategy strategy : strategies) {
            List<Movie> crawled = strategy.crawl(limit);
            // A strategy that returns null is treated as "nothing found" rather
            // than aborting the remaining strategies with an exception from addAll.
            if (crawled != null) {
                result.addAll(crawled);
            }
        }
        return result;
    }

    /** Removes every registered strategy. */
    public void clearStrategies() {
        strategies.clear();
    }
}

46
实验五/DoubanStrategy.java

@ -0,0 +1,46 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} that reads movies from the first page of the Douban Top 250 list.
 *
 * <p>Only one request (to {@link #URL}) is made, so the result can never contain
 * more entries than there are {@code .item} elements on that single page.
 */
public class DoubanStrategy implements CrawlStrategy {
    private static final String URL = "https://movie.douban.com/top250?start=0";

    /**
     * Fetches the page and converts its {@code .item} elements into movies.
     *
     * <p>This is best-effort: any failure while fetching or parsing is reported
     * on the console and the movies collected so far (possibly none) are returned.
     *
     * @param limit maximum number of movies to return; values {@code <= 0} yield
     *              an empty list without touching the network
     * @return a new mutable list with at most {@code limit} movies; never {@code null}
     */
    @Override
    public List<Movie> crawl(int limit) {
        List<Movie> list = new ArrayList<>();
        if (limit <= 0) {
            return list;
        }
        try {
            Document doc = Jsoup.connect(URL)
                    .userAgent("Mozilla/5.0")
                    .timeout(10000)
                    .get();
            Elements items = doc.select(".item");
            for (Element item : items) {
                if (list.size() >= limit) {
                    break;
                }
                Movie movie = parseMovie(item);
                if (movie != null) {
                    list.add(movie);
                }
            }
        } catch (Exception ex) {
            // Deliberate catch-all at the crawl boundary: one failing site must not
            // abort the whole run. The user-facing message blames anti-crawling
            // protection for every failure, so the real cause goes to stderr.
            System.out.println("豆瓣抓取失败(反爬保护),已跳过");
            System.err.println("DoubanStrategy.crawl failed: " + ex);
        }
        return list;
    }

    /**
     * Builds a {@link TheatreMovie} from one {@code .item} element.
     *
     * <p>Only the title ({@code .hd .title}) and the rating ({@code .rating_num})
     * come from the page; year, genre and ticket price are fixed placeholder values.
     *
     * @param element the element describing one movie; may be {@code null}
     * @return the parsed movie, or {@code null} when the element is {@code null},
     *         the title or rating node is missing, the title is empty, or the
     *         rating text is not a number
     */
    @Override
    public Movie parseMovie(Element element) {
        if (element == null) {
            return null;
        }
        // first() returns null when nothing matched; check explicitly instead of
        // relying on a caught NullPointerException.
        Element titleElement = element.select(".hd .title").first();
        Element ratingElement = element.select(".rating_num").first();
        if (titleElement == null || ratingElement == null) {
            return null;
        }
        String title = titleElement.text();
        if (title.isEmpty()) {
            return null;
        }
        try {
            double rating = Double.parseDouble(ratingElement.text());
            return new TheatreMovie(title, 2024, rating, "电影", 49.9);
        } catch (NumberFormatException ex) {
            // Rating text was not numeric: treat the item as unparseable.
            return null;
        }
    }
}

13
实验五/DouyinMovie.java

@ -0,0 +1,13 @@
/**
 * A {@link Movie} representing a Douyin video, carrying a popularity label.
 *
 * <p>The label is fixed at construction time and never changes afterwards.
 */
public class DouyinMovie extends Movie {
    /** Popularity label; stored and printed as free-form text. */
    private final String hotValue;

    /**
     * Creates a Douyin video entry.
     *
     * @param title    title, forwarded to the {@link Movie} constructor
     * @param year     year, forwarded to the {@link Movie} constructor
     * @param rating   rating, forwarded to the {@link Movie} constructor
     * @param genre    genre, forwarded to the {@link Movie} constructor
     * @param hotValue popularity label shown by {@link #play()}
     */
    public DouyinMovie(String title, int year, double rating, String genre, String hotValue) {
        super(title, year, rating, genre);
        this.hotValue = hotValue;
    }

    /** Prints a single line containing the title and the popularity label to standard output. */
    @Override
    public void play() {
        System.out.println("抖音视频:" + getTitle() + ",热度:" + hotValue);
    }
}

45
实验五/DouyinStrategy.java

@ -0,0 +1,45 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} that looks for {@code h3} headings on the Douyin home page
 * and turns each distinct heading text into a {@link DouyinMovie}.
 */
public class DouyinStrategy implements CrawlStrategy {
    private static final String URL = "https://www.douyin.com";

    /**
     * Fetches the page and collects up to {@code limit} movies with distinct titles.
     *
     * <p>This is best-effort: any failure while fetching or parsing is reported
     * on the console and the movies collected so far (possibly none) are returned.
     *
     * @param limit maximum number of movies to return; values {@code <= 0} yield
     *              an empty list without touching the network
     * @return a new mutable list with at most {@code limit} movies; never {@code null}
     */
    @Override
    public List<Movie> crawl(int limit) {
        List<Movie> list = new ArrayList<>();
        if (limit <= 0) {
            return list;
        }
        try {
            Document doc = Jsoup.connect(URL)
                    .userAgent("Mozilla/5.0")
                    .timeout(10000)
                    .get();
            Elements containers = doc.select("div");
            for (Element container : containers) {
                if (list.size() >= limit) {
                    break;
                }
                Movie movie = parseMovie(container);
                // select("div") also returns nested divs, so one h3 is reachable
                // through each of its ancestor divs. Skip titles already collected
                // so a single heading cannot fill the limit with copies of itself.
                if (movie != null && !containsTitle(list, movie.getTitle())) {
                    list.add(movie);
                }
            }
        } catch (Exception ex) {
            // Deliberate catch-all at the crawl boundary: one failing site must not
            // abort the whole run. The user-facing message blames anti-crawling
            // protection for every failure, so the real cause goes to stderr.
            System.out.println("抖音抓取失败(反爬保护),已跳过");
            System.err.println("DouyinStrategy.crawl failed: " + ex);
        }
        return list;
    }

    /**
     * Builds a {@link DouyinMovie} from the first {@code h3} found in the element.
     *
     * <p>Only the title comes from the page; year, rating, genre and popularity
     * label are fixed placeholder values.
     *
     * @param element the element to search; may be {@code null}
     * @return the parsed movie, or {@code null} when the element is {@code null},
     *         contains no {@code h3}, or the heading text is empty
     */
    @Override
    public Movie parseMovie(Element element) {
        if (element == null) {
            return null;
        }
        // first() returns null when nothing matched; check explicitly instead of
        // relying on a caught NullPointerException.
        Element heading = element.select("h3").first();
        if (heading == null) {
            return null;
        }
        String title = heading.text();
        if (title.isEmpty()) {
            return null;
        }
        return new DouyinMovie(title, 2024, 9.5, "视频", "100w+");
    }

    /** Returns whether any movie in {@code movies} already carries {@code title}. */
    private static boolean containsTitle(List<Movie> movies, String title) {
        for (Movie movie : movies) {
            String existing = movie.getTitle();
            if (title == null ? existing == null : title.equals(existing)) {
                return true;
            }
        }
        return false;
    }
}

21
实验五/Main.java

@ -0,0 +1,21 @@
import java.util.List;
/**
 * Entry point: registers the three site strategies, crawls with a limit of 2
 * per strategy and plays every result.
 */
public class Main {

    /**
     * Runs the demo.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        CrawlerContext crawler = newContextWithAllSites();

        // Run every registered strategy.
        List<Movie> crawled = crawler.executeAllCrawl(2);

        // Polymorphic playback: each concrete Movie type prints its own line.
        for (int index = 0; index < crawled.size(); index++) {
            crawled.get(index).play();
        }
    }

    /** Creates a context and registers the strategies in the order Douban, Xiaohongshu, Douyin. */
    private static CrawlerContext newContextWithAllSites() {
        CrawlerContext crawler = new CrawlerContext();
        // Strategies are added dynamically; add or remove lines here to change the sites crawled.
        crawler.addStrategy(new DoubanStrategy());
        crawler.addStrategy(new XiaohongshuStrategy());
        crawler.addStrategy(new DouyinStrategy());
        return crawler;
    }
}

23
实验五/Movie.java

@ -0,0 +1,23 @@
/**
 * Base class for everything that can be played: holds the attributes shared by
 * all concrete movie types and leaves {@link #play()} to the subclasses.
 *
 * <p>All four attributes are set once in the constructor and never modified.
 */
public abstract class Movie implements MoviePlayable {
    private final String title;
    private final int year;
    private final double rating;
    private final String genre;

    /**
     * Stores the shared attributes.
     *
     * @param title  title of the movie
     * @param year   year associated with the movie
     * @param rating rating of the movie
     * @param genre  genre label of the movie
     */
    public Movie(String title, int year, double rating, String genre) {
        this.title = title;
        this.year = year;
        this.rating = rating;
        this.genre = genre;
    }

    /** Interface method; each concrete subclass supplies its own implementation. */
    @Override
    public abstract void play();

    /** @return the title passed to the constructor */
    public String getTitle() {
        return title;
    }

    /** @return the year passed to the constructor */
    public int getYear() {
        return year;
    }

    /** @return the rating passed to the constructor */
    public double getRating() {
        return rating;
    }

    /** @return the genre passed to the constructor */
    public String getGenre() {
        return genre;
    }
}

3
实验五/MoviePlayable.java

@ -0,0 +1,3 @@
/**
 * Capability interface for anything that can be played.
 */
public interface MoviePlayable {

    /** Plays this item. */
    void play();
}

13
实验五/TheatreMovie.java

@ -0,0 +1,13 @@
/**
 * A {@link Movie} shown in theatres, carrying a ticket price.
 *
 * <p>The price is fixed at construction time and never changes afterwards.
 */
public class TheatreMovie extends Movie {
    /** Ticket price; printed by {@link #play()} using default double formatting. */
    private final double price;

    /**
     * Creates a theatre movie.
     *
     * @param title  title, forwarded to the {@link Movie} constructor
     * @param year   year, forwarded to the {@link Movie} constructor
     * @param rating rating, forwarded to the {@link Movie} constructor
     * @param genre  genre, forwarded to the {@link Movie} constructor
     * @param price  ticket price
     */
    public TheatreMovie(String title, int year, double rating, String genre, double price) {
        super(title, year, rating, genre);
        this.price = price;
    }

    /** Prints a single line containing the title and the ticket price to standard output. */
    @Override
    public void play() {
        System.out.println("院线电影播放:" + getTitle() + ",票价:" + price);
    }
}

13
实验五/XiaohongshuMovie.java

@ -0,0 +1,13 @@
/**
 * A {@link Movie} representing a Xiaohongshu note, carrying the note's author.
 *
 * <p>The author is fixed at construction time and never changes afterwards.
 */
public class XiaohongshuMovie extends Movie {
    /** Author of the note; printed verbatim by {@link #play()}. */
    private final String author;

    /**
     * Creates a Xiaohongshu note entry.
     *
     * @param title  title, forwarded to the {@link Movie} constructor
     * @param year   year, forwarded to the {@link Movie} constructor
     * @param rating rating, forwarded to the {@link Movie} constructor
     * @param genre  genre, forwarded to the {@link Movie} constructor
     * @param author author of the note
     */
    public XiaohongshuMovie(String title, int year, double rating, String genre, String author) {
        super(title, year, rating, genre);
        this.author = author;
    }

    /** Prints a single line containing the title and the author to standard output. */
    @Override
    public void play() {
        System.out.println("小红书笔记:" + getTitle() + ",作者:" + author);
    }
}

45
实验五/XiaohongshuStrategy.java

@ -0,0 +1,45 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
/**
 * {@link CrawlStrategy} that looks for {@code h2} headings on the Xiaohongshu home
 * page and turns each distinct heading text into a {@link XiaohongshuMovie}.
 */
public class XiaohongshuStrategy implements CrawlStrategy {
    private static final String URL = "https://www.xiaohongshu.com";

    /**
     * Fetches the page and collects up to {@code limit} movies with distinct titles.
     *
     * <p>This is best-effort: any failure while fetching or parsing is reported
     * on the console and the movies collected so far (possibly none) are returned.
     *
     * @param limit maximum number of movies to return; values {@code <= 0} yield
     *              an empty list without touching the network
     * @return a new mutable list with at most {@code limit} movies; never {@code null}
     */
    @Override
    public List<Movie> crawl(int limit) {
        List<Movie> list = new ArrayList<>();
        if (limit <= 0) {
            return list;
        }
        try {
            Document doc = Jsoup.connect(URL)
                    .userAgent("Mozilla/5.0")
                    .timeout(10000)
                    .get();
            Elements containers = doc.select("div");
            for (Element container : containers) {
                if (list.size() >= limit) {
                    break;
                }
                Movie movie = parseMovie(container);
                // select("div") also returns nested divs, so one h2 is reachable
                // through each of its ancestor divs. Skip titles already collected
                // so a single heading cannot fill the limit with copies of itself.
                if (movie != null && !containsTitle(list, movie.getTitle())) {
                    list.add(movie);
                }
            }
        } catch (Exception ex) {
            // Deliberate catch-all at the crawl boundary: one failing site must not
            // abort the whole run. The user-facing message blames anti-crawling
            // protection for every failure, so the real cause goes to stderr.
            System.out.println("小红书抓取失败(反爬保护),已跳过");
            System.err.println("XiaohongshuStrategy.crawl failed: " + ex);
        }
        return list;
    }

    /**
     * Builds a {@link XiaohongshuMovie} from the first {@code h2} found in the element.
     *
     * <p>Only the title comes from the page; year, rating, genre and author are
     * fixed placeholder values.
     *
     * @param element the element to search; may be {@code null}
     * @return the parsed movie, or {@code null} when the element is {@code null},
     *         contains no {@code h2}, or the heading text is empty
     */
    @Override
    public Movie parseMovie(Element element) {
        if (element == null) {
            return null;
        }
        // first() returns null when nothing matched; check explicitly instead of
        // relying on a caught NullPointerException.
        Element heading = element.select("h2").first();
        if (heading == null) {
            return null;
        }
        String title = heading.text();
        if (title.isEmpty()) {
            return null;
        }
        return new XiaohongshuMovie(title, 2024, 9.2, "笔记", "小红书用户");
    }

    /** Returns whether any movie in {@code movies} already carries {@code title}. */
    private static boolean containsTitle(List<Movie> movies, String title) {
        for (Movie movie : movies) {
            String existing = movie.getTitle();
            if (title == null ? existing == null : title.equals(existing)) {
                return true;
            }
        }
        return false;
    }
}
Loading…
Cancel
Save