Browse Source

提交继承+多态+接口爬虫作业

main
ZhengShiyi 3 days ago
parent
commit
e3866fc8e4
  1. 207
      继承+多态+接口爬虫/CrawlerMain.java

207
继承+多态+接口爬虫/CrawlerMain.java

@ -0,0 +1,207 @@
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import java.util.ArrayList;
import java.util.List;
// ==================== 接口 ====================
/**
 * Common contract implemented by every crawler in this file.
 */
interface Crawler {

    /**
     * Runs the crawl and returns the collected items.
     *
     * @return the items gathered by this crawler; the element type is chosen
     *         by each implementation
     */
    List<?> startCrawl();
}
// ==================== 抽象爬虫父类 ====================
/**
 * Shared base for the concrete crawlers: keeps the starting URL and offers a
 * page-fetching helper built on jsoup.
 */
abstract class BaseCrawler implements Crawler {

    /** Root URL that subclasses fetch directly or extend with a suffix. */
    protected String baseUrl;

    /**
     * @param baseUrl root URL this crawler works from
     */
    public BaseCrawler(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Downloads and parses the page at {@code url} with an HTTP GET.
     *
     * @param url address of the page to fetch
     * @return the parsed document
     * @throws Exception if the request or parsing fails
     */
    protected Document getPage(String url) throws Exception {
        // Send a browser-like User-Agent header with the request.
        final String userAgent = "Mozilla/5.0 (Windows NT 10.0; Win64; x64)";
        // Value handed to jsoup's timeout setting (milliseconds per the jsoup API).
        final int timeoutMillis = 15000;
        return Jsoup.connect(url)
                .userAgent(userAgent)
                .timeout(timeoutMillis)
                .get();
    }
}
// ==================== 实体类 ====================
/**
 * Immutable value holder for one scraped movie entry (title and rating text).
 */
class Movie {

    private final String title;
    private final String rating;

    /**
     * @param title  movie title
     * @param rating rating text exactly as scraped
     */
    public Movie(String title, String rating) {
        this.title = title;
        this.rating = rating;
    }

    /**
     * Returns the single-line display form of this movie.
     *
     * @return the title and rating joined into the display format
     */
    @Override
    public String toString() {
        return "电影:《" + title + "》 | 评分:" + rating;
    }
}
/**
 * Immutable value holder for one scraped hero name.
 */
class Hero {

    private final String name;

    /**
     * @param name hero name as scraped
     */
    public Hero(String name) {
        this.name = name;
    }

    /**
     * Returns the single-line display form of this hero.
     *
     * @return the hero name with its display prefix
     */
    @Override
    public String toString() {
        return "英雄:" + name;
    }
}
/**
 * Immutable value holder for one city's scraped weather entry.
 */
class Weather {

    private final String province;
    private final String city;
    private final String temperature;
    private final String condition;

    /**
     * @param province    province the city belongs to
     * @param city        city name
     * @param temperature temperature text as scraped
     * @param condition   weather condition text as scraped
     */
    public Weather(String province, String city, String temperature, String condition) {
        this.province = province;
        this.city = city;
        this.temperature = temperature;
        this.condition = condition;
    }

    /**
     * Returns the single-line display form of this entry. Note that the
     * condition is printed before the temperature.
     *
     * @return province, city, condition and temperature joined for display
     */
    @Override
    public String toString() {
        return "省份:" + province + " | 城市:" + city + " | 天气:" + condition + " | 温度:" + temperature;
    }
}
// ==================== 豆瓣电影爬虫 ====================
/**
 * Crawler for the Douban Top 250 movie list. Pages are requested through the
 * {@code start} query parameter, one page at a time.
 */
class MovieCrawler extends BaseCrawler {

    /** Upper bound (exclusive) for the {@code start} offset. */
    private static final int TOTAL_ITEMS = 250;
    /** Step between consecutive {@code start} offsets. */
    private static final int PAGE_SIZE = 25;
    /** Pause between page requests, in milliseconds. */
    private static final long PAGE_DELAY_MILLIS = 1000L;

    public MovieCrawler() {
        super("https://movie.douban.com/top250");
    }

    /**
     * Fetches every page of the list and extracts title and rating per item.
     * <p>
     * On the first failure the crawl stops and the movies collected so far are
     * returned; a failure message is printed to standard output and the cause
     * to standard error. Items without a title element are skipped.
     *
     * @return the movies collected, possibly empty or partial; never {@code null}
     */
    @Override
    public List<Movie> startCrawl() {
        List<Movie> movies = new ArrayList<>();
        try {
            for (int start = 0; start < TOTAL_ITEMS; start += PAGE_SIZE) {
                Document page = getPage(baseUrl + "?start=" + start);
                for (Element item : page.select(".item")) {
                    Element titleNode = item.select(".title").first();
                    if (titleNode == null) {
                        // No title element in this item: skip it instead of
                        // letting a NullPointerException abort the whole crawl.
                        continue;
                    }
                    movies.add(new Movie(primaryTitle(titleNode.text()),
                            item.select(".rating_num").text()));
                }
                Thread.sleep(PAGE_DELAY_MILLIS);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            System.out.println("电影爬取失败");
            System.err.println("MovieCrawler interrupted: " + e);
        } catch (Exception e) {
            System.out.println("电影爬取失败");
            // Report the cause instead of discarding it.
            System.err.println("MovieCrawler failed: " + e);
        }
        return movies;
    }

    /**
     * Returns the trimmed part of {@code text} before the first {@code '/'},
     * or the whole trimmed text when there is no slash. Unlike
     * {@code split("/")[0]} this cannot throw on a text made only of slashes.
     */
    private static String primaryTitle(String text) {
        int slash = text.indexOf('/');
        String head = slash >= 0 ? text.substring(0, slash) : text;
        return head.trim();
    }
}
// ==================== 王者荣耀爬虫 ====================
/**
 * Crawler for the hero list page of the game site; collects the link texts
 * found inside the hero list.
 */
class HeroCrawler extends BaseCrawler {

    public HeroCrawler() {
        super("https://pvp.qq.com/web201605/herolist.shtml");
    }

    /**
     * Fetches the hero list page and builds one {@link Hero} per non-empty
     * link text matched by {@code ul.herolist li a}.
     * <p>
     * On failure a message is printed to standard output, the cause to
     * standard error, and whatever was collected so far is returned.
     *
     * @return the heroes collected, possibly empty; never {@code null}
     */
    @Override
    public List<Hero> startCrawl() {
        List<Hero> heroes = new ArrayList<>();
        try {
            Document page = getPage(baseUrl);
            Elements heroLinks = page.select("ul.herolist li a");
            for (Element link : heroLinks) {
                String name = link.text().trim();
                // Links with no visible text carry no hero name; ignore them.
                if (!name.isEmpty()) {
                    heroes.add(new Hero(name));
                }
            }
        } catch (Exception e) {
            System.out.println("英雄爬取失败");
            // Report the cause instead of discarding it.
            System.err.println("HeroCrawler failed: " + e);
        }
        return heroes;
    }
}
// ==================== 天气爬虫(全国城市) ====================
/**
 * Crawler that fetches one weather page per entry of a fixed city table.
 */
class WeatherCrawler extends BaseCrawler {

    /**
     * City table; each row is {province, city name, page code}. The page code
     * is appended to the base URL to form the city's page address.
     */
    private static final String[][] CITIES = {
        {"北京","北京","101010100"},{"上海","上海","101020100"},{"天津","天津","101030100"},{"重庆","重庆","101040100"},
        {"河北","石家庄","101090101"},{"山西","太原","101100101"},{"辽宁","沈阳","101070101"},{"吉林","长春","101060101"},
        {"黑龙江","哈尔滨","101050101"},{"江苏","南京","101190101"},{"浙江","杭州","101210101"},{"安徽","合肥","101220101"},
        {"福建","福州","101230101"},{"江西","南昌","101240101"},{"山东","济南","101120101"},{"河南","郑州","101180101"},
        {"湖北","武汉","101200101"},{"湖南","长沙","101250101"},{"广东","广州","101280101"},{"海南","海口","101310101"},
        {"四川","成都","101270101"},{"贵州","贵阳","101260101"},{"云南","昆明","101290101"},{"陕西","西安","101110101"},
        {"甘肃","兰州","101160101"},{"青海","西宁","101150101"},{"内蒙古","呼和浩特","101080101"},{"广西","南宁","101300101"},
        {"西藏","拉萨","101140101"},{"宁夏","银川","101170101"},{"新疆","乌鲁木齐","101130101"},
        {"香港","香港","101320101"},{"澳门","澳门","101330101"},{"台湾","台北","101340101"}
    };

    /** Pause between city requests, in milliseconds. */
    private static final long CITY_DELAY_MILLIS = 500L;

    public WeatherCrawler() {
        super("https://www.weather.com.cn/weather/");
    }

    /**
     * Fetches the page of every city in the table and reads the temperature
     * and condition text from the first {@code ul.t li} element, which this
     * code treats as today's entry. Cities whose page has no such element
     * are skipped.
     * <p>
     * On the first failure the crawl stops and the entries collected so far
     * are returned; a failure message is printed to standard output and the
     * cause to standard error.
     *
     * @return the weather entries collected, possibly empty or partial;
     *         never {@code null}
     */
    @Override
    public List<Weather> startCrawl() {
        List<Weather> results = new ArrayList<>();
        try {
            for (String[] entry : CITIES) {
                String province = entry[0];
                String cityName = entry[1];
                String code = entry[2];
                Document page = getPage(baseUrl + code + ".shtml");
                Element today = page.select("ul.t li").first();
                if (today != null) {
                    String temperature = today.select(".tem").text();
                    String condition = today.select(".wea").text();
                    results.add(new Weather(province, cityName, temperature, condition));
                }
                Thread.sleep(CITY_DELAY_MILLIS);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers can observe the interruption.
            Thread.currentThread().interrupt();
            System.out.println("天气爬取失败");
            System.err.println("WeatherCrawler interrupted: " + e);
        } catch (Exception e) {
            System.out.println("天气爬取失败");
            // Report the cause instead of discarding it.
            System.err.println("WeatherCrawler failed: " + e);
        }
        return results;
    }
}
// ==================== 主程序 ====================
/**
 * Program entry point: runs the three crawlers through the {@link Crawler}
 * interface and prints what each one returns.
 */
public class CrawlerMain {

    public static void main(String[] args) {
        // Hold every crawler through the interface type to demonstrate
        // polymorphism (the key point of the assignment).
        Crawler movieCrawler = new MovieCrawler();
        Crawler heroCrawler = new HeroCrawler();
        Crawler weatherCrawler = new WeatherCrawler();

        // Movies
        System.out.println("===== 豆瓣电影Top250 =====");
        printEach(movieCrawler.startCrawl());

        // Heroes: keep the list so its size can be reported after printing.
        System.out.println("\n===== 王者荣耀全英雄 =====");
        List<?> heroes = heroCrawler.startCrawl();
        printEach(heroes);
        System.out.println("共爬取 " + heroes.size() + " 个英雄");

        // Weather
        System.out.println("\n===== 全国各省天气 =====");
        printEach(weatherCrawler.startCrawl());
    }

    /** Prints every item of {@code items} on its own line, in list order. */
    private static void printEach(List<?> items) {
        for (int i = 0, n = items.size(); i < n; i++) {
            Object item = items.get(i);
            System.out.println(item);
        }
    }
}
Loading…
Cancel
Save