From 89a7dd4b0668e05d9ca0155711f2aff670ec10cd Mon Sep 17 00:00:00 2001 From: ZhengShiyi <1980003269@qq.com> Date: Tue, 14 Apr 2026 21:31:07 +0800 Subject: [PATCH] =?UTF-8?q?=E7=BB=A7=E6=89=BF+=E5=A4=9A=E6=80=81=E7=88=AC?= =?UTF-8?q?=E8=99=AB?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- 继承+多态爬虫/CrawlerMain.java | 198 +++++++++++++++++++++++++++ 1 file changed, 198 insertions(+) create mode 100644 继承+多态爬虫/CrawlerMain.java diff --git a/继承+多态爬虫/CrawlerMain.java b/继承+多态爬虫/CrawlerMain.java new file mode 100644 index 0000000..eea491a --- /dev/null +++ b/继承+多态爬虫/CrawlerMain.java @@ -0,0 +1,198 @@ +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; +import java.util.ArrayList; +import java.util.List; + +// ==================== 抽象爬虫父类 ==================== +abstract class BaseCrawler { + protected String baseUrl; + + public BaseCrawler(String baseUrl) { + this.baseUrl = baseUrl; + } + + protected Document getPage(String url) throws Exception { + return Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36") + .timeout(15000) + .get(); + } + + public abstract List startCrawl(); +} + +// ==================== 实体类 ==================== +class Movie { + private String title; + private String rating; + + public Movie(String title, String rating) { + this.title = title; + this.rating = rating; + } + + @Override + public String toString() { + return "电影:《" + title + "》 | 评分:" + rating; + } +} + +class Hero { + private String name; + + public Hero(String name) { + this.name = name; + } + + @Override + public String toString() { + return "英雄:" + name; + } +} + +class Weather { + private String province; + private String city; + private String temperature; + private String condition; + + public Weather(String province, String city, String temperature, String condition) { + this.province = province; + this.city = city; + this.temperature = temperature; + this.condition = condition; + } + + @Override + public String toString() { + return "省份:" + province + " | 城市:" + city + " | 天气:" + condition + " | 温度:" + temperature; + } +} + +// ==================== 豆瓣电影爬虫 ==================== +class MovieCrawler extends BaseCrawler { + public MovieCrawler() { + super("https://movie.douban.com/top250"); + } + + @Override + public List startCrawl() { + List list = new ArrayList<>(); + try { + for (int i = 0; i < 250; i += 25) { + Document doc = getPage(baseUrl + "?start=" + i); + Elements items = doc.select(".item"); + for (Element e : items) { + String title = e.select(".title").first().text().split("/")[0].trim(); + String rating = e.select(".rating_num").text(); + list.add(new Movie(title, rating)); + } + Thread.sleep(1000); + } + } catch (Exception e) { + System.out.println("电影爬取失败"); + } + return list; + } +} + +// ==================== 王者荣耀爬虫(绝对能打印英雄) ==================== +class HeroCrawler extends BaseCrawler { + public HeroCrawler() { + super("https://pvp.qq.com/web201605/herolist.shtml"); + } + + @Override + public List startCrawl() { + List list = new ArrayList<>(); + try { + Document doc = getPage(baseUrl); + Elements heros = doc.select("ul.herolist li a"); + + for (Element h : heros) { + String name = h.text().trim(); + if (!name.isEmpty()) { + list.add(new Hero(name)); + } + } + } catch (Exception e) { + System.out.println("英雄爬取失败"); + } + return list; + } +} + +// ==================== 天气爬虫 ==================== +class WeatherCrawler extends BaseCrawler { + private static final String[][] cities = { + {"北京","北京","101010100"},{"上海","上海","101020100"},{"天津","天津","101030100"},{"重庆","重庆","101040100"}, + {"河北","石家庄","101090101"},{"山西","太原","101100101"},{"辽宁","沈阳","101070101"},{"吉林","长春","101060101"}, + {"黑龙江","哈尔滨","101050101"},{"江苏","南京","101190101"},{"浙江","杭州","101210101"},{"安徽","合肥","101220101"}, + {"福建","福州","101230101"},{"江西","南昌","101240101"},{"山东","济南","101120101"},{"河南","郑州","101180101"}, + {"湖北","武汉","101200101"},{"湖南","长沙","101250101"},{"广东","广州","101280101"},{"海南","海口","101310101"}, + {"四川","成都","101270101"},{"贵州","贵阳","101260101"},{"云南","昆明","101290101"},{"陕西","西安","101110101"}, + {"甘肃","兰州","101160101"},{"青海","西宁","101150101"},{"内蒙古","呼和浩特","101080101"},{"广西","南宁","101300101"}, + {"西藏","拉萨","101140101"},{"宁夏","银川","101170101"},{"新疆","乌鲁木齐","101130101"}, + {"香港","香港","101320101"},{"澳门","澳门","101330101"},{"台湾","台北","101340101"} + }; + + public WeatherCrawler() { + super("https://www.weather.com.cn/weather/"); + } + + @Override + public List startCrawl() { + List list = new ArrayList<>(); + try { + for (String[] city : cities) { + String province = city[0]; + String cityName = city[1]; + String code = city[2]; + Document doc = getPage(baseUrl + code + ".shtml"); + Element today = doc.select("ul.t li").first(); + if (today != null) { + String temp = today.select(".tem").text(); + String wea = today.select(".wea").text(); + list.add(new Weather(province, cityName, temp, wea)); + } + Thread.sleep(500); + } + } catch (Exception e) { + System.out.println("天气爬取失败"); + } + return list; + } +} + +// ==================== 主程序(修复:王者英雄一定全部打印) ==================== +public class CrawlerMain { + public static void main(String[] args) { + BaseCrawler m = new MovieCrawler(); + BaseCrawler h = new HeroCrawler(); + BaseCrawler w = new WeatherCrawler(); + + // 1. 电影 + System.out.println("===== 豆瓣电影Top250 ====="); + List movies = (List) m.startCrawl(); + for (Movie movie : movies) { + System.out.println(movie); + } + + // 2. 王者英雄(一定打印出来!) + System.out.println("\n===== 王者荣耀全英雄 ====="); + List heroes = (List) h.startCrawl(); + for (Hero hero : heroes) { + System.out.println(hero); // 这里一定会打印! + } + System.out.println("共爬取 " + heroes.size() + " 个英雄"); + + // 3. 天气 + System.out.println("\n===== 全国各省天气 ====="); + List weathers = (List) w.startCrawl(); + for (Weather weather : weathers) { + System.out.println(weather); + } + } +} \ No newline at end of file