import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.util.ArrayList; import java.util.List; // ==================== 接口 ==================== interface Crawler { List startCrawl(); } // ==================== 抽象爬虫父类 ==================== abstract class BaseCrawler implements Crawler { protected String baseUrl; public BaseCrawler(String baseUrl) { this.baseUrl = baseUrl; } protected Document getPage(String url) throws Exception { return Jsoup.connect(url) .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64)") .timeout(15000) .get(); } } // ==================== 实体类 ==================== class Movie { private String title; private String rating; public Movie(String title, String rating) { this.title = title; this.rating = rating; } public String toString() { return "电影:《" + title + "》 | 评分:" + rating; } } class Hero { private String name; public Hero(String name) { this.name = name; } public String toString() { return "英雄:" + name; } } class Weather { private String province; private String city; private String temperature; private String condition; public Weather(String province, String city, String temperature, String condition) { this.province = province; this.city = city; this.temperature = temperature; this.condition = condition; } public String toString() { return "省份:" + province + " | 城市:" + city + " | 天气:" + condition + " | 温度:" + temperature; } } // ==================== 豆瓣电影爬虫 ==================== class MovieCrawler extends BaseCrawler { public MovieCrawler() { super("https://movie.douban.com/top250"); } @Override public List startCrawl() { List list = new ArrayList<>(); try { for (int i = 0; i < 250; i += 25) { Document doc = getPage(baseUrl + "?start=" + i); Elements items = doc.select(".item"); for (Element e : items) { String title = e.select(".title").first().text().split("/")[0].trim(); String rating = e.select(".rating_num").text(); list.add(new Movie(title, rating)); } Thread.sleep(1000); } } catch (Exception e) { System.out.println("电影爬取失败"); } return list; } } // ==================== 王者荣耀爬虫 ==================== class HeroCrawler extends BaseCrawler { public HeroCrawler() { super("https://pvp.qq.com/web201605/herolist.shtml"); } @Override public List startCrawl() { List list = new ArrayList<>(); try { Document doc = getPage(baseUrl); Elements heros = doc.select("ul.herolist li a"); for (Element h : heros) { String name = h.text().trim(); if (!name.isEmpty()) { list.add(new Hero(name)); } } } catch (Exception e) { System.out.println("英雄爬取失败"); } return list; } } // ==================== 天气爬虫(全国城市) ==================== class WeatherCrawler extends BaseCrawler { private static final String[][] cities = { {"北京","北京","101010100"},{"上海","上海","101020100"},{"天津","天津","101030100"},{"重庆","重庆","101040100"}, {"河北","石家庄","101090101"},{"山西","太原","101100101"},{"辽宁","沈阳","101070101"},{"吉林","长春","101060101"}, {"黑龙江","哈尔滨","101050101"},{"江苏","南京","101190101"},{"浙江","杭州","101210101"},{"安徽","合肥","101220101"}, {"福建","福州","101230101"},{"江西","南昌","101240101"},{"山东","济南","101120101"},{"河南","郑州","101180101"}, {"湖北","武汉","101200101"},{"湖南","长沙","101250101"},{"广东","广州","101280101"},{"海南","海口","101310101"}, {"四川","成都","101270101"},{"贵州","贵阳","101260101"},{"云南","昆明","101290101"},{"陕西","西安","101110101"}, {"甘肃","兰州","101160101"},{"青海","西宁","101150101"},{"内蒙古","呼和浩特","101080101"},{"广西","南宁","101300101"}, {"西藏","拉萨","101140101"},{"宁夏","银川","101170101"},{"新疆","乌鲁木齐","101130101"}, {"香港","香港","101320101"},{"澳门","澳门","101330101"},{"台湾","台北","101340101"} }; public WeatherCrawler() { super("https://www.weather.com.cn/weather/"); } @Override public List startCrawl() { List list = new ArrayList<>(); try { for (String[] city : cities) { String province = city[0]; String cityName = city[1]; String code = city[2]; Document doc = getPage(baseUrl + code + ".shtml"); Element today = doc.select("ul.t li").first(); if (today != null) { String temp = today.select(".tem").text(); String wea = today.select(".wea").text(); list.add(new Weather(province, cityName, temp, wea)); } Thread.sleep(500); } } catch (Exception e) { System.out.println("天气爬取失败"); } return list; } } // ==================== 主程序 ==================== public class CrawlerMain { public static void main(String[] args) { // 用接口体现多态(老师重点) Crawler m = new MovieCrawler(); Crawler h = new HeroCrawler(); Crawler w = new WeatherCrawler(); // 电影 System.out.println("===== 豆瓣电影Top250 ====="); for (Object obj : m.startCrawl()) { System.out.println(obj); } // 英雄 System.out.println("\n===== 王者荣耀全英雄 ====="); List heroes = h.startCrawl(); for (Object obj : heroes) { System.out.println(obj); } System.out.println("共爬取 " + heroes.size() + " 个英雄"); // 天气 System.out.println("\n===== 全国各省天气 ====="); for (Object obj : w.startCrawl()) { System.out.println(obj); } } }