1 changed files with 207 additions and 0 deletions
@ -0,0 +1,207 @@ |
|||||
|
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
||||
|
|
||||
|
// ==================== 接口 ====================
|
||||
|
/**
 * Common contract shared by every crawler in this file.
 *
 * <p>The return type uses a wildcard so that each implementation can narrow it
 * to its own element type (for example {@code List<Movie>}).
 */
interface Crawler {

    /**
     * Runs the crawl and returns the items that were collected.
     *
     * @return the crawled items; the implementations in this file always return
     *         a list (possibly empty or partial), never {@code null}
     */
    List<?> startCrawl();
}
||||
|
|
||||
|
// ==================== 抽象爬虫父类 ====================
|
||||
|
abstract class BaseCrawler implements Crawler { |
||||
|
protected String baseUrl; |
||||
|
|
||||
|
public BaseCrawler(String baseUrl) { |
||||
|
this.baseUrl = baseUrl; |
||||
|
} |
||||
|
|
||||
|
protected Document getPage(String url) throws Exception { |
||||
|
return Jsoup.connect(url) |
||||
|
.userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64)") |
||||
|
.timeout(15000) |
||||
|
.get(); |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 实体类 ====================
|
||||
|
/** Immutable value object holding the title and rating text of one movie. */
class Movie {

    private final String title;
    private final String rating;

    /**
     * Creates a movie entry.
     *
     * @param title  display title of the movie
     * @param rating rating text exactly as scraped (kept as a string, not parsed)
     */
    public Movie(String title, String rating) {
        this.title = title;
        this.rating = rating;
    }

    /** Returns the one-line, human-readable form of this movie. */
    @Override
    public String toString() {
        return "电影:《" + title + "》 | 评分:" + rating;
    }
}
||||
|
|
||||
|
/** Immutable value object holding the name of one hero. */
class Hero {

    private final String name;

    /**
     * Creates a hero entry.
     *
     * @param name hero name exactly as scraped
     */
    public Hero(String name) {
        this.name = name;
    }

    /** Returns the one-line, human-readable form of this hero. */
    @Override
    public String toString() {
        return "英雄:" + name;
    }
}
||||
|
|
||||
|
/** Immutable value object holding one city's weather reading. */
class Weather {

    private final String province;
    private final String city;
    private final String temperature;
    private final String condition;

    /**
     * Creates a weather entry.
     *
     * @param province    province (or region) the city belongs to
     * @param city        city name
     * @param temperature temperature text exactly as scraped
     * @param condition   weather-condition text exactly as scraped
     */
    public Weather(String province, String city, String temperature, String condition) {
        this.province = province;
        this.city = city;
        this.temperature = temperature;
        this.condition = condition;
    }

    /**
     * Returns the one-line, human-readable form of this reading.
     * Note that the condition is printed before the temperature.
     */
    @Override
    public String toString() {
        return "省份:" + province + " | 城市:" + city + " | 天气:" + condition + " | 温度:" + temperature;
    }
}
||||
|
|
||||
|
// ==================== 豆瓣电影爬虫 ====================
|
||||
|
class MovieCrawler extends BaseCrawler { |
||||
|
|
||||
|
public MovieCrawler() { |
||||
|
super("https://movie.douban.com/top250"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Movie> startCrawl() { |
||||
|
List<Movie> list = new ArrayList<>(); |
||||
|
try { |
||||
|
for (int i = 0; i < 250; i += 25) { |
||||
|
Document doc = getPage(baseUrl + "?start=" + i); |
||||
|
Elements items = doc.select(".item"); |
||||
|
|
||||
|
for (Element e : items) { |
||||
|
String title = e.select(".title").first().text().split("/")[0].trim(); |
||||
|
String rating = e.select(".rating_num").text(); |
||||
|
list.add(new Movie(title, rating)); |
||||
|
} |
||||
|
|
||||
|
Thread.sleep(1000); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("电影爬取失败"); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 王者荣耀爬虫 ====================
|
||||
|
class HeroCrawler extends BaseCrawler { |
||||
|
|
||||
|
public HeroCrawler() { |
||||
|
super("https://pvp.qq.com/web201605/herolist.shtml"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Hero> startCrawl() { |
||||
|
List<Hero> list = new ArrayList<>(); |
||||
|
try { |
||||
|
Document doc = getPage(baseUrl); |
||||
|
Elements heros = doc.select("ul.herolist li a"); |
||||
|
|
||||
|
for (Element h : heros) { |
||||
|
String name = h.text().trim(); |
||||
|
if (!name.isEmpty()) { |
||||
|
list.add(new Hero(name)); |
||||
|
} |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("英雄爬取失败"); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 天气爬虫(全国城市) ====================
|
||||
|
class WeatherCrawler extends BaseCrawler { |
||||
|
|
||||
|
private static final String[][] cities = { |
||||
|
{"北京","北京","101010100"},{"上海","上海","101020100"},{"天津","天津","101030100"},{"重庆","重庆","101040100"}, |
||||
|
{"河北","石家庄","101090101"},{"山西","太原","101100101"},{"辽宁","沈阳","101070101"},{"吉林","长春","101060101"}, |
||||
|
{"黑龙江","哈尔滨","101050101"},{"江苏","南京","101190101"},{"浙江","杭州","101210101"},{"安徽","合肥","101220101"}, |
||||
|
{"福建","福州","101230101"},{"江西","南昌","101240101"},{"山东","济南","101120101"},{"河南","郑州","101180101"}, |
||||
|
{"湖北","武汉","101200101"},{"湖南","长沙","101250101"},{"广东","广州","101280101"},{"海南","海口","101310101"}, |
||||
|
{"四川","成都","101270101"},{"贵州","贵阳","101260101"},{"云南","昆明","101290101"},{"陕西","西安","101110101"}, |
||||
|
{"甘肃","兰州","101160101"},{"青海","西宁","101150101"},{"内蒙古","呼和浩特","101080101"},{"广西","南宁","101300101"}, |
||||
|
{"西藏","拉萨","101140101"},{"宁夏","银川","101170101"},{"新疆","乌鲁木齐","101130101"}, |
||||
|
{"香港","香港","101320101"},{"澳门","澳门","101330101"},{"台湾","台北","101340101"} |
||||
|
}; |
||||
|
|
||||
|
public WeatherCrawler() { |
||||
|
super("https://www.weather.com.cn/weather/"); |
||||
|
} |
||||
|
|
||||
|
@Override |
||||
|
public List<Weather> startCrawl() { |
||||
|
List<Weather> list = new ArrayList<>(); |
||||
|
try { |
||||
|
for (String[] city : cities) { |
||||
|
|
||||
|
String province = city[0]; |
||||
|
String cityName = city[1]; |
||||
|
String code = city[2]; |
||||
|
|
||||
|
Document doc = getPage(baseUrl + code + ".shtml"); |
||||
|
Element today = doc.select("ul.t li").first(); |
||||
|
|
||||
|
if (today != null) { |
||||
|
String temp = today.select(".tem").text(); |
||||
|
String wea = today.select(".wea").text(); |
||||
|
list.add(new Weather(province, cityName, temp, wea)); |
||||
|
} |
||||
|
|
||||
|
Thread.sleep(500); |
||||
|
} |
||||
|
} catch (Exception e) { |
||||
|
System.out.println("天气爬取失败"); |
||||
|
} |
||||
|
return list; |
||||
|
} |
||||
|
} |
||||
|
|
||||
|
// ==================== 主程序 ====================
|
||||
|
public class CrawlerMain { |
||||
|
public static void main(String[] args) { |
||||
|
|
||||
|
// 用接口体现多态(老师重点)
|
||||
|
Crawler m = new MovieCrawler(); |
||||
|
Crawler h = new HeroCrawler(); |
||||
|
Crawler w = new WeatherCrawler(); |
||||
|
|
||||
|
// 电影
|
||||
|
System.out.println("===== 豆瓣电影Top250 ====="); |
||||
|
for (Object obj : m.startCrawl()) { |
||||
|
System.out.println(obj); |
||||
|
} |
||||
|
|
||||
|
// 英雄
|
||||
|
System.out.println("\n===== 王者荣耀全英雄 ====="); |
||||
|
List<?> heroes = h.startCrawl(); |
||||
|
for (Object obj : heroes) { |
||||
|
System.out.println(obj); |
||||
|
} |
||||
|
System.out.println("共爬取 " + heroes.size() + " 个英雄"); |
||||
|
|
||||
|
// 天气
|
||||
|
System.out.println("\n===== 全国各省天气 ====="); |
||||
|
for (Object obj : w.startCrawl()) { |
||||
|
System.out.println(obj); |
||||
|
} |
||||
|
} |
||||
|
} |
||||
Loading…
Reference in new issue