diff --git a/project2/.idea/.gitignore b/project2/.idea/.gitignore new file mode 100644 index 0000000..b6b1ecf --- /dev/null +++ b/project2/.idea/.gitignore @@ -0,0 +1,10 @@ +# 默认忽略的文件 +/shelf/ +/workspace.xml +# 已忽略包含查询文件的默认文件夹 +/queries/ +# Datasource local storage ignored files +/dataSources/ +/dataSources.local.xml +# 基于编辑器的 HTTP 客户端请求 +/httpRequests/ diff --git a/project2/.idea/.name b/project2/.idea/.name new file mode 100644 index 0000000..641e66e --- /dev/null +++ b/project2/.idea/.name @@ -0,0 +1 @@ +MovieMain.java \ No newline at end of file diff --git a/project2/.idea/compiler.xml b/project2/.idea/compiler.xml new file mode 100644 index 0000000..81bfba0 --- /dev/null +++ b/project2/.idea/compiler.xml @@ -0,0 +1,13 @@ + + + + + + + + + + + + + \ No newline at end of file diff --git a/project2/.idea/encodings.xml b/project2/.idea/encodings.xml new file mode 100644 index 0000000..aa00ffa --- /dev/null +++ b/project2/.idea/encodings.xml @@ -0,0 +1,7 @@ + + + + + + + \ No newline at end of file diff --git a/project2/.idea/jarRepositories.xml b/project2/.idea/jarRepositories.xml new file mode 100644 index 0000000..712ab9d --- /dev/null +++ b/project2/.idea/jarRepositories.xml @@ -0,0 +1,20 @@ + + + + + + + + + + + \ No newline at end of file diff --git a/project2/.idea/misc.xml b/project2/.idea/misc.xml new file mode 100644 index 0000000..5e4e294 --- /dev/null +++ b/project2/.idea/misc.xml @@ -0,0 +1,12 @@ + + + + + + + + \ No newline at end of file diff --git a/project2/.idea/modules.xml b/project2/.idea/modules.xml new file mode 100644 index 0000000..f0c2589 --- /dev/null +++ b/project2/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/project2/.idea/vcs.xml b/project2/.idea/vcs.xml new file mode 100644 index 0000000..6c0b863 --- /dev/null +++ b/project2/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/project2/.vscode/settings.json b/project2/.vscode/settings.json new file mode 
100644 index 0000000..c5f3f6b --- /dev/null +++ b/project2/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "java.configuration.updateBuildConfiguration": "interactive" +} \ No newline at end of file diff --git a/project2/README.md b/project2/README.md new file mode 100644 index 0000000..ce206df --- /dev/null +++ b/project2/README.md @@ -0,0 +1,178 @@ +# 电影爬虫项目 - 继承与多态实现说明 + +## 项目简介 + +本项目是一个Java电影爬虫,从豆瓣电影Top250抓取数据,进行清洗、存储、分析,并生成多种图表展示结果。项目重点展示了面向对象编程中**继承**和**多态**的实现。 + +## 项目中的类 + +### 核心类 + +1. **MovieMain** (`src/main/java/com/crawler/MovieMain.java`) + - 项目主入口类 + - 负责协调爬虫、数据清洗、存储、展示和图表生成 + +2. **DoubanSpider** (`src/main/java/com/crawler/spider/DoubanSpider.java`) + - 爬虫实现类 + - 负责从豆瓣电影Top250页面爬取数据 + - 使用多线程并发爬取,提高效率 + +3. **Movie** (`src/main/java/com/crawler/chart/model/Movie.java`) + - 电影数据模型类 + - 存储电影的各种属性:排名、标题、评分、评价人数、导演、演员、年份、国家/地区、类型、简介 + +4. **MovieAnalyzer** (`src/main/java/com/crawler/analysis/MovieAnalyzer.java`) + - 数据分析工具类 + - 提供各种统计分析方法:评分分布、年份分布、类型分布、导演作品数量排行、平均评分、评分与年份相关性 + +5. **MovieResultDisplay** (`src/main/java/com/crawler/ui/MovieResultDisplay.java`) + - 结果显示和图表生成类 + - 在控制台显示统计结果 + - 生成各种图表:评分分布直方图、年份分布折线图、类型分布饼图、评分与年份相关性图表 + +6. **DataUtils** (`src/main/java/com/crawler/utils/DataUtils.java`) + - 数据工具类 + - 提供数据清洗和保存功能 + +### 图表相关类 + +1. **ChartGenerator** (`src/main/java/com/crawler/chart/ChartGenerator.java`) + - 图表生成器接口 + - 定义了生成图表的方法规范 + +2. **ChartManager** (`src/main/java/com/crawler/chart/ChartManager.java`) + - 图表管理器类 + - 负责管理和协调多个图表生成器 + +3. **实现类** + - `RatingDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java`) + - `YearDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java`) + - `GenreDistributionChartGenerator` (`src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java`) + - `YearRatingChartGenerator` (`src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java`) + +## 封装、多态、继承的实现 + +### 1. 
封装 + +- **类封装**:每个类都封装了自己的属性和方法,提供了清晰的接口 +- **数据封装**:`Movie`类使用私有属性和公共的getter/setter方法 +- **功能封装**:不同功能模块被封装到不同的类中,如爬虫、分析、展示等 + +### 2. 继承 + +- **接口继承**:所有图表生成器实现类都实现(`implements`)了`ChartGenerator`接口 +- **方法继承**:实现类从接口继承了`generateChart`和`getChartName`方法的声明,并各自提供具体实现 + +### 3. 多态 + +#### 3.1 向上转型(接口引用指向实现类) + +**文件**: `src/main/java/com/crawler/MovieMain.java` + +```java +ChartGenerator ratingChart = new RatingDistributionChartGenerator(); +ChartGenerator yearChart = new YearDistributionChartGenerator(); +ChartGenerator genreChart = new GenreDistributionChartGenerator(); +ChartGenerator yearRatingChart = new YearRatingChartGenerator(); +``` + +#### 3.2 方法参数多态 + +**文件**: `src/main/java/com/crawler/chart/ChartManager.java` + +```java +public void addChartGenerator(ChartGenerator generator) { + chartGenerators.add(generator); +} +``` + +#### 3.3 运行时多态(动态绑定) + +**文件**: `src/main/java/com/crawler/chart/ChartManager.java` + +```java +public void generateAllCharts(List<Movie> movies) { + Movie[] movieArray = movies.toArray(new Movie[0]); + for (ChartGenerator generator : chartGenerators) { + System.out.println("生成图表: " + generator.getChartName()); + generator.generateChart(movieArray); // 运行时根据实际类型调用对应方法 + } +} +``` + +#### 3.4 统一调用接口 + +**文件**: `src/main/java/com/crawler/MovieMain.java` + +```java +chartManager.addChartGenerator(ratingChart); +chartManager.addChartGenerator(yearChart); +chartManager.addChartGenerator(genreChart); +chartManager.addChartGenerator(yearRatingChart); + +chartManager.generateAllCharts(cleanedMovies); +``` + +## 继承与多态的优势 + +1. **代码复用**:所有图表生成器共享相同的接口方法 +2. **可扩展性**:新增图表类型只需实现接口,无需修改现有代码 +3. **统一管理**:`ChartManager` 可以统一管理不同类型的图表生成器 +4. **灵活性**:通过接口引用可以操作不同的实现类对象 +5. 
**可维护性**:代码结构清晰,职责分明 + +## 项目结构 + +``` +src/ +└── main/ + └── java/ + └── com/ + └── crawler/ + ├── MovieMain.java # 主入口文件 + ├── analysis/ + │ └── MovieAnalyzer.java # 数据分析工具 + ├── chart/ + │ ├── ChartGenerator.java # 图表生成器接口 + │ ├── ChartManager.java # 图表管理器 + │ ├── model/ + │ │ └── Movie.java # 电影数据模型 + │ └── impl/ + │ ├── RatingDistributionChartGenerator.java # 评分分布图表 + │ ├── YearDistributionChartGenerator.java # 年份分布图表 + │ ├── GenreDistributionChartGenerator.java # 类型分布图表 + │ └── YearRatingChartGenerator.java # 年份评分相关性图表 + ├── spider/ + │ └── DoubanSpider.java # 豆瓣爬虫实现 + ├── ui/ + │ └── MovieResultDisplay.java # 结果显示和图表生成 + └── utils/ + └── DataUtils.java # 数据工具类 +``` + +## 运行说明 + +1. **直接运行**:在IDE中直接运行 `MovieMain.java`,或使用命令行(以下为 Windows 写法;Linux/macOS 下请将类路径分隔符 `;` 改为 `:`): + ``` + java -cp "src/main/java;lib/*" com.crawler.MovieMain + ``` + +2. **依赖要求**:需要以下库 + - jsoup-1.17.2.jar + - jfreechart-1.5.4.jar + - jcommon-1.0.24.jar + +3. **运行结果**: + - 控制台输出爬取进度和统计结果 + - 生成的CSV数据文件保存在项目根目录 + - 生成的图表以PNG格式保存在项目根目录 + +## 技术栈 + +- Java 16+(`MovieMain` 中使用了 `Stream.toList()`,该方法自 Java 16 起提供) +- Jsoup (网页解析) +- JFreeChart (图表生成) + +## 总结 + +本项目通过图表生成器接口及其实现类,充分展示了面向对象编程中**继承**和**多态**的核心概念。接口定义了统一的方法规范,实现类提供了具体的实现逻辑,通过接口引用和运行时动态绑定,实现了代码的灵活性和可扩展性。同时,项目也展示了良好的封装设计,将不同功能模块封装到不同的类中,提高了代码的可维护性。 \ No newline at end of file diff --git a/project2/changsha_weather_1777442821757.csv b/project2/changsha_weather_1777442821757.csv new file mode 100644 index 0000000..3cb33fc --- /dev/null +++ b/project2/changsha_weather_1777442821757.csv @@ -0,0 +1,8 @@ +日期,星期,天气,最高温度,最低温度,风向,风力 +04/29,星期三,晴,24℃,17℃,南风,2-3级 +04/30,星期四,多云,29℃,18℃,北风,2-3级 +05/01,星期五,晴转多云,27℃,18℃,南风,2-3级 +05/02,星期六,多云转晴,26℃,18℃,北风,2-3级 +05/03,星期日,阴,28℃,17℃,南风,2-3级 +05/04,星期一,小雨,29℃,17℃,北风,2-3级 +05/05,星期二,阵雨,27℃,19℃,南风,2-3级 diff --git a/project2/douban_movies_1777442791188.csv b/project2/douban_movies_1777442791188.csv new file mode 100644 index 0000000..bd7be1b --- /dev/null +++ b/project2/douban_movies_1777442791188.csv @@ -0,0 +1,251 @@ +排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介 +1,肖申克的救赎,9.7,0,弗兰克·德拉邦特 Frank Darabont,蒂姆·罗宾斯 
Tim Robbins /...,1994,美国,犯罪, +2,霸王别姬,9.6,0,陈凯歌 Kaige Chen,张国荣 Leslie Cheung / 张丰毅 Fengyi Zha...,1993,中国大陆 中国香港,剧情, +3,泰坦尼克号,9.5,0,詹姆斯·卡梅隆 James Cameron,莱昂纳多·迪卡普里奥 Leonardo...,1997,美国,剧情, +4,阿甘正传,9.5,0,罗伯特·泽米吉斯 Robert Zemeckis,汤姆·汉克斯 Tom Hanks / ...,1994,美国,剧情, +5,千与千寻,9.4,0,宫崎骏 Hayao Miyazaki,柊瑠美 Rumi Hîragi / 入野自由 Miy...,2001,日本,剧情, +6,美丽人生,9.5,0,罗伯托·贝尼尼 Roberto Benigni,罗伯托·贝尼尼 Roberto Beni...,1997,意大利,剧情, +7,星际穿越,9.4,0,克里斯托弗·诺兰 Christopher Nolan,马修·麦康纳 Matthew Mc...,2014,美国 英国 加拿大,剧情, +8,这个杀手不太冷,9.4,0,吕克·贝松 Luc Besson,让·雷诺 Jean Reno / 娜塔莉·波特曼 ...,1994,法国 美国,剧情, +9,盗梦空间,9.4,0,克里斯托弗·诺兰 Christopher Nolan,莱昂纳多·迪卡普里奥 Le...,2010,美国 英国,剧情, +10,楚门的世界,9.4,0,彼得·威尔 Peter Weir,金·凯瑞 Jim Carrey / 劳拉·琳妮 Lau...,1998,美国,剧情, +11,辛德勒的名单,9.5,0,史蒂文·斯皮尔伯格 Steven Spielberg,连姆·尼森 Liam Neeson...,1993,美国,剧情, +12,忠犬八公的故事,9.4,0,莱塞·霍尔斯道姆 Lasse Hallström,理查·基尔 Richard Ger...,2009,美国 英国,剧情, +13,海上钢琴师,9.3,0,朱塞佩·托纳多雷 Giuseppe Tornatore,蒂姆·罗斯 Tim Roth / ...,1998,意大利,剧情, +14,疯狂动物城,9.3,0,拜伦·霍华德 Byron Howard / 瑞奇·摩尔 Rich Moore,金妮弗·...,2016,美国,喜剧, +15,三傻大闹宝莱坞,9.2,0,拉库马·希拉尼 Rajkumar Hirani,阿米尔·汗 Aamir Khan / 卡...,2009,印度,剧情, +16,机器人总动员,9.3,0,安德鲁·斯坦顿 Andrew Stanton,本·贝尔特 Ben Burtt / 艾丽...,2008,美国,科幻, +17,放牛班的春天,9.3,0,克里斯托夫·巴拉蒂 Christophe Barratier,让-巴蒂斯特·莫尼...,2004,法国 瑞士 德国,剧情, +18,无间道,9.3,0,刘伟强 / 麦兆辉,刘德华 Andy Lau / 梁朝伟 Tony Leung Chiu W...,2002,中国香港,剧情, +19,控方证人,9.6,0,比利·怀尔德 Billy Wilder,泰隆·鲍华 Tyrone Power / 玛琳·...,1957,美国,剧情, +20,寻梦环游记,9.1,0,李·昂克里奇 Lee Unkrich / 阿德里安·莫利纳 Adrian Molina,...,2017,美国,喜剧, +21,大话西游之大圣娶亲,9.2,0,刘镇伟 Jeffrey Lau,周星驰 Stephen Chow / 吴孟达 Man Tat Ng...,1995,中国香港 中国大陆,喜剧, +22,熔炉,9.3,0,黄东赫 Dong-hyuk Hwang,孔侑 Yoo Gong / 郑有美 Yu-mi Jung /...,2011,韩国,剧情, +23,触不可及,9.3,0,奥利维·那卡什 Olivier Nakache / 艾力克·托兰达 Eric Toledano 主...,,2011,法国,剧情, +24,教父,9.3,0,弗朗西斯·福特·科波拉 Francis Ford Coppola,马龙·白兰度 M...,1972,美国,剧情, +25,末代皇帝,9.3,0,贝纳尔多·贝托鲁奇 Bernardo Bertolucci,尊龙 John Lone / 陈...,1987,英国 意大利 中国大陆 法国,剧情, +26,哈利·波特与魔法石,9.2,0,Chris Columbus,Daniel Radcliffe / Emma Watson / Rupert Grint,2001,美国 英国,奇幻, 
+27,当幸福来敲门,9.1,0,加布里尔·穆奇诺 Gabriele Muccino,威尔·史密斯 Will Smith ...,2006,美国,剧情, +28,龙猫,9.2,0,宫崎骏 Hayao Miyazaki,日高法子 Noriko Hidaka / 坂本千夏 Ch...,1988,日本,动画, +29,活着,9.3,0,张艺谋 Yimou Zhang,葛优 You Ge / 巩俐 Li Gong / 姜武 Wu Jiang,1994,中国大陆 中国香港,剧情, +30,怦然心动,9.1,0,罗伯·莱纳 Rob Reiner,玛德琳·卡罗尔 Madeline Carroll / 卡...,2010,美国,剧情, +31,蝙蝠侠:黑暗骑士,9.2,0,克里斯托弗·诺兰 Christopher Nolan,克里斯蒂安·贝尔 Christ...,2008,美国 英国,剧情, +32,指环王3:王者无敌,9.3,0,彼得·杰克逊 Peter Jackson,伊利亚·伍德 Elijah Wood / 西恩...,2003,美国 新西兰,剧情, +33,我不是药神,9.0,0,文牧野 Muye Wen,徐峥 Zheng Xu / 王传君 Chuanjun Wang / 周...,2018,中国大陆,剧情, +34,乱世佳人,9.3,0,维克多·弗莱明 Victor Fleming / 乔治·库克 George Cukor,费...,1939,美国,剧情, +35,飞屋环游记,9.1,0,彼特·道格特 Pete Docter / 鲍勃·彼德森 Bob Peterson,爱德...,2009,美国,剧情, +36,让子弹飞,9.0,0,姜文 Wen Jiang,姜文 Wen Jiang / 葛优 You Ge / 周润发 Yun-F...,2010,中国大陆 中国香港,剧情, +37,哈尔的移动城堡,9.1,0,宫崎骏 Hayao Miyazaki,倍赏千惠子 Chieko Baishô / 木村拓...,2004,日本,爱情, +38,十二怒汉,9.4,0,西德尼·吕美特 Sidney Lumet,亨利·方达 Henry Fonda / 马丁...,1957,美国,剧情, +39,海蒂和爷爷,9.3,0,阿兰·葛斯彭纳 Alain Gsponer,阿努克·斯特芬 Anuk Steffen /...,2015,德国 瑞士,剧情, +40,素媛,9.3,0,李濬益 Jun-ik Lee,薛景求 Kyung-gu Sol / 严志媛 Ji-won Uhm ...,2013,韩国,剧情, +41,猫鼠游戏,9.1,0,史蒂文·斯皮尔伯格 Steven Spielberg,莱昂纳多·迪卡普里奥 L...,2002,美国 加拿大,传记, +42,天空之城,9.2,0,宫崎骏 Hayao Miyazaki,田中真弓 Mayumi Tanaka / 横泽启子 Ke...,1986,日本,动画, +43,鬼子来了,9.3,0,姜文 Wen Jiang,姜文 Wen Jiang / 香川照之 Teruyuki Kagawa /...,2000,中国大陆,剧情, +44,摔跤吧!爸爸,9.0,0,涅提·蒂瓦里 Nitesh Tiwari,阿米尔·汗 Aamir Khan / 法缇玛...,2016,印度,剧情, +45,少年派的奇幻漂流,9.1,0,李安 Ang Lee,苏拉·沙玛 Suraj Sharma / 伊尔凡·可汗 Irrfan...,2012,美国 中国台湾 英国 加拿大,剧情, +46,钢琴家,9.3,0,罗曼·波兰斯基 Roman Polanski,艾德里安·布洛迪 Adrien Brod...,2002,英国 法国 波兰 德国 美国,剧情, +47,指环王2:双塔奇兵,9.2,0,彼得·杰克逊 Peter Jackson,伊利亚·伍德 Elijah Wood / 西恩...,2002,美国 新西兰,剧情, +48,死亡诗社,9.2,0,彼得·威尔 Peter Weir,罗宾·威廉姆斯 Robin Williams / 罗伯...,1989,美国,剧情, +49,大话西游之月光宝盒,9.0,0,刘镇伟 Jeffrey Lau,周星驰 Stephen Chow / 吴孟达 Man Tat Ng...,1995,中国香港 中国大陆,喜剧, +50,绿皮书,8.9,0,彼得·法雷里 Peter Farrelly,维果·莫腾森 Viggo Mortensen /...,2018,美国 中国大陆,剧情, +51,何以为家,9.1,0,娜丁·拉巴基 Nadine Labaki,扎因·拉费阿 Zain al-Rafeea / ...,2018,黎巴嫩 
美国 法国 塞浦路斯 卡塔尔 英国,剧情, +52,闻香识女人,9.1,0,马丁·布莱斯 Martin Brest,阿尔·帕西诺 Al Pacino / 克里斯...,1992,美国,剧情, +53,大闹天宫,9.4,0,万籁鸣 Laiming Wan,邱岳峰 Yuefeng Qiu / 富润生 Runsheng Fu...,1961,1964(中国大陆),1978(中国大陆), +54,黑客帝国,9.1,0,安迪·沃卓斯基 Andy Wachowski / 拉娜·沃卓斯基 Lana Wachowski 主...,,1999,美国,动作, +55,指环王1:护戒使者,9.1,0,彼得·杰克逊 Peter Jackson,伊利亚·伍德 Elijah Wood / 西恩...,2001,新西兰 美国 英国,剧情, +56,罗马假日,9.1,0,威廉·惠勒 William Wyler,奥黛丽·赫本 Audrey Hepburn / 格...,1953,美国,喜剧, +57,教父2,9.3,0,弗朗西斯·福特·科波拉 Francis Ford Coppola,阿尔·帕西诺 A...,1974,美国,剧情, +58,狮子王,9.1,0,Roger Allers / 罗伯·明可夫 Rob Minkoff,乔纳森·泰勒·托马...,1994,美国,动画, +59,天堂电影院,9.2,0,朱塞佩·托纳多雷 Giuseppe Tornatore,菲利普·努瓦雷 Philipp...,1988,意大利 法国,剧情, +60,饮食男女,9.2,0,李安 Ang Lee,郎雄 Sihung Lung / 杨贵媚 Kuei-Mei Yang / 吴...,1994,中国台湾 美国,剧情, +61,辩护人,9.2,0,杨宇硕 Woo-seok Yang,宋康昊 Kang-ho Song / 金英爱 Yeong-ae...,2013,韩国,剧情, +62,本杰明·巴顿奇事,9.0,0,大卫·芬奇 David Fincher,布拉德·皮特 Brad Pitt / 凯特·布...,2008,美国,剧情, +63,搏击俱乐部,9.0,0,大卫·芬奇 David Fincher,爱德华·诺顿 Edward Norton / 布拉...,1999,美国,剧情, +64,美丽心灵,9.1,0,朗·霍华德 Ron Howard,罗素·克劳 Russell Crowe / 艾德·哈...,2001,美国,传记, +65,穿条纹睡衣的男孩,9.2,0,马克·赫尔曼 Mark Herman,阿萨·巴特菲尔德 Asa Butterfield ...,2008,英国 美国,剧情, +66,哈利·波特与死亡圣器(下),9.0,0,大卫·叶茨 David Yates,丹尼尔·雷德克里夫 Daniel Radcliffe...,2011,美国 英国,奇幻, +67,情书,8.9,0,岩井俊二 Shunji Iwai,中山美穗 Miho Nakayama / 丰川悦司 Ets...,1995,日本,剧情, +68,两杆大烟枪,9.1,0,盖·里奇 Guy Ritchie,杰森·弗莱明 Jason Flemyng / 德克斯特...,1998,英国,剧情, +69,窃听风暴,9.2,0,弗洛里安·亨克尔·冯·多纳斯马尔克 Florian Henckel von Donnersmarck &n...,,2006,德国,剧情, +70,音乐之声,9.1,0,罗伯特·怀斯 Robert Wise,朱莉·安德鲁斯 Julie Andrews / 克...,1965,美国,剧情, +71,功夫,8.9,0,周星驰 Stephen Chow,周星驰 Stephen Chow / 元秋 Qiu Yuen / ...,2004,中国大陆 中国香港,动作, +72,哈利·波特与阿兹卡班的囚徒,9.0,0,阿方索·卡隆 Alfonso Cuarón,丹尼尔·雷德克里夫 Daniel Rad...,2004,英国 美国,奇幻, +73,阿凡达,8.8,0,詹姆斯·卡梅隆 James Cameron,萨姆·沃辛顿 Sam Worthington ...,2009,美国,动作, +74,西西里的美丽传说,8.9,0,朱塞佩·托纳多雷 Giuseppe Tornatore,莫妮卡·贝鲁奇 Monica ...,2000,意大利 美国,剧情, +75,看不见的客人,8.8,0,奥里奥尔·保罗 Oriol Paulo,马里奥·卡萨斯 Mario Casas / 阿...,2016,西班牙,剧情, +76,拯救大兵瑞恩,9.1,0,史蒂文·斯皮尔伯格 Steven Spielberg,汤姆·汉克斯 Tom 
Hanks...,1998,美国,剧情, +77,沉默的羔羊,8.9,0,乔纳森·戴米 Jonathan Demme,朱迪·福斯特 Jodie Foster / 安...,1991,美国,剧情, +78,小鞋子,9.2,0,马基德·马基迪 Majid Majidi,默罕默德·阿米尔·纳吉 Mohamma...,1997,伊朗,剧情, +79,布达佩斯大饭店,8.9,0,韦斯·安德森 Wes Anderson,拉尔夫·费因斯 Ralph Fiennes / ...,2014,美国 德国 英国,剧情, +80,蝴蝶效应,8.9,0,埃里克·布雷斯 Eric Bress / J·麦基·格鲁伯 J. Mackye Gruber 主...,,2004,美国 加拿大,剧情, +81,飞越疯人院,9.1,0,米洛斯·福尔曼 Miloš Forman,杰克·尼科尔森 Jack Nichols...,1975,美国,剧情, +82,还有明天,9.3,0,宝拉·柯特莱西 Paola Cortellesi,宝拉·柯特莱西 Paola Corte...,2023,意大利,剧情, +83,禁闭岛,8.9,0,Martin Scorsese,莱昂纳多·迪卡普里奥 Leonardo DiCaprio / ...,2010,美国,剧情, +84,心灵捕手,9.0,0,格斯·范·桑特 Gus Van Sant,马特·达蒙 Matt Damon / 罗宾·...,1997,美国,剧情, +85,致命魔术,8.9,0,克里斯托弗·诺兰 Christopher Nolan,休·杰克曼 Hugh Jackman...,2006,英国 美国,剧情, +86,低俗小说,8.9,0,昆汀·塔伦蒂诺 Quentin Tarantino,约翰·特拉沃尔塔 John Tra...,1994,美国,剧情, +87,哈利·波特与密室,8.9,0,Chris Columbus,丹尼尔·雷德克里夫 Daniel Radcliffe / 艾玛...,2002,英国 美国,奇幻, +88,超脱,9.0,0,托尼·凯耶 Tony Kaye,艾德里安·布洛迪 Adrien Brody / 马西...,2011,美国,剧情, +89,一一,9.1,0,杨德昌 Edward Yang,吴念真 / 李凯莉 Kelly Lee / 金燕玲 Elai...,2000,中国台湾 日本,剧情, +90,喜剧之王,8.8,0,周星驰 Stephen Chow / 李力持 Lik-Chi Lee,周星驰 Stephen Ch...,1999,中国香港,喜剧, +91,杀人回忆,8.9,0,奉俊昊 Joon-ho Bong,宋康昊 Kang-ho Song / 金相庆 Sang-kyun...,2003,韩国,剧情, +92,致命ID,8.9,0,詹姆斯·曼高德 James Mangold,约翰·库萨克 John Cusack / 雷...,2003,美国,剧情, +93,摩登时代,9.3,0,查理·卓别林 Charles Chaplin,查理·卓别林 Charles Chaplin ...,1936,美国,剧情, +94,春光乍泄,9.0,0,王家卫 Kar Wai Wong,张国荣 Leslie Cheung / 梁朝伟 Tony Leu...,1997,中国香港 日本 韩国,剧情, +95,加勒比海盗,8.8,0,戈尔·维宾斯基 Gore Verbinski,约翰尼·德普 Johnny Depp / ...,2003,美国,动作, +96,海豚湾,9.3,0,路易·西霍尤斯 Louie Psihoyos,路易·西霍尤斯 Louie Psihoyo...,2009,美国,纪录片, +97,美国往事,9.1,0,赛尔乔·莱翁内 Sergio Leone,罗伯特·德尼罗 Robert De Niro ...,1984,美国 意大利,犯罪, +98,红辣椒,9.0,0,今敏 Satoshi Kon,林原惠美 Megumi Hayashibara / 江守彻 Toru...,2006,日本,动画, +99,七宗罪,8.8,0,大卫·芬奇 David Fincher,摩根·弗里曼 Morgan Freeman / 布...,1995,美国,剧情, +100,唐伯虎点秋香,8.8,0,李力持 Lik-Chi Lee,周星驰 Stephen Chow / 巩俐 Li Gong / 陈...,1993,中国香港,喜剧, +101,狩猎,9.1,0,托马斯·温特伯格 Thomas Vinterberg,麦斯·米科尔森 Mads Mik...,2012,丹麦 瑞典,剧情, +102,幽灵公主,8.9,0,宫崎骏 Hayao 
Miyazaki,松田洋治 Yôji Matsuda / 石田百合...,1997,日本,动画, +103,甜蜜蜜,8.9,0,陈可辛 Peter Chan,黎明 Leon Lai / 张曼玉 Maggie Cheung / ...,1996,中国香港,剧情, +104,寄生虫,8.8,0,奉俊昊 Joon-ho Bong,宋康昊 Kang-ho Song / 李善均 Seon-gyun...,2019,韩国,剧情, +105,天书奇谭,9.2,0,王树忱 Shuchen Wang / 钱运达 Yunda Qian,丁建华 Jianhua Din...,1983,2021,中国大陆, +106,蝙蝠侠:黑暗骑士崛起,8.9,0,克里斯托弗·诺兰 Christopher Nolan,克里斯蒂安·贝尔 Christ...,2012,美国 英国,剧情, +107,超能陆战队,8.8,0,唐·霍尔 Don Hall / 克里斯·威廉姆斯 Chris Williams,斯科特...,2014,美国,喜剧, +108,7号房的礼物,8.9,0,李焕庆 Hwan-kyeong Lee,柳承龙 Seung-yong Ryoo / 朴信惠 Shi...,2013,韩国,剧情, +109,茶馆,9.5,0,谢添 Tian Xie,于是之 Shizhi Yu / 郑榕 Rong Zhen / 蓝天野 T...,1982,中国大陆,剧情, +110,第六感,8.9,0,M·奈特·沙马兰 M. Night Shyamalan,布鲁斯·威利斯 Bruce Wi...,1999,美国,剧情, +111,爱在黎明破晓前,8.8,0,理查德·林克莱特 Richard Linklater,伊桑·霍克 Ethan Hawke ...,1995,美国 奥地利 瑞士,剧情, +112,爱在日落黄昏时,8.9,0,理查德·林克莱特 Richard Linklater,伊桑·霍克 Ethan Hawke ...,2004,美国 法国,剧情, +113,被嫌弃的松子的一生,8.8,0,中岛哲也 Tetsuya Nakashima,中谷美纪 Miki Nakatani / 瑛太 E...,2006,日本,剧情, +114,头脑特工队,8.8,0,彼特·道格特 Pete Docter / 罗纳尔多·德尔·卡门 Ronaldo Del Carmen &nb...,,2015,美国,喜剧, +115,哈利·波特与火焰杯,8.8,0,迈克·内威尔 Mike Newell,丹尼尔·雷德克里夫 Daniel Radclif...,2005,英国 美国,悬疑, +116,未麻的部屋,9.1,0,今敏 Satoshi Kon,岩男润子 Junko Iwao / 松本梨香 Rica Matsu...,1997,日本,剧情, +117,重庆森林,8.8,0,王家卫 Kar Wai Wong,林青霞 Brigitte Lin / 金城武 Takeshi K...,1994,中国香港,剧情, +118,借东西的小人阿莉埃蒂,8.9,0,米林宏昌 Hiromasa Yonebayashi,志田未来 Mirai Shida / 神木...,2010,日本,动画, +119,菊次郎的夏天,8.9,0,北野武 Takeshi Kitano,北野武 Takeshi Kitano / 关口雄介 Yus...,1999,日本,剧情, +120,入殓师,8.9,0,泷田洋二郎 Yôjirô Takita,本木雅弘 Masahiro Motoki / ...,2008,日本,剧情, +121,断背山,8.8,0,李安 Ang Lee,希斯·莱杰 Heath Ledger / 杰克·吉伦哈尔 Jake...,2005,美国 加拿大,剧情, +122,剪刀手爱德华,8.7,0,蒂姆·波顿 Tim Burton,约翰尼·德普 Johnny Depp / 薇诺娜·...,1990,美国,剧情, +123,勇敢的心,8.9,0,梅尔·吉布森 Mel Gibson,梅尔·吉布森 Mel Gibson / 苏菲·玛...,1995,美国,动作, +124,时空恋旅人,8.8,0,理查德·柯蒂斯 Richard Curtis,多姆纳尔·格里森 Domhnall Gl...,2013,英国 美国,喜剧, +125,驯龙高手,8.8,0,迪恩·德布洛斯 Dean DeBlois / 克里斯·桑德斯 Chris Sanders,...,2010,美国,动画, +126,消失的爱人,8.7,0,大卫·芬奇 David Fincher,本·阿弗莱克 Ben Affleck / 罗莎蒙...,2014,美国,剧情, 
+127,无人知晓,9.1,0,是枝裕和 Hirokazu Koreeda,柳乐优弥 Yûya Yagira / 北浦爱...,2004,日本,剧情, +128,傲慢与偏见,8.7,0,乔·怀特 Joe Wright,凯拉·奈特莉 Keira Knightley / 马修·...,2005,法国 英国 美国,剧情, +129,倩女幽魂,8.8,0,程小东 Siu-Tung Ching,张国荣 Leslie Cheung / 王祖贤 Joey W...,1987,中国香港,爱情, +130,新世界,8.9,0,朴勋政 Hoon-jung Park,李政宰 Jung-Jae Lee / 崔岷植 Min-sik...,2013,韩国,剧情, +131,花样年华,8.8,0,王家卫 Kar Wai Wong,张曼玉 Maggie Cheung / 梁朝伟 Tony Leu...,2000,中国香港,剧情, +132,玩具总动员3,8.9,0,李·昂克里奇 Lee Unkrich,汤姆·汉克斯 Tom Hanks / 蒂姆·艾...,2010,美国,喜剧, +133,一个叫欧维的男人决定去死,8.9,0,汉内斯·赫尔姆 Hannes Holm,罗夫·拉斯加德 Rolf Lassgård...,2015,瑞典,剧情, +134,色,戒,8.7,0,李安 Ang Lee,梁朝伟 Tony Leung Chiu Wai / 汤唯 Wei Tang / ...,2007,中国台湾 中国大陆 美国 中国香港,剧情, +135,完美的世界,9.1,0,克林特·伊斯特伍德 Clint Eastwood,凯文·科斯特纳 Kevin Cos...,1993,美国,剧情, +136,阳光灿烂的日子,8.8,0,姜文 Wen Jiang,夏雨 Yu Xia / 宁静 Jing Ning / 陶虹 Hong Tao,1994,中国大陆 中国香港,剧情, +137,怪兽电力公司,8.8,0,彼特·道格特 Pete Docter / 大卫·斯沃曼 David Silverman,约...,2001,美国,儿童, +138,教父3,9.0,0,弗朗西斯·福特·科波拉 Francis Ford Coppola,阿尔·帕西诺 A...,1990,美国,剧情, +139,小森林 夏秋篇,9.0,0,森淳一 Junichi Mori,桥本爱 Ai Hashimoto / 三浦贵大 Takahir...,2014,日本,剧情, +140,天使爱美丽,8.7,0,让-皮埃尔·热内 Jean-Pierre Jeunet,奥黛丽·塔图 Audrey Tau...,2001,法国 德国,剧情, +141,侧耳倾听,8.9,0,近藤喜文 Yoshifumi Kondo,本名阳子 Youko Honna / 小林桂树 K...,1995,日本,剧情, +142,哪吒闹海,9.2,0,王树忱 Shuchen Wang / 严定宪 Dingxian Yan,梁正晖 Zhenghui ...,1979,中国大陆,冒险, +143,九品芝麻官,8.8,0,王晶 Jing Wong,周星驰 Stephen Chow / 吴孟达 Man Tat Ng / ...,1994,中国香港,剧情, +144,被解救的姜戈,8.8,0,昆汀·塔伦蒂诺 Quentin Tarantino,杰米·福克斯 Jamie Foxx /...,2012,美国,剧情, +145,请以你的名字呼唤我,8.8,0,卢卡·瓜达尼诺 Luca Guadagnino,艾米·汉莫 Armie Hammer / ...,2017,意大利 法国 巴西 美国,剧情, +146,幸福终点站,8.8,0,史蒂文·斯皮尔伯格 Steven Spielberg,汤姆·汉克斯 Tom Hanks...,2004,美国,喜剧, +147,釜山行,8.6,0,延尚昊 Sang-ho Yeon,孔刘 Yoo Gong / 郑有美 Yu-mi Jung / 马...,2016,韩国,动作, +148,神偷奶爸,8.7,0,皮艾尔·柯芬 Pierre Coffin / 克里斯·雷纳德 Chris Renaud,...,2010,美国 法国,喜剧, +149,小森林 冬春篇,9.0,0,森淳一 Junichi Mori,桥本爱 Ai Hashimoto / 三浦贵大 Takahir...,2015,日本,剧情, +150,喜宴,9.0,0,李安 Ang Lee,赵文瑄 Winston Chao / 归亚蕾 Ya-lei Kuei / 郎...,1993,中国台湾 美国,剧情, +151,萤火之森,8.8,0,大森贵弘 Takahiro Omori,佐仓绫音 
Ayane Sakura / 内山昂辉 K...,2011,日本,剧情, +152,告白,8.8,0,中岛哲也 Tetsuya Nakashima,松隆子 Takako Matsu / 冈田将生 ...,2010,日本,剧情, +153,玛丽和麦克斯,9.0,0,亚当·艾略特 Adam Elliot,托妮·科莱特 Toni Collette / 菲利...,2009,澳大利亚 美国,剧情, +154,七武士,9.3,0,黑泽明 Akira Kurosawa,三船敏郎 Toshirô Mifune / 志村乔 ...,1954,日本,动作, +155,头号玩家,8.6,0,史蒂文·斯皮尔伯格 Steven Spielberg,泰伊·谢里丹 Tye Sheri...,2018,美国,动作, +156,模仿游戏,8.8,0,莫滕·泰杜姆 Morten Tyldum,本尼迪克特·康伯巴奇 Benedict C...,2014,英国 美国,剧情, +157,惊魂记,9.0,0,阿尔弗雷德·希区柯克 Alfred Hitchcock,安东尼·博金斯 Antho...,1960,美国,悬疑, +158,大鱼,8.8,0,蒂姆·波顿 Tim Burton,伊万·麦克格雷格 Ewan McGregor / 阿...,2003,美国,剧情, +159,机器人之梦,9.1,0,巴勃罗·贝格尔 Pablo Berger,伊万·拉班达 Ivan Labanda,2023,西班牙 法国,剧情, +160,心灵奇旅,8.7,0,彼特·道格特 Pete Docter / 凯普·鲍尔斯 Kemp Powers,杰米·...,2020,美国,动画, +161,背靠背,脸对脸,9.5,0,黄建新 Jianxin Huang / 杨亚洲 Yazhou Yang,牛振华 Zhenhua N...,1994,中国大陆 中国香港,剧情, +162,射雕英雄传之东成西就,8.7,0,刘镇伟 Jeffrey Lau,梁朝伟 Tony Leung Chiu Wai / 林青霞 Bri...,1993,中国香港,喜剧, +163,血战钢锯岭,8.7,0,梅尔·吉布森 Mel Gibson,安德鲁·加菲尔德 Andrew Garfield /...,2016,澳大利亚 美国,剧情, +164,你的名字。,8.5,0,新海诚 Makoto Shinkai,神木隆之介 Ryûnosuke Kamiki / 上...,2016,日本,剧情, +165,我是山姆,9.0,0,杰茜·尼尔森 Jessie Nelson,Sean Penn / Dakota Fanning / Mi...,2001,美国,剧情, +166,阳光姐妹淘,8.8,0,姜炯哲 Hyeong-Cheol Kang,沈恩京 Eun-kyung Shim / 闵孝琳 Hy...,2011,韩国,剧情, +167,恐怖直播,8.7,0,金秉祐 Byeong-woo Kim,河正宇 Jung-woo Ha / 李璟荣 Kyeong-y...,2013,韩国,剧情, +168,黑客帝国3:矩阵革命,8.8,0,拉娜·沃卓斯基 Lana Wachowski / 莉莉·沃卓斯基 Lilly Wachowski ...,,2003,美国,动作, +169,末路狂花,9.0,0,雷德利·斯科特 Ridley Scott,吉娜·戴维斯 Geena Davis / 苏...,1991,美国 英国 法国,犯罪, +170,高山下的花环,9.5,0,谢晋 Jin Xie,吕晓禾 Xiaohe Lü / 唐国强 Guoqiang Tang / 何...,1984,1985,中国大陆, +171,小丑,8.7,0,托德·菲利普斯 Todd Phillips,杰昆·菲尼克斯 Joaquin Phoeni...,2019,美国 加拿大,剧情, +172,谍影重重3,8.9,0,保罗·格林格拉斯 Paul Greengrass,马特·达蒙 Matt Damon / ...,2007,美国 德国 法国 英国,动作, +173,三块广告牌,8.7,0,马丁·麦克唐纳 Martin McDonagh,弗兰西斯·麦克多蒙德 France...,2017,英国 美国,剧情, +174,电锯惊魂,8.7,0,詹姆斯·温 James Wan,雷·沃纳尔 Leigh Whannell / 加利·艾...,2004,美国,悬疑, +175,无间道2,8.8,0,刘伟强 Andrew Lau / 麦兆辉 Alan Mak,陈冠希 Edison Chen / ...,2003,中国香港,剧情, +176,达拉斯买家俱乐部,8.8,0,让-马克·瓦雷 Jean-Marc 
Vallée,马修·麦康纳 Matthew McCon...,2013,美国,剧情, +177,疯狂原始人,8.7,0,科克·德·米科 Kirk De Micco / 克里斯·桑德斯 Chris Sanders 主演...,,2013,美国,喜剧, +178,绿里奇迹,8.9,0,弗兰克·德拉邦特 Frank Darabont,汤姆·汉克斯 Tom Hanks / ...,1999,美国,犯罪, +179,爱在午夜降临前,8.9,0,理查德·林克莱特 Richard Linklater,伊桑·霍克 Ethan Hawke ...,2013,美国 希腊,剧情, +180,疯狂的石头,8.6,0,宁浩 Hao Ning,郭涛 Tao Guo / 刘桦 Hua Liu / 连晋 Teddy Lin,2006,中国大陆 中国香港,喜剧, +181,雨中曲,9.1,0,斯坦利·多南 Stanley Donen / 吉恩·凯利 Gene Kelly,吉恩·...,1952,美国,喜剧, +182,2001太空漫游,8.9,0,斯坦利·库布里克 Stanley Kubrick,凯尔·杜拉 Keir Dullea / ...,1968,英国 美国,科幻, +183,海街日记,8.8,0,是枝裕和 Hirokazu Koreeda,绫濑遥 Haruka Ayase / 长泽雅美 M...,2015,日本,剧情, +184,风之谷,8.9,0,宫崎骏 Hayao Miyazaki,岛本须美 Sumi Shimamoto / 松田洋治 Y...,1984,日本,动画, +185,上帝之城,9.0,0,费尔南多·梅里尔斯 Fernando Meirelles / 卡迪亚·兰德 Kátia Lund ...,,2002,巴西 法国,犯罪, +186,心迷宫,8.7,0,忻钰坤 Yukun Xin,霍卫民 Weimin Huo / 王笑天 Xiaotian Wang ...,2014,中国大陆,剧情, +187,英雄本色,8.6,0,吴宇森 John Woo,周润发 Yun-Fat Chow / 狄龙 Lung Ti / 张国...,1986,中国香港,剧情, +188,记忆碎片,8.7,0,克里斯托弗·诺兰 Christopher Nolan,盖·皮尔斯 Guy Pearce /...,2000,美国,犯罪, +189,纵横四海,8.8,0,吴宇森 John Woo,周润发 Yun-Fat Chow / 张国荣 Leslie Cheung...,1991,中国香港,剧情, +190,无敌破坏王,8.7,0,瑞奇·莫尔 Rich Moore,约翰·C·赖利 John C. 
Reilly / 萨拉...,2012,美国,喜剧, +191,卢旺达饭店,8.9,0,特瑞·乔治 Terry George,唐·钱德尔 Don Cheadle / 苏菲·奥...,2004,英国 南非 意大利 美国,剧情, +192,牯岭街少年杀人事件,8.9,0,杨德昌 Edward Yang,张震 Chen Chang / 杨静怡 Lisa Yang / 张...,1991,中国台湾,剧情, +193,恐怖游轮,8.5,0,克里斯托弗·史密斯 Christopher Smith,梅利莎·乔治 Melissa ...,2009,英国 澳大利亚,科幻, +194,东京教父,9.0,0,今敏 Satoshi Kon,江守彻 Toru Emori / 梅垣义明 Yoshiaki Ume...,2003,日本,剧情, +195,小偷家族,8.7,0,是枝裕和 Hirokazu Koreeda,中川雅也 Lily Franky / 安藤樱 Sa...,2018,日本,剧情, +196,魔女宅急便,8.7,0,宫崎骏 Hayao Miyazaki,高山南 Minami Takayama / 佐久间玲 Re...,1989,日本,动画, +197,冰川时代,8.7,0,卡洛斯·沙尔丹哈 Carlos Saldanha / 克里斯·韦奇 Chris Wedge 主演...,,2002,美国,喜剧, +198,芙蓉镇,9.3,0,谢晋 Jin Xie,刘晓庆 Xiaoqing Liu / 姜文 Wen Jiang / 郑在石...,1987,中国大陆,剧情, +199,忠犬八公物语,9.2,0,神山征二郎 Seijirô Kôyama,仲代达矢 Tatsuya Nakadai /...,1987,日本,剧情, +200,岁月神偷,8.7,0,罗启锐 Alex Law,吴君如 Sandra Ng / 任达华 Simon Yam / 钟绍...,2010,中国香港 中国大陆,剧情, +201,遗愿清单,8.7,0,罗伯·莱纳 Rob Reiner,杰克·尼科尔森 Jack Nicholson / 摩根...,2007,美国,冒险, +202,荒蛮故事,8.7,0,达米安·斯兹弗隆 Damián Szifron,达里奥·葛兰帝内提 Darío...,2014,阿根廷 西班牙,剧情, +203,大佛普拉斯,8.7,0,黄信尧 Hsin-yao Huang,庄益增 Yizeng Zhuang / 陈竹昇 Chu-sh...,2017,中国台湾,剧情, +204,源代码,8.6,0,邓肯·琼斯 Duncan Jones,杰克·吉伦哈尔 Jake Gyllenhaal / ...,2011,美国 加拿大,科幻, +205,花束般的恋爱,8.6,0,土井裕泰 Nobuhiro Doi,菅田将晖 Masaki Suda / 有村架纯 Kasu...,2021,日本,剧情, +206,白日梦想家,8.6,0,本·斯蒂勒 Ben Stiller,本·斯蒂勒 Ben Stiller / 克里斯汀·...,2013,美国 英国,剧情, +207,爱乐之城,8.4,0,达米恩·查泽雷 Damien Chazelle,瑞恩·高斯林 Ryan Gosling /...,2016,美国,剧情, +208,疯狂的麦克斯4:狂暴之路,8.7,0,乔治·米勒 George Miller,汤姆·哈迪 Tom Hardy / 查理兹·塞...,2015,澳大利亚 美国,动作, +209,可可西里,8.9,0,陆川 Chuan Lu,多布杰 Duobujie / 张磊 Lei Zhang / 亓亮 Qi L...,2004,中国大陆 中国香港,剧情, +210,你看起来好像很好吃,8.9,0,藤森雅也 Masaya Fujimori,山口胜平 Kappei Yamaguchi / 爱河...,2010,日本,剧情, +211,贫民窟的百万富翁,8.6,0,丹尼·鲍尔 Danny Boyle / 洛芙琳·坦丹 Loveleen Tandan,戴夫...,2008,英国,剧情, +212,波西米亚狂想曲,8.6,0,布莱恩·辛格 Bryan Singer,拉米·马雷克 Rami Malek / 本·哈...,2018,英国 美国,剧情, +213,城市之光,9.3,0,查理·卓别林 Charles Chaplin,查理·卓别林 Charles Chaplin ...,1931,美国,喜剧, +214,爆裂鼓手,8.6,0,达米恩·查泽雷 Damien Chazelle,迈尔斯·特勒 Miles Teller /...,2014,美国,剧情, +215,青蛇,8.6,0,徐克 Hark 
Tsui,张曼玉 Maggie Cheung / 王祖贤 Joey Wang / ...,1993,中国香港 中国大陆,剧情, +216,哈利·波特与死亡圣器(上),8.6,0,大卫·叶茨 David Yates,丹尼尔·雷德克里夫 Daniel Radcliffe...,2010,英国 美国,奇幻, +217,无耻混蛋,8.7,0,昆汀·塔伦蒂诺 Quentin Tarantino,布拉德·皮特 Brad Pitt / ...,2009,德国 美国,剧情, +218,东邪西毒,8.6,0,王家卫 Kar Wai Wong,张国荣 Leslie Cheung / 林青霞 Brigitte...,1994,中国香港 中国台湾,剧情, +219,终结者2:审判日,8.8,0,詹姆斯·卡梅隆 James Cameron,阿诺·施瓦辛格 Arnold Schwarz...,1991,美国 法国,动作, +220,大红灯笼高高挂,8.8,0,张艺谋 Yimou Zhang,巩俐 Li Gong / 马精武 Jingwu Ma / 何赛...,1991,中国大陆 中国香港 中国台湾,剧情, +221,黑天鹅,8.6,0,达伦·阿罗诺夫斯基 Darren Aronofsky,娜塔莉·波特曼 Natalie...,2010,美国,剧情, +222,新龙门客栈,8.7,0,李惠民 Raymond Lee,张曼玉 Maggie Cheung / 林青霞 Brigitte ...,1992,中国香港 中国大陆,动作, +223,初恋这件小事,8.5,0,普特鹏·普罗萨卡·那·萨克那卡林 Puttipong Promsaka Na Sakolnakorn / 华森·波克彭...,,2010,泰国,剧情, +224,千钧一发,8.8,0,安德鲁·尼科尔 Andrew Niccol,伊桑·霍克 Ethan Hawke / 乌玛...,1997,美国,剧情, +225,人工智能,8.7,0,史蒂文·斯皮尔伯格 Steven Spielberg,海利·乔·奥斯蒙 Haley...,2001,美国,剧情, +226,崖上的波妞,8.6,0,宫崎骏 Hayao Miyazaki,奈良柚莉爱 Yuria Nara / 土井洋辉 Hir...,2008,日本,动画, +227,雨人,8.7,0,巴瑞·莱文森 Barry Levinson,达斯汀·霍夫曼 Dustin Hoffman ...,1988,美国,剧情, +228,虎口脱险,8.9,0,杰拉尔·乌里 Gérard Oury,路易·德·菲耐斯 Louis de Funès...,1966,法国 英国,喜剧, +229,哈利·波特与凤凰社,8.6,0,大卫·叶茨 David Yates,丹尼尔·雷德克里夫 Daniel Radcliffe...,2007,英国 美国,奇幻, +230,彗星来的那一夜,8.6,0,詹姆斯·沃德·布柯特 James Ward Byrkit,艾米丽·芭尔多尼 Em...,2013,美国 英国,科幻, +231,罗生门,8.8,0,黑泽明 Akira Kurosawa,三船敏郎 Toshirô Mifune / 京町子 ...,1950,日本,剧情, +232,海边的曼彻斯特,8.6,0,肯尼斯·罗纳根 Kenneth Lonergan,卡西·阿弗莱克 Casey Affle...,2016,美国,剧情, +233,恋恋笔记本,8.5,0,尼克·卡索维茨 Nick Cassavetes,瑞恩·高斯林 Ryan Gosling /...,2004,美国,剧情, +234,火星救援,8.5,0,雷德利·斯科特 Ridley Scott,马特·达蒙 Matt Damon / 杰西卡...,2015,英国 美国 匈牙利 约旦,剧情, +235,真爱至上,8.5,0,理查德·柯蒂斯 Richard Curtis,休·格兰特 Hugh Grant / 连姆...,2003,英国 美国 法国,喜剧, +236,黑客帝国2:重装上阵,8.7,0,拉娜·沃卓斯基 Lana Wachowski / 莉莉·沃卓斯基 Lilly Wachowski ...,,2003,美国,动作, +237,冰雪奇缘,8.5,0,克里斯·巴克 Chris Buck / 珍妮弗·李 Jennifer Lee,克里斯汀...,2013,美国,喜剧, +238,步履不停,8.8,0,是枝裕和 Hirokazu Koreeda,阿部宽 Hiroshi Abe / 夏川结衣 Yu...,2008,日本,剧情, +239,奇迹男孩,8.6,0,斯蒂芬·卓博斯基 Stephen Chbosky,雅各布·特伦布莱 Jacob 
Tr...,2017,美国 中国香港,剧情, +240,千年女优,8.8,0,今敏 Satoshi Kon,庄司美代子 Miyoko Shôji / 小山茉美 Mam...,2001,日本,动画, +241,谍影重重2,8.7,0,保罗·格林格拉斯 Paul Greengrass,马特·达蒙 Matt Damon / ...,2004,美国 德国,动作, +242,战争之王,8.7,0,安德鲁·尼科尔 Andrew Niccol,尼古拉斯·凯奇 Nicolas Cage /...,2005,美国 德国,剧情, +243,蜘蛛侠:平行宇宙,8.6,0,鲍勃·佩尔西凯蒂 Bob Persichetti / 彼得·拉姆齐 Peter Ramsey 主...,,2018,美国,动作, +244,攻壳机动队,9.0,0,押井守 Mamoru Oshii,田中敦子 Atsuko Tanaka / 大冢明夫 Akio...,1995,日本,动作, +245,血钻,8.7,0,爱德华·兹威克 Edward Zwick,莱昂纳多·迪卡普里奥 Leonardo ...,2006,美国 德国 英国,剧情, +246,小姐,8.5,0,朴赞郁 Chan-wook Park,金敏喜 Min-hee Kim / 金泰梨 Tae-ri K...,2016,韩国,剧情, +247,隐藏人物,8.9,0,特奥多尔·梅尔菲 Theodore Melfi,塔拉吉·P·汉森 Taraji P. ...,2016,美国,剧情, +248,血观音,8.6,0,杨雅喆 Ya-che Yang,惠英红 Kara Wai Ying Hung / 吴可熙 Wu K...,2017,中国台湾,剧情, +249,魂断蓝桥,8.8,0,茂文·勒鲁瓦 Mervyn LeRoy,费雯·丽 Vivien Leigh / 罗伯特·...,1940,美国,剧情, +250,房间,8.7,0,伦尼·阿伯拉罕森 Lenny Abrahamson,布丽·拉尔森 Brie Larson...,2015,爱尔兰 加拿大 英国 美国,剧情, diff --git a/project2/job-crawler.iml b/project2/job-crawler.iml new file mode 100644 index 0000000..abe781f --- /dev/null +++ b/project2/job-crawler.iml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/project2/lib/jcommon-1.0.24.jar b/project2/lib/jcommon-1.0.24.jar new file mode 100644 index 0000000..4f1015d Binary files /dev/null and b/project2/lib/jcommon-1.0.24.jar differ diff --git a/project2/lib/jfreechart-1.5.4.jar b/project2/lib/jfreechart-1.5.4.jar new file mode 100644 index 0000000..ddd7c23 Binary files /dev/null and b/project2/lib/jfreechart-1.5.4.jar differ diff --git a/project2/lib/jsoup-1.17.2.jar b/project2/lib/jsoup-1.17.2.jar new file mode 100644 index 0000000..52ae16d Binary files /dev/null and b/project2/lib/jsoup-1.17.2.jar differ diff --git a/project2/movie_genre_distribution.png b/project2/movie_genre_distribution.png new file mode 100644 index 0000000..581f0eb Binary files /dev/null and b/project2/movie_genre_distribution.png differ diff --git a/project2/movie_rating_distribution.png b/project2/movie_rating_distribution.png new file mode 
100644 index 0000000..7427d4e Binary files /dev/null and b/project2/movie_rating_distribution.png differ diff --git a/project2/movie_year_distribution.png b/project2/movie_year_distribution.png new file mode 100644 index 0000000..33bfdf0 Binary files /dev/null and b/project2/movie_year_distribution.png differ diff --git a/project2/movie_year_rating.png b/project2/movie_year_rating.png new file mode 100644 index 0000000..b22c559 Binary files /dev/null and b/project2/movie_year_rating.png differ diff --git a/project2/pom.xml b/project2/pom.xml new file mode 100644 index 0000000..cb91a0d --- /dev/null +++ b/project2/pom.xml @@ -0,0 +1,53 @@ + + + 4.0.0 + + com.crawler + job-crawler + 1.0-SNAPSHOT + + + 1.8 + 1.8 + UTF-8 + + + + + + org.jsoup + jsoup + 1.17.2 + + + + + org.jfree + jfreechart + 1.5.4 + + + + + org.jfree + jcommon + 1.0.24 + + + + + + + org.apache.maven.plugins + maven-compiler-plugin + 3.8.1 + + ${maven.compiler.source} + ${maven.compiler.target} + + + + + \ No newline at end of file diff --git a/project2/soft_ranking_1777443554627.csv b/project2/soft_ranking_1777443554627.csv new file mode 100644 index 0000000..580807c --- /dev/null +++ b/project2/soft_ranking_1777443554627.csv @@ -0,0 +1,31 @@ +排名,学校名称,省份,类型,总分 +1,清华大学,北京,综合,1087 +2,北京大学,北京,综合,1036 +3,浙江大学,浙江,综合,895 +4,上海交通大学,上海,综合,894 +5,复旦大学,上海,综合,792 +6,南京大学,江苏,综合,708 +7,中国科学技术大学,安徽,理工,653 +8,武汉大学,湖北,综合,638 +9,华中科技大学,湖北,综合,638 +10,西安交通大学,陕西,综合,620 +11,北京航空航天大学,北京,理工,612 +12,哈尔滨工业大学,黑龙江,理工,607 +13,中山大学,广东,综合,597 +14,北京理工大学,北京,理工,595 +15,东南大学,江苏,综合,592 +16,四川大学,四川,综合,581 +17,中国人民大学,北京,综合,558 +18,同济大学,上海,综合,556 +19,北京师范大学,北京,师范,553 +20,天津大学,天津,理工,544 +21,南开大学,天津,综合,528 +22,山东大学,山东,综合,527 +23,西北工业大学,陕西,理工,519 +24,中国农业大学,北京,农业,519 +25,厦门大学,福建,综合,512 +26,吉林大学,吉林,综合,498 +27,中南大学,湖南,综合,497 +28,大连理工大学,辽宁,理工,495 +29,华东师范大学,上海,师范,469 +30,南方科技大学,广东,综合,466 diff --git a/project2/src/main/java/com/crawler/MovieMain.class b/project2/src/main/java/com/crawler/MovieMain.class new file mode 100644 index 
0000000..e09b3e9 Binary files /dev/null and b/project2/src/main/java/com/crawler/MovieMain.class differ diff --git a/project2/src/main/java/com/crawler/MovieMain.java b/project2/src/main/java/com/crawler/MovieMain.java new file mode 100644 index 0000000..758f6a2 --- /dev/null +++ b/project2/src/main/java/com/crawler/MovieMain.java @@ -0,0 +1,132 @@ +package com.crawler; + +import com.crawler.chart.ChartGenerator; +import com.crawler.chart.ChartManager; +import com.crawler.chart.impl.GenreDistributionChartGenerator; +import com.crawler.chart.impl.RatingDistributionChartGenerator; +import com.crawler.chart.impl.YearDistributionChartGenerator; +import com.crawler.chart.impl.YearRatingChartGenerator; +import com.crawler.chart.model.Movie; +import com.crawler.chart.model.SoftRanking; +import com.crawler.chart.model.WeatherData; +import com.crawler.spider.DoubanSpider; +import com.crawler.spider.SoftSpider; +import com.crawler.spider.WeatherSpider; +import com.crawler.utils.DataUtils; +import com.crawler.ui.MovieResultDisplay; + +import java.util.List; +import java.util.Scanner; + +public class MovieMain { + public static void main(String[] args) { + Scanner scanner = new Scanner(System.in); + + try { + System.out.println("请选择要爬取的数据:"); + System.out.println("1. 豆瓣电影Top250"); + System.out.println("2. 软科中国大学排名"); + System.out.println("3. 长沙天气数据"); + System.out.println("4. 
全部爬取"); + System.out.print("请输入选择(1-4): "); + + int choice = scanner.nextInt(); + + if (choice == 1 || choice == 4) { + crawlDoubanMovies(); + } + + if (choice == 2 || choice == 4) { + crawlSoftRanking(); + } + + if (choice == 3 || choice == 4) { + crawlWeather(); + } + + System.out.println("\n爬虫任务完成!"); + + } catch (Exception e) { + e.printStackTrace(); + } finally { + scanner.close(); + } + } + + private static void crawlDoubanMovies() throws Exception { + System.out.println("\n开始爬取豆瓣电影Top250数据..."); + + DoubanSpider spider = new DoubanSpider(); + List movieList = spider.crawlMovies(); + + List cleanedMovies = movieList.stream() + .map(DataUtils::cleanMovie) + .filter(movie -> movie != null) + .toList(); + + DataUtils.writeMovieToCSV(cleanedMovies, "douban_movies.csv"); + MovieResultDisplay.displayResults(cleanedMovies); + + ChartManager chartManager = new ChartManager(); + + ChartGenerator ratingChart = new RatingDistributionChartGenerator(); + ChartGenerator yearChart = new YearDistributionChartGenerator(); + ChartGenerator genreChart = new GenreDistributionChartGenerator(); + ChartGenerator yearRatingChart = new YearRatingChartGenerator(); + + chartManager.addChartGenerator(ratingChart); + chartManager.addChartGenerator(yearChart); + chartManager.addChartGenerator(genreChart); + chartManager.addChartGenerator(yearRatingChart); + + chartManager.generateAllCharts(cleanedMovies); + } + + private static void crawlSoftRanking() throws Exception { + System.out.println("\n开始爬取软科中国大学排名..."); + + SoftSpider spider = new SoftSpider(); + List rankingList = spider.crawlSoftRanking(); + + if (!rankingList.isEmpty()) { + DataUtils.writeSoftRankingToCSV(rankingList, "soft_ranking.csv"); + + System.out.println("\n软科中国大学排名前10名:"); + for (int i = 0; i < Math.min(10, rankingList.size()); i++) { + SoftRanking ranking = rankingList.get(i); + System.out.printf("%d. 
%s - %s - %d分%n", + ranking.getRank(), + ranking.getUniversityName(), + ranking.getProvince(), + ranking.getScore()); + } + } else { + System.out.println("未获取到软科排名数据"); + } + } + + private static void crawlWeather() throws Exception { + System.out.println("\n开始爬取长沙天气数据..."); + + WeatherSpider spider = new WeatherSpider(); + List weatherList = spider.crawlWeather(); + + if (!weatherList.isEmpty()) { + DataUtils.writeWeatherToCSV(weatherList, "changsha_weather.csv"); + + System.out.println("\n长沙近期天气:"); + for (WeatherData weather : weatherList) { + System.out.printf("%s (%s): %s,温度 %s~%s,%s %s%n", + weather.getDate(), + weather.getWeek(), + weather.getWeather(), + weather.getLowTemp(), + weather.getHighTemp(), + weather.getWindDirection(), + weather.getWindLevel()); + } + } else { + System.out.println("未获取到长沙天气数据"); + } + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.class b/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.class new file mode 100644 index 0000000..a634245 Binary files /dev/null and b/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.class differ diff --git a/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.java b/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.java new file mode 100644 index 0000000..9aff4a3 --- /dev/null +++ b/project2/src/main/java/com/crawler/analysis/MovieAnalyzer.java @@ -0,0 +1,119 @@ +package com.crawler.analysis; + +import com.crawler.chart.model.Movie; + +import java.util.*; +import java.util.stream.Collectors; + +public class MovieAnalyzer { + // 统计电影评分分布 + public static Map analyzeRatingDistribution(List movieList) { + Map ratingMap = new TreeMap<>(); + + for (Movie movie : movieList) { + if (movie != null) { + double rating = movie.getRating(); + ratingMap.put(rating, ratingMap.getOrDefault(rating, 0) + 1); + } + } + + return ratingMap; + } + + // 统计电影年份分布 + public static Map analyzeYearDistribution(List movieList) { + Map 
yearMap = new TreeMap<>(); + + for (Movie movie : movieList) { + if (movie != null && movie.getYear() != null) { + String year = movie.getYear(); + yearMap.put(year, yearMap.getOrDefault(year, 0) + 1); + } + } + + return yearMap; + } + + // 统计电影类型分布 + public static Map analyzeGenreDistribution(List movieList) { + Map genreMap = new HashMap<>(); + + for (Movie movie : movieList) { + if (movie != null && movie.getGenre() != null) { + String genre = movie.getGenre(); + genreMap.put(genre, genreMap.getOrDefault(genre, 0) + 1); + } + } + + return genreMap; + } + + // 统计电影国家/地区分布 + public static Map analyzeCountryDistribution(List movieList) { + Map countryMap = new HashMap<>(); + + for (Movie movie : movieList) { + if (movie != null && movie.getCountry() != null) { + String country = movie.getCountry(); + countryMap.put(country, countryMap.getOrDefault(country, 0) + 1); + } + } + + return countryMap; + } + + // 分析导演作品数量排行 + public static Map analyzeDirectorWorks(List movieList) { + Map directorMap = new HashMap<>(); + + for (Movie movie : movieList) { + if (movie != null && movie.getDirector() != null) { + String director = movie.getDirector(); + directorMap.put(director, directorMap.getOrDefault(director, 0) + 1); + } + } + + // 按作品数量排序 + return directorMap.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .collect(Collectors.toMap( + Map.Entry::getKey, + Map.Entry::getValue, + (e1, e2) -> e1, + LinkedHashMap::new + )); + } + + // 计算平均评分 + public static double calculateAverageRating(List movieList) { + return movieList.stream() + .filter(Objects::nonNull) + .mapToDouble(Movie::getRating) + .average() + .orElse(0.0); + } + + // 计算评分与年份的相关性(简单计算) + public static Map analyzeYearRatingCorrelation(List movieList) { + Map> yearRatingsMap = new TreeMap<>(); + + for (Movie movie : movieList) { + if (movie != null && movie.getYear() != null) { + String year = movie.getYear(); + double rating = movie.getRating(); + yearRatingsMap.computeIfAbsent(year, k -> 
new ArrayList<>()).add(rating); + } + } + + // 计算每年的平均评分 + Map yearAverageRatingMap = new TreeMap<>(); + for (Map.Entry> entry : yearRatingsMap.entrySet()) { + String year = entry.getKey(); + List ratings = entry.getValue(); + double average = ratings.stream().mapToDouble(Double::doubleValue).average().orElse(0.0); + yearAverageRatingMap.put(year, average); + } + + return yearAverageRatingMap; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/ChartGenerator.class b/project2/src/main/java/com/crawler/chart/ChartGenerator.class new file mode 100644 index 0000000..27f984a Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/ChartGenerator.class differ diff --git a/project2/src/main/java/com/crawler/chart/ChartGenerator.java b/project2/src/main/java/com/crawler/chart/ChartGenerator.java new file mode 100644 index 0000000..739481b --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/ChartGenerator.java @@ -0,0 +1,8 @@ +package com.crawler.chart; + +import com.crawler.chart.model.Movie; + +public interface ChartGenerator { + void generateChart(Movie[] movies); + String getChartName(); +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/ChartManager.class b/project2/src/main/java/com/crawler/chart/ChartManager.class new file mode 100644 index 0000000..471d347 Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/ChartManager.class differ diff --git a/project2/src/main/java/com/crawler/chart/ChartManager.java b/project2/src/main/java/com/crawler/chart/ChartManager.java new file mode 100644 index 0000000..c7b8d0e --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/ChartManager.java @@ -0,0 +1,30 @@ +package com.crawler.chart; + +import com.crawler.chart.impl.GenreDistributionChartGenerator; +import com.crawler.chart.impl.RatingDistributionChartGenerator; +import com.crawler.chart.impl.YearDistributionChartGenerator; +import 
com.crawler.chart.impl.YearRatingChartGenerator; +import com.crawler.chart.model.Movie; + +import java.util.ArrayList; +import java.util.List; + +public class ChartManager { + private List chartGenerators; + + public ChartManager() { + chartGenerators = new ArrayList<>(); + } + + public void addChartGenerator(ChartGenerator generator) { + chartGenerators.add(generator); + } + + public void generateAllCharts(List movies) { + Movie[] movieArray = movies.toArray(new Movie[0]); + for (ChartGenerator generator : chartGenerators) { + System.out.println("生成图表: " + generator.getChartName()); + generator.generateChart(movieArray); + } + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.class b/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.class new file mode 100644 index 0000000..b718cb5 Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.class differ diff --git a/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java b/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java new file mode 100644 index 0000000..9ace338 --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/impl/GenreDistributionChartGenerator.java @@ -0,0 +1,26 @@ +package com.crawler.chart.impl; + +import com.crawler.chart.ChartGenerator; +import com.crawler.chart.model.Movie; +import com.crawler.ui.MovieResultDisplay; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class GenreDistributionChartGenerator implements ChartGenerator { + @Override + public void generateChart(Movie[] movies) { + List movieList = List.of(movies); + try { + MovieResultDisplay.generateGenreDistributionChart(movieList); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public String getChartName() { + return "Genre Distribution Chart"; + } +} \ 
No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.class b/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.class new file mode 100644 index 0000000..edded97 Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.class differ diff --git a/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java b/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java new file mode 100644 index 0000000..305731f --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/impl/RatingDistributionChartGenerator.java @@ -0,0 +1,27 @@ +package com.crawler.chart.impl; + +import com.crawler.chart.ChartGenerator; +import com.crawler.chart.model.Movie; +import com.crawler.ui.MovieResultDisplay; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +public class RatingDistributionChartGenerator implements ChartGenerator { + @Override + public void generateChart(Movie[] movies) { + List movieList = List.of(movies); + try { + MovieResultDisplay.generateRatingDistributionChart(movieList); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public String getChartName() { + return "Rating Distribution Chart"; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.class b/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.class new file mode 100644 index 0000000..d5fd445 Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.class differ diff --git a/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java b/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java new file mode 100644 index 0000000..49a4351 --- 
/dev/null +++ b/project2/src/main/java/com/crawler/chart/impl/YearDistributionChartGenerator.java @@ -0,0 +1,25 @@ +package com.crawler.chart.impl; + +import com.crawler.chart.ChartGenerator; +import com.crawler.chart.model.Movie; +import com.crawler.ui.MovieResultDisplay; + +import java.io.IOException; +import java.util.List; + +public class YearDistributionChartGenerator implements ChartGenerator { + @Override + public void generateChart(Movie[] movies) { + List movieList = List.of(movies); + try { + MovieResultDisplay.generateYearDistributionChart(movieList); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public String getChartName() { + return "Year Distribution Chart"; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.class b/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.class new file mode 100644 index 0000000..94f64b1 Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.class differ diff --git a/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java b/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java new file mode 100644 index 0000000..c514eeb --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/impl/YearRatingChartGenerator.java @@ -0,0 +1,25 @@ +package com.crawler.chart.impl; + +import com.crawler.chart.ChartGenerator; +import com.crawler.chart.model.Movie; +import com.crawler.ui.MovieResultDisplay; + +import java.io.IOException; +import java.util.List; + +public class YearRatingChartGenerator implements ChartGenerator { + @Override + public void generateChart(Movie[] movies) { + List movieList = List.of(movies); + try { + MovieResultDisplay.generateYearRatingChart(movieList); + } catch (IOException e) { + e.printStackTrace(); + } + } + + @Override + public String getChartName() { + return "Year Rating Correlation Chart"; + } +} \ No 
newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/model/Movie.class b/project2/src/main/java/com/crawler/chart/model/Movie.class new file mode 100644 index 0000000..ce439cc Binary files /dev/null and b/project2/src/main/java/com/crawler/chart/model/Movie.class differ diff --git a/project2/src/main/java/com/crawler/chart/model/Movie.java b/project2/src/main/java/com/crawler/chart/model/Movie.java new file mode 100644 index 0000000..a169678 --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/model/Movie.java @@ -0,0 +1,108 @@ +package com.crawler.chart.model; + +public class Movie { + private int rank; + private String title; + private double rating; + private int ratingPeople; + private String director; + private String actors; + private String year; + private String country; + private String genre; + private String quote; + + // Getters and Setters + public int getRank() { + return rank; + } + + public void setRank(int rank) { + this.rank = rank; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public double getRating() { + return rating; + } + + public void setRating(double rating) { + this.rating = rating; + } + + public int getRatingPeople() { + return ratingPeople; + } + + public void setRatingPeople(int ratingPeople) { + this.ratingPeople = ratingPeople; + } + + public String getDirector() { + return director; + } + + public void setDirector(String director) { + this.director = director; + } + + public String getActors() { + return actors; + } + + public void setActors(String actors) { + this.actors = actors; + } + + public String getYear() { + return year; + } + + public void setYear(String year) { + this.year = year; + } + + public String getCountry() { + return country; + } + + public void setCountry(String country) { + this.country = country; + } + + public String getGenre() { + return genre; + } + + public void setGenre(String genre) { + 
this.genre = genre; + } + + public String getQuote() { + return quote; + } + + public void setQuote(String quote) { + this.quote = quote; + } + + @Override + public String toString() { + return "Movie{" + + "rank=" + rank + + ", title='" + title + '\'' + + ", rating=" + rating + + ", ratingPeople=" + ratingPeople + + ", director='" + director + '\'' + + ", year='" + year + '\'' + + ", genre='" + genre + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/model/SoftRanking.java b/project2/src/main/java/com/crawler/chart/model/SoftRanking.java new file mode 100644 index 0000000..2280b28 --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/model/SoftRanking.java @@ -0,0 +1,96 @@ +package com.crawler.chart.model; + +public class SoftRanking { + private int rank; + private String universityName; + private String province; + private String type; + private int score; + private int alumniScore; + private int scientificResearchScore; + private int studentQualityScore; + private int resourceScore; + + public int getRank() { + return rank; + } + + public void setRank(int rank) { + this.rank = rank; + } + + public String getUniversityName() { + return universityName; + } + + public void setUniversityName(String universityName) { + this.universityName = universityName; + } + + public String getProvince() { + return province; + } + + public void setProvince(String province) { + this.province = province; + } + + public String getType() { + return type; + } + + public void setType(String type) { + this.type = type; + } + + public int getScore() { + return score; + } + + public void setScore(int score) { + this.score = score; + } + + public int getAlumniScore() { + return alumniScore; + } + + public void setAlumniScore(int alumniScore) { + this.alumniScore = alumniScore; + } + + public int getScientificResearchScore() { + return scientificResearchScore; + } + + public void setScientificResearchScore(int 
scientificResearchScore) { + this.scientificResearchScore = scientificResearchScore; + } + + public int getStudentQualityScore() { + return studentQualityScore; + } + + public void setStudentQualityScore(int studentQualityScore) { + this.studentQualityScore = studentQualityScore; + } + + public int getResourceScore() { + return resourceScore; + } + + public void setResourceScore(int resourceScore) { + this.resourceScore = resourceScore; + } + + @Override + public String toString() { + return "SoftRanking{" + + "rank=" + rank + + ", universityName='" + universityName + '\'' + + ", province='" + province + '\'' + + ", type='" + type + '\'' + + ", score=" + score + + '}'; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/chart/model/WeatherData.java b/project2/src/main/java/com/crawler/chart/model/WeatherData.java new file mode 100644 index 0000000..2312943 --- /dev/null +++ b/project2/src/main/java/com/crawler/chart/model/WeatherData.java @@ -0,0 +1,77 @@ +package com.crawler.chart.model; + +public class WeatherData { + private String date; + private String week; + private String weather; + private String highTemp; + private String lowTemp; + private String windDirection; + private String windLevel; + + public String getDate() { + return date; + } + + public void setDate(String date) { + this.date = date; + } + + public String getWeek() { + return week; + } + + public void setWeek(String week) { + this.week = week; + } + + public String getWeather() { + return weather; + } + + public void setWeather(String weather) { + this.weather = weather; + } + + public String getHighTemp() { + return highTemp; + } + + public void setHighTemp(String highTemp) { + this.highTemp = highTemp; + } + + public String getLowTemp() { + return lowTemp; + } + + public void setLowTemp(String lowTemp) { + this.lowTemp = lowTemp; + } + + public String getWindDirection() { + return windDirection; + } + + public void setWindDirection(String windDirection) { + 
this.windDirection = windDirection; + } + + public String getWindLevel() { + return windLevel; + } + + public void setWindLevel(String windLevel) { + this.windLevel = windLevel; + } + + @Override + public String toString() { + return "WeatherData{" + + "date='" + date + '\'' + + ", weather='" + weather + '\'' + + ", highTemp='" + highTemp + '\'' + + ", lowTemp='" + lowTemp + '\'' + + '}'; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/spider/DoubanSpider.class b/project2/src/main/java/com/crawler/spider/DoubanSpider.class new file mode 100644 index 0000000..8b49dc9 Binary files /dev/null and b/project2/src/main/java/com/crawler/spider/DoubanSpider.class differ diff --git a/project2/src/main/java/com/crawler/spider/DoubanSpider.java b/project2/src/main/java/com/crawler/spider/DoubanSpider.java new file mode 100644 index 0000000..cfb63b6 --- /dev/null +++ b/project2/src/main/java/com/crawler/spider/DoubanSpider.java @@ -0,0 +1,206 @@ +package com.crawler.spider; + +import com.crawler.chart.model.Movie; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.*; + +public class DoubanSpider { + private static final String BASE_URL = "https://movie.douban.com/top250"; + private static final int MAX_PAGES = 12; + private static final int THREAD_POOL_SIZE = 3; + private static final int REQUEST_DELAY = 1000; + + public List crawlMovies() { + List movieList = new ArrayList<>(); + ExecutorService executorService = Executors.newFixedThreadPool(THREAD_POOL_SIZE); + List>> futures = new ArrayList<>(); + + try { + for (int page = 0; page < MAX_PAGES; page++) { + final int currentPage = page; + futures.add(executorService.submit(() -> { + try { + Thread.sleep(REQUEST_DELAY); + return crawlPage(currentPage); + } catch (Exception e) { + e.printStackTrace(); + 
return new ArrayList<>(); + } + })); + } + + for (Future> future : futures) { + try { + movieList.addAll(future.get()); + } catch (Exception e) { + e.printStackTrace(); + } + } + } finally { + executorService.shutdown(); + } + + return movieList; + } + + private List crawlPage(int page) throws IOException { + List movieList = new ArrayList<>(); + String url = BASE_URL + "?start=" + (page * 25); + System.out.println("爬取页面: " + url); + + Document document = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + .timeout(10000) + .get(); + + System.out.println("页面标题: " + document.title()); + + // 选择电影条目 + Elements movieItems = document.select(".grid_view li"); + System.out.println("找到电影条目数: " + movieItems.size()); + + for (Element item : movieItems) { + Movie movie = parseMovie(item); + if (movie != null) { + movieList.add(movie); + } + } + + System.out.println("页面" + (page + 1) + "爬取成功,获取电影数: " + movieList.size()); + return movieList; + } + + private Movie parseMovie(Element item) { + Movie movie = new Movie(); + + try { + // 排名 + Element rankElement = item.selectFirst(".pic em"); + if (rankElement != null) { + movie.setRank(Integer.parseInt(rankElement.text().trim())); + } + + // 标题 + Element titleElement = item.selectFirst(".title"); + if (titleElement != null) { + movie.setTitle(titleElement.text().trim()); + } + + // 评分 + Element ratingElement = item.selectFirst(".rating_num"); + if (ratingElement != null) { + movie.setRating(Double.parseDouble(ratingElement.text().trim())); + } + + // 评价人数 + Element ratingPeopleElement = item.selectFirst(".star span:nth-child(4)"); + if (ratingPeopleElement != null) { + String ratingPeople = ratingPeopleElement.text().trim(); + movie.setRatingPeople(Integer.parseInt(ratingPeople.replaceAll("[^0-9]", ""))); + } + + // 导演和演员 + Element infoElement = item.selectFirst(".bd p:first-child"); + if (infoElement != null) { + String info = 
infoElement.text().trim(); + + // 提取导演 + if (info.contains("导演:")) { + int directorStart = info.indexOf("导演:") + 3; + int directorEnd = info.indexOf("主演:"); + if (directorEnd == -1) { + directorEnd = info.indexOf(" "); + // 找到第一个数字年份的位置 + for (int i = 0; i < info.length(); i++) { + if (Character.isDigit(info.charAt(i))) { + directorEnd = i; + break; + } + } + } + if (directorEnd != -1) { + movie.setDirector(info.substring(directorStart, directorEnd).trim()); + } + } + + // 提取主演 + if (info.contains("主演:")) { + int actorsStart = info.indexOf("主演:") + 3; + int actorsEnd = info.length(); + // 找到第一个数字年份的位置 + for (int i = actorsStart; i < info.length(); i++) { + if (Character.isDigit(info.charAt(i))) { + actorsEnd = i; + break; + } + } + movie.setActors(info.substring(actorsStart, actorsEnd).trim()); + } + + // 提取年份、国家/地区和类型 + // 找到年份的开始位置(第一个数字) + int yearStart = -1; + for (int i = 0; i < info.length(); i++) { + if (Character.isDigit(info.charAt(i))) { + yearStart = i; + break; + } + } + + if (yearStart != -1) { + // 提取年份(4位数字) + if (yearStart + 4 <= info.length()) { + String year = info.substring(yearStart, yearStart + 4); + if (year.matches("\\d{4}")) { + movie.setYear(year); + } + } + + // 提取国家/地区和类型 + int slashIndex = info.indexOf("/", yearStart); + if (slashIndex != -1) { + // 提取国家/地区 + int nextSlashIndex = info.indexOf("/", slashIndex + 1); + if (nextSlashIndex != -1) { + String country = info.substring(slashIndex + 1, nextSlashIndex).trim(); + movie.setCountry(country); + + // 提取类型 + String genre = info.substring(nextSlashIndex + 1).trim(); + // 取第一个类型 + if (!genre.isEmpty()) { + String[] genres = genre.split(" "); + if (genres.length > 0) { + movie.setGenre(genres[0]); + } + } + } + } + } + } + + // 简介 + Element quoteElement = item.selectFirst(".inq"); + if (quoteElement != null) { + movie.setQuote(quoteElement.text().trim()); + } + + // 过滤无效电影 + if (movie.getTitle() == null || movie.getTitle().isEmpty()) { + return null; + } + + return movie; + } catch 
(Exception e) { + e.printStackTrace(); + return null; + } + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/spider/SoftSpider.java b/project2/src/main/java/com/crawler/spider/SoftSpider.java new file mode 100644 index 0000000..94dd727 --- /dev/null +++ b/project2/src/main/java/com/crawler/spider/SoftSpider.java @@ -0,0 +1,104 @@ +package com.crawler.spider; + +import com.crawler.chart.model.SoftRanking; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class SoftSpider { + private static final String BASE_URL = "https://www.shanghairanking.cn/rankings/bcur/2026"; + private static final int MAX_RANKINGS = 30; + private static final int PAGE_SIZE = 50; + private static final int REQUEST_DELAY = 1000; + + public List crawlSoftRanking() { + List rankingList = new ArrayList<>(); + int totalPages = (int) Math.ceil((double) MAX_RANKINGS / PAGE_SIZE); + + try { + System.out.println("爬取软科中国大学排名前" + MAX_RANKINGS + "名..."); + + for (int page = 0; page < totalPages; page++) { + String url = BASE_URL + "?page=" + (page + 1); + System.out.println("爬取页面: " + url); + + Document document = Jsoup.connect(url) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36") + .timeout(30000) + .get(); + + Elements rows = document.select(".rk-table tbody tr"); + System.out.println("页面" + (page + 1) + "找到条目数: " + rows.size()); + + for (Element row : rows) { + SoftRanking ranking = parseRanking(row); + if (ranking != null) { + rankingList.add(ranking); + if (rankingList.size() >= MAX_RANKINGS) { + break; + } + } + } + + if (rankingList.size() >= MAX_RANKINGS) { + break; + } + + Thread.sleep(REQUEST_DELAY); + } + + System.out.println("爬取成功,获取排名数: " + rankingList.size()); + } catch (IOException | InterruptedException e) { 
+ e.printStackTrace(); + } + + return rankingList; + } + + private SoftRanking parseRanking(Element row) { + SoftRanking ranking = new SoftRanking(); + + try { + Element rankElement = row.selectFirst("td:nth-child(1)"); + if (rankElement != null) { + String rankText = rankElement.text().trim(); + ranking.setRank(Integer.parseInt(rankText)); + } + + Element nameElement = row.selectFirst("td:nth-child(2) .name-cn"); + if (nameElement != null) { + ranking.setUniversityName(nameElement.text().trim()); + } + + Element provinceElement = row.selectFirst("td:nth-child(3)"); + if (provinceElement != null) { + ranking.setProvince(provinceElement.text().trim()); + } + + Element typeElement = row.selectFirst("td:nth-child(4)"); + if (typeElement != null) { + ranking.setType(typeElement.text().trim()); + } + + Element scoreElement = row.selectFirst("td:nth-child(5)"); + if (scoreElement != null) { + String scoreText = scoreElement.text().trim(); + ranking.setScore((int) Double.parseDouble(scoreText)); + } + + if (ranking.getUniversityName() == null || ranking.getUniversityName().isEmpty()) { + return null; + } + + return ranking; + } catch (Exception e) { + e.printStackTrace(); + return null; + } + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/spider/WeatherSpider.java b/project2/src/main/java/com/crawler/spider/WeatherSpider.java new file mode 100644 index 0000000..0feb0fc --- /dev/null +++ b/project2/src/main/java/com/crawler/spider/WeatherSpider.java @@ -0,0 +1,201 @@ +package com.crawler.spider; + +import com.crawler.chart.model.WeatherData; +import org.jsoup.Jsoup; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class WeatherSpider { + // 中国气象局 - 长沙天气页面 + 
private static final String WEATHER_URL = "https://weather.cma.cn/web/weather/57679.htm"; + + public List crawlWeather() { + List weatherList = new ArrayList<>(); + + try { + System.out.println("爬取长沙天气数据(中国气象局)..."); + + // 尝试中国气象局网站 + weatherList = crawlFromCMA(); + + // 如果爬取失败,生成模拟数据 + if (weatherList.isEmpty()) { + System.out.println("气象局网站爬取失败,生成模拟天气数据..."); + weatherList = generateMockWeatherData(); + } + + System.out.println("爬取成功,获取天气数据数: " + weatherList.size()); + } catch (Exception e) { + System.out.println("爬取天气数据时发生错误: " + e.getMessage()); + e.printStackTrace(); + // 生成模拟数据作为备选 + weatherList = generateMockWeatherData(); + } + + return weatherList; + } + + private List crawlFromCMA() { + List weatherList = new ArrayList<>(); + try { + Document document = Jsoup.connect(WEATHER_URL) + .userAgent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36") + .timeout(30000) + .get(); + + System.out.println("页面标题: " + document.title()); + + // 获取页面文本内容 + String bodyText = document.body().text(); + System.out.println("页面内容预览(前1000字符): " + bodyText.substring(0, Math.min(1000, bodyText.length()))); + + // 解析7天天气预报数据 + // 格式:星期三04/29 小雨 北风 3~4级 18℃9℃ + Pattern pattern = Pattern.compile("(星期[一二三四五六日])(\\d{2}/\\d{2})\\s*(\\S+)\\s*(\\S+)\\s*(\\S+)\\s*(\\d+℃)(\\d+℃)"); + Matcher matcher = pattern.matcher(bodyText); + + while (matcher.find()) { + WeatherData weather = new WeatherData(); + weather.setWeek(matcher.group(1)); + weather.setDate(matcher.group(2)); + weather.setWeather(matcher.group(3)); + weather.setWindDirection(matcher.group(4)); + weather.setWindLevel(matcher.group(5)); + weather.setLowTemp(matcher.group(7)); + weather.setHighTemp(matcher.group(6)); + + weatherList.add(weather); + System.out.println("解析到天气: " + weather.getDate() + " " + weather.getWeek() + " " + weather.getWeather() + " " + weather.getLowTemp() + "-" + weather.getHighTemp()); + } + + // 如果正则解析失败,尝试其他方法 + if (weatherList.isEmpty()) { 
+ weatherList = parseFromElements(document); + } + + } catch (IOException e) { + System.out.println("访问气象局网站失败: " + e.getMessage()); + } + return weatherList; + } + + private List parseFromElements(Document document) { + List weatherList = new ArrayList<>(); + + // 尝试查找包含日期的元素 + Elements dateElements = document.select("*:contains(星期)"); + System.out.println("找到包含星期的元素数: " + dateElements.size()); + + for (Element element : dateElements) { + String text = element.text(); + if (text.contains("星期") && text.contains("℃")) { + // 提取日期和天气信息 + WeatherData weather = parseWeatherText(text); + if (weather != null) { + weatherList.add(weather); + } + } + } + + return weatherList; + } + + private WeatherData parseWeatherText(String text) { + WeatherData weather = new WeatherData(); + + try { + // 提取星期 + Pattern weekPattern = Pattern.compile("星期[一二三四五六日]"); + Matcher weekMatcher = weekPattern.matcher(text); + if (weekMatcher.find()) { + weather.setWeek(weekMatcher.group()); + } + + // 提取日期 (格式: 04/29) + Pattern datePattern = Pattern.compile("\\d{2}/\\d{2}"); + Matcher dateMatcher = datePattern.matcher(text); + if (dateMatcher.find()) { + weather.setDate(dateMatcher.group()); + } + + // 提取天气状况 + String[] conditions = {"晴", "多云", "阴", "小雨", "中雨", "大雨", "雷阵雨", "雾", "霾"}; + for (String condition : conditions) { + if (text.contains(condition)) { + weather.setWeather(condition); + break; + } + } + + // 提取温度 + Pattern tempPattern = Pattern.compile("(\\d+℃)"); + Matcher tempMatcher = tempPattern.matcher(text); + List temps = new ArrayList<>(); + while (tempMatcher.find()) { + temps.add(tempMatcher.group()); + } + if (temps.size() >= 2) { + weather.setLowTemp(temps.get(0)); + weather.setHighTemp(temps.get(1)); + } + + // 提取风向 + String[] directions = {"北风", "南风", "东风", "西风", "东北风", "东南风", "西北风", "西南风"}; + for (String direction : directions) { + if (text.contains(direction)) { + weather.setWindDirection(direction); + break; + } + } + + // 提取风力 + Pattern windPattern = 
Pattern.compile("(\\d+~\\d+级|微风)"); + Matcher windMatcher = windPattern.matcher(text); + if (windMatcher.find()) { + weather.setWindLevel(windMatcher.group()); + } + + if (weather.getDate() != null && !weather.getDate().isEmpty()) { + return weather; + } + } catch (Exception e) { + // 忽略解析错误 + } + return null; + } + + private List generateMockWeatherData() { + List weatherList = new ArrayList<>(); + LocalDate now = LocalDate.now(); + String[] weeks = {"星期日", "星期一", "星期二", "星期三", "星期四", "星期五", "星期六"}; + String[] weathers = {"晴", "多云", "晴转多云", "多云转晴", "阴", "小雨", "阵雨"}; + + for (int i = 0; i < 7; i++) { + LocalDate date = now.plusDays(i); + WeatherData day = new WeatherData(); + day.setDate(date.format(DateTimeFormatter.ofPattern("MM/dd"))); + // getDayOfWeek().getValue() 返回 1-7,需要转换为数组索引 0-6 + int dayOfWeekValue = date.getDayOfWeek().getValue(); + int weekIndex = dayOfWeekValue == 7 ? 0 : dayOfWeekValue; + day.setWeek(weeks[weekIndex]); + day.setWeather(weathers[i % weathers.length]); + int high = 24 + (int)(Math.random() * 6); + int low = 15 + (int)(Math.random() * 5); + day.setHighTemp(String.valueOf(high) + "℃"); + day.setLowTemp(String.valueOf(low) + "℃"); + day.setWindDirection(i % 2 == 0 ? 
"南风" : "北风"); + day.setWindLevel("2-3级"); + weatherList.add(day); + } + return weatherList; + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/ui/MovieResultDisplay.class b/project2/src/main/java/com/crawler/ui/MovieResultDisplay.class new file mode 100644 index 0000000..87004a1 Binary files /dev/null and b/project2/src/main/java/com/crawler/ui/MovieResultDisplay.class differ diff --git a/project2/src/main/java/com/crawler/ui/MovieResultDisplay.java b/project2/src/main/java/com/crawler/ui/MovieResultDisplay.java new file mode 100644 index 0000000..0b111d1 --- /dev/null +++ b/project2/src/main/java/com/crawler/ui/MovieResultDisplay.java @@ -0,0 +1,216 @@ +package com.crawler.ui; + +import com.crawler.analysis.MovieAnalyzer; +import com.crawler.chart.model.Movie; +import org.jfree.chart.ChartFactory; +import org.jfree.chart.ChartUtils; +import org.jfree.chart.JFreeChart; +import org.jfree.chart.plot.PlotOrientation; +import org.jfree.data.category.DefaultCategoryDataset; +import org.jfree.data.general.DefaultPieDataset; +import org.jfree.data.statistics.HistogramDataset; +import org.jfree.chart.plot.PiePlot; +import org.jfree.chart.labels.StandardPieSectionLabelGenerator; +import java.text.DecimalFormat; +import java.text.NumberFormat; + +import java.io.File; +import java.io.IOException; +import java.util.List; +import java.util.Map; + +public class MovieResultDisplay { + // 控制台输出统计结果 + public static void displayResults(List movieList) { + System.out.println("\n=== 电影数据统计结果 ==="); + System.out.println("爬取电影总数: " + movieList.size()); + + // 平均评分 + double averageRating = MovieAnalyzer.calculateAverageRating(movieList); + System.out.printf("平均评分: %.2f\n", averageRating); + + // 电影评分分布 + System.out.println("\n=== 电影评分分布 ==="); + Map ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); + for (Map.Entry entry : ratingDistribution.entrySet()) { + System.out.printf("评分 %.1f: %d部\n", entry.getKey(), entry.getValue()); + } + 
+ // 电影年份分布(最近20年) + System.out.println("\n=== 电影年份分布(最近20年)==="); + Map yearDistribution = MovieAnalyzer.analyzeYearDistribution(movieList); + int count = 0; + for (Map.Entry entry : yearDistribution.entrySet()) { + if (count >= yearDistribution.size() - 20) { // 只显示最近20年 + System.out.printf("%s年: %d部\n", entry.getKey(), entry.getValue()); + } + count++; + } + + // 电影类型分布 + System.out.println("\n=== 电影类型分布 ==="); + Map genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); + genreDistribution.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .limit(10) // 只显示前10种类型 + .forEach(entry -> System.out.printf("%-10s: %d部\n", entry.getKey(), entry.getValue())); + + // 导演作品数量排行 + System.out.println("\n=== 导演作品数量排行 ==="); + Map directorWorks = MovieAnalyzer.analyzeDirectorWorks(movieList); + count = 0; + for (Map.Entry entry : directorWorks.entrySet()) { + if (count < 10) { // 只显示前10位导演 + System.out.printf("%-20s: %d部\n", entry.getKey(), entry.getValue()); + count++; + } else { + break; + } + } + + // 评分与年份相关性 + System.out.println("\n=== 评分与年份相关性 ==="); + Map yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); + for (Map.Entry entry : yearRatingCorrelation.entrySet()) { + System.out.printf("%s年: 平均评分 %.2f\n", entry.getKey(), entry.getValue()); + } + } + + // 生成电影评分分布直方图 + public static void generateRatingDistributionChart(List movieList) throws IOException { + Map ratingDistribution = MovieAnalyzer.analyzeRatingDistribution(movieList); + DefaultCategoryDataset dataset = new DefaultCategoryDataset(); + + for (Map.Entry entry : ratingDistribution.entrySet()) { + dataset.addValue(entry.getValue(), "Count", entry.getKey().toString()); + } + + JFreeChart chart = ChartFactory.createBarChart( + "Movie Rating Distribution", + "Rating", + "Count", + dataset, + PlotOrientation.VERTICAL, + true, + true, + false + ); + + ChartUtils.saveChartAsPNG(new File("movie_rating_distribution.png"), chart, 800, 600); + 
System.out.println("电影评分分布图表已保存为 movie_rating_distribution.png"); + } + + // 生成电影年份分布折线图 + public static void generateYearDistributionChart(List movieList) throws IOException { + Map yearDistribution = MovieAnalyzer.analyzeYearDistribution(movieList); + DefaultCategoryDataset dataset = new DefaultCategoryDataset(); + + System.out.println("年份分布数据:"); + for (Map.Entry entry : yearDistribution.entrySet()) { + System.out.println("年份: '" + entry.getKey() + "', 数量: " + entry.getValue()); + // 尝试提取年份数字 + String year = entry.getKey(); + // 提取4位数字作为年份 + String yearMatch = year.replaceAll("[^0-9]", ""); + if (yearMatch.length() >= 4) { + yearMatch = yearMatch.substring(0, 4); + dataset.addValue(entry.getValue(), "Count", yearMatch); + } + } + + JFreeChart chart = ChartFactory.createLineChart( + "Movie Year Distribution", + "Year", + "Count", + dataset, + PlotOrientation.VERTICAL, + true, + true, + false + ); + + ChartUtils.saveChartAsPNG(new File("movie_year_distribution.png"), chart, 800, 600); + System.out.println("电影年份分布图表已保存为 movie_year_distribution.png"); + } + + // 生成电影类型分布饼图 + public static void generateGenreDistributionChart(List movieList) throws IOException { + Map genreDistribution = MovieAnalyzer.analyzeGenreDistribution(movieList); + DefaultPieDataset dataset = new DefaultPieDataset(); + + // 只显示前10种类型 + genreDistribution.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .limit(10) + .forEach(entry -> { + // 使用英文标签避免中文显示问题 + String englishLabel = getEnglishGenre(entry.getKey()) + " (" + entry.getValue() + ")"; + dataset.setValue(englishLabel, entry.getValue()); + }); + + JFreeChart chart = ChartFactory.createPieChart( + "Movie Genre Distribution", // 使用英文标题 + dataset, + true, // 显示图例 + true, // 显示工具提示 + false // 不显示URL + ); + + ChartUtils.saveChartAsPNG(new File("movie_genre_distribution.png"), chart, 800, 600); + System.out.println("电影类型分布图表已保存为 movie_genre_distribution.png"); + } + + // 将中文类型转换为英文 + private static String 
getEnglishGenre(String chineseGenre) { + switch (chineseGenre) { + case "冒险": return "Adventure"; + case "奇幻": return "Fantasy"; + case "爱情": return "Romance"; + case "惊悚": return "Thriller"; + case "动画": return "Animation"; + case "悬疑": return "Mystery"; + case "家庭": return "Family"; + case "犯罪": return "Crime"; + case "同性": return "LGBTQ+"; + case "历史": return "History"; + case "剧情": return "Drama"; + case "动作": return "Action"; + case "喜剧": return "Comedy"; + case "科幻": return "Sci-Fi"; + default: return chineseGenre; + } + } + + // 生成评分与年份相关性图表 + public static void generateYearRatingChart(List movieList) throws IOException { + Map yearRatingCorrelation = MovieAnalyzer.analyzeYearRatingCorrelation(movieList); + DefaultCategoryDataset dataset = new DefaultCategoryDataset(); + + System.out.println("评分与年份相关性数据:"); + for (Map.Entry entry : yearRatingCorrelation.entrySet()) { + System.out.println("年份: '" + entry.getKey() + "', 平均评分: " + entry.getValue()); + // 尝试提取年份数字 + String year = entry.getKey(); + // 提取4位数字作为年份 + String yearMatch = year.replaceAll("[^0-9]", ""); + if (yearMatch.length() >= 4) { + yearMatch = yearMatch.substring(0, 4); + dataset.addValue(entry.getValue(), "Avg Rating", yearMatch); + } + } + + JFreeChart chart = ChartFactory.createLineChart( + "Year vs Rating Correlation", + "Year", + "Average Rating", + dataset, + PlotOrientation.VERTICAL, + true, + true, + false + ); + + ChartUtils.saveChartAsPNG(new File("movie_year_rating.png"), chart, 800, 600); + System.out.println("评分与年份相关性图表已保存为 movie_year_rating.png"); + } +} \ No newline at end of file diff --git a/project2/src/main/java/com/crawler/utils/DataUtils.class b/project2/src/main/java/com/crawler/utils/DataUtils.class new file mode 100644 index 0000000..50cc932 Binary files /dev/null and b/project2/src/main/java/com/crawler/utils/DataUtils.class differ diff --git a/project2/src/main/java/com/crawler/utils/DataUtils.java b/project2/src/main/java/com/crawler/utils/DataUtils.java new file mode 
100644 index 0000000..2d11480 --- /dev/null +++ b/project2/src/main/java/com/crawler/utils/DataUtils.java @@ -0,0 +1,139 @@ +package com.crawler.utils; + +import com.crawler.chart.model.Movie; +import com.crawler.chart.model.SoftRanking; +import com.crawler.chart.model.WeatherData; + +import java.io.FileWriter; +import java.io.IOException; +import java.util.List; + +public class DataUtils { + // 清洗电影数据 + public static Movie cleanMovie(Movie movie) { + if (movie == null) return null; + + // 清洗标题 + if (movie.getTitle() != null) { + movie.setTitle(movie.getTitle().trim().replaceAll("\\s+", " ")); + } + + // 清洗导演 + if (movie.getDirector() != null) { + movie.setDirector(movie.getDirector().trim()); + } + + // 清洗演员 + if (movie.getActors() != null) { + movie.setActors(movie.getActors().trim()); + } + + // 清洗年份 + if (movie.getYear() != null) { + movie.setYear(movie.getYear().trim()); + } + + // 清洗国家/地区 + if (movie.getCountry() != null) { + movie.setCountry(movie.getCountry().trim()); + } + + // 清洗类型 + if (movie.getGenre() != null) { + movie.setGenre(movie.getGenre().trim()); + } + + // 清洗简介 + if (movie.getQuote() != null) { + movie.setQuote(movie.getQuote().trim().replaceAll("\\s+", " ")); + } + + return movie; + } + + // 写入电影数据到CSV文件 + public static void writeMovieToCSV(List movieList, String filePath) throws IOException { + // 添加时间戳避免文件冲突 + String timestamp = String.valueOf(System.currentTimeMillis()); + String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); + + FileWriter writer = new FileWriter(actualFilePath); + // 写入表头 + writer.write("排名,标题,评分,评价人数,导演,演员,年份,国家/地区,类型,简介\n"); + + // 写入数据 + for (Movie movie : movieList) { + if (movie != null) { + writer.write(movie.getRank() + ","); + writer.write(escapeCsv(movie.getTitle()) + ","); + writer.write(movie.getRating() + ","); + writer.write(movie.getRatingPeople() + ","); + writer.write(escapeCsv(movie.getDirector()) + ","); + writer.write(escapeCsv(movie.getActors()) + ","); + 
writer.write(escapeCsv(movie.getYear()) + ","); + writer.write(escapeCsv(movie.getCountry()) + ","); + writer.write(escapeCsv(movie.getGenre()) + ","); + writer.write(escapeCsv(movie.getQuote()) + "\n"); + } + } + + writer.close(); + System.out.println("数据已保存到 " + actualFilePath); + } + + // 转义CSV特殊字符 + private static String escapeCsv(String value) { + if (value == null) return ""; + if (value.contains(",") || value.contains("\"")) { + value = value.replaceAll("\"", "\"\""); + return "\"" + value + "\""; + } + return value; + } + + // 写入软科排名数据到CSV文件 + public static void writeSoftRankingToCSV(List rankingList, String filePath) throws IOException { + String timestamp = String.valueOf(System.currentTimeMillis()); + String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); + + FileWriter writer = new FileWriter(actualFilePath); + writer.write("排名,学校名称,省份,类型,总分\n"); + + for (SoftRanking ranking : rankingList) { + if (ranking != null) { + writer.write(ranking.getRank() + ","); + writer.write(escapeCsv(ranking.getUniversityName()) + ","); + writer.write(escapeCsv(ranking.getProvince()) + ","); + writer.write(escapeCsv(ranking.getType()) + ","); + writer.write(ranking.getScore() + "\n"); + } + } + + writer.close(); + System.out.println("软科排名数据已保存到 " + actualFilePath); + } + + // 写入天气数据到CSV文件 + public static void writeWeatherToCSV(List weatherList, String filePath) throws IOException { + String timestamp = String.valueOf(System.currentTimeMillis()); + String actualFilePath = filePath.replace(".csv", "_" + timestamp + ".csv"); + + FileWriter writer = new FileWriter(actualFilePath); + writer.write("日期,星期,天气,最高温度,最低温度,风向,风力\n"); + + for (WeatherData weather : weatherList) { + if (weather != null) { + writer.write(escapeCsv(weather.getDate()) + ","); + writer.write(escapeCsv(weather.getWeek()) + ","); + writer.write(escapeCsv(weather.getWeather()) + ","); + writer.write(escapeCsv(weather.getHighTemp()) + ","); + writer.write(escapeCsv(weather.getLowTemp()) + 
","); + writer.write(escapeCsv(weather.getWindDirection()) + ","); + writer.write(escapeCsv(weather.getWindLevel()) + "\n"); + } + } + + writer.close(); + System.out.println("天气数据已保存到 " + actualFilePath); + } +} \ No newline at end of file diff --git a/project2/target/classes/com/crawler/MovieMain.class b/project2/target/classes/com/crawler/MovieMain.class new file mode 100644 index 0000000..04adb5b Binary files /dev/null and b/project2/target/classes/com/crawler/MovieMain.class differ diff --git a/project2/target/classes/com/crawler/analysis/MovieAnalyzer.class b/project2/target/classes/com/crawler/analysis/MovieAnalyzer.class new file mode 100644 index 0000000..40c07b8 Binary files /dev/null and b/project2/target/classes/com/crawler/analysis/MovieAnalyzer.class differ diff --git a/project2/target/classes/com/crawler/chart/ChartGenerator.class b/project2/target/classes/com/crawler/chart/ChartGenerator.class new file mode 100644 index 0000000..c97dbfd Binary files /dev/null and b/project2/target/classes/com/crawler/chart/ChartGenerator.class differ diff --git a/project2/target/classes/com/crawler/chart/ChartManager.class b/project2/target/classes/com/crawler/chart/ChartManager.class new file mode 100644 index 0000000..86bb123 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/ChartManager.class differ diff --git a/project2/target/classes/com/crawler/chart/impl/GenreDistributionChartGenerator.class b/project2/target/classes/com/crawler/chart/impl/GenreDistributionChartGenerator.class new file mode 100644 index 0000000..de71054 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/impl/GenreDistributionChartGenerator.class differ diff --git a/project2/target/classes/com/crawler/chart/impl/RatingDistributionChartGenerator.class b/project2/target/classes/com/crawler/chart/impl/RatingDistributionChartGenerator.class new file mode 100644 index 0000000..ce303c2 Binary files /dev/null and 
b/project2/target/classes/com/crawler/chart/impl/RatingDistributionChartGenerator.class differ diff --git a/project2/target/classes/com/crawler/chart/impl/YearDistributionChartGenerator.class b/project2/target/classes/com/crawler/chart/impl/YearDistributionChartGenerator.class new file mode 100644 index 0000000..5f9eb32 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/impl/YearDistributionChartGenerator.class differ diff --git a/project2/target/classes/com/crawler/chart/impl/YearRatingChartGenerator.class b/project2/target/classes/com/crawler/chart/impl/YearRatingChartGenerator.class new file mode 100644 index 0000000..30f1241 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/impl/YearRatingChartGenerator.class differ diff --git a/project2/target/classes/com/crawler/chart/model/Movie.class b/project2/target/classes/com/crawler/chart/model/Movie.class new file mode 100644 index 0000000..0f393c8 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/model/Movie.class differ diff --git a/project2/target/classes/com/crawler/chart/model/SoftRanking.class b/project2/target/classes/com/crawler/chart/model/SoftRanking.class new file mode 100644 index 0000000..2d1dc08 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/model/SoftRanking.class differ diff --git a/project2/target/classes/com/crawler/chart/model/WeatherData.class b/project2/target/classes/com/crawler/chart/model/WeatherData.class new file mode 100644 index 0000000..66f9567 Binary files /dev/null and b/project2/target/classes/com/crawler/chart/model/WeatherData.class differ diff --git a/project2/target/classes/com/crawler/spider/DoubanSpider.class b/project2/target/classes/com/crawler/spider/DoubanSpider.class new file mode 100644 index 0000000..7e4dd4a Binary files /dev/null and b/project2/target/classes/com/crawler/spider/DoubanSpider.class differ diff --git a/project2/target/classes/com/crawler/spider/SoftSpider.class 
b/project2/target/classes/com/crawler/spider/SoftSpider.class new file mode 100644 index 0000000..0b892da Binary files /dev/null and b/project2/target/classes/com/crawler/spider/SoftSpider.class differ diff --git a/project2/target/classes/com/crawler/spider/WeatherSpider.class b/project2/target/classes/com/crawler/spider/WeatherSpider.class new file mode 100644 index 0000000..ea280dd Binary files /dev/null and b/project2/target/classes/com/crawler/spider/WeatherSpider.class differ diff --git a/project2/target/classes/com/crawler/ui/MovieResultDisplay.class b/project2/target/classes/com/crawler/ui/MovieResultDisplay.class new file mode 100644 index 0000000..f2a3b17 Binary files /dev/null and b/project2/target/classes/com/crawler/ui/MovieResultDisplay.class differ diff --git a/project2/target/classes/com/crawler/utils/DataUtils.class b/project2/target/classes/com/crawler/utils/DataUtils.class new file mode 100644 index 0000000..3c671ac Binary files /dev/null and b/project2/target/classes/com/crawler/utils/DataUtils.class differ