40 changed files with 1616 additions and 715 deletions
@ -0,0 +1,11 @@ |
|||
target/ |
|||
*.class |
|||
|
|||
lo_profile*/ |
|||
report_render*/ |
|||
reference_report.docx |
|||
|
|||
.idea/ |
|||
*.iml |
|||
|
|||
*.log |
|||
|
@ -1,501 +1,127 @@ |
|||
[ { |
|||
"id" : null, |
|||
"title" : "肖申克的救赎", |
|||
"rating" : 9.7, |
|||
"year" : 1994, |
|||
"releaseYear" : 1994, |
|||
"rank" : 1, |
|||
"quote" : "", |
|||
"director" : "弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994 / 美国 / 犯罪 剧情", |
|||
"director" : "弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
"boxOffice" : 0.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "https://img3.doubanio.com/view/photo/s_ratio_poster/public/p480747492.jpg", |
|||
"sourceSite" : "Douban Top 250" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "霸王别姬", |
|||
"rating" : 9.6, |
|||
"year" : 1993, |
|||
"releaseYear" : 1993, |
|||
"rank" : 2, |
|||
"quote" : "", |
|||
"director" : "陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha... 1993 / 中国大陆 中国香港 / 剧情 爱情 同性", |
|||
"director" : "陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung", |
|||
"reviewCount" : 0, |
|||
"country" : "中国大陆 中国香港", |
|||
"boxOffice" : 0.0 |
|||
"boxOffice" : 0.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2911205318.jpg", |
|||
"sourceSite" : "Douban Top 250" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "泰坦尼克号", |
|||
"rating" : 9.5, |
|||
"year" : 1997, |
|||
"releaseYear" : 1997, |
|||
"rank" : 3, |
|||
"quote" : "", |
|||
"director" : "詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997 / 美国 / 剧情 爱情 灾难", |
|||
"director" : "詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
"boxOffice" : 0.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "https://img9.doubanio.com/view/photo/s_ratio_poster/public/p457760035.jpg", |
|||
"sourceSite" : "Douban Top 250" |
|||
}, { |
|||
"title" : "阿甘正传", |
|||
"rating" : 9.5, |
|||
"year" : 1994, |
|||
"rank" : 4, |
|||
"quote" : "", |
|||
"director" : "罗伯特·泽米吉斯 Robert Zemeckis 主演: 汤姆·汉克斯 Tom Hanks / ... 1994 / 美国 / 剧情 爱情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "千与千寻", |
|||
"rating" : 9.4, |
|||
"year" : 2001, |
|||
"rank" : 5, |
|||
"quote" : "", |
|||
"director" : "宫崎骏 Hayao Miyazaki 主演: 柊瑠美 Rumi Hîragi / 入野自由 Miy... 2001 / 日本 / 剧情 动画 奇幻", |
|||
"reviewCount" : 0, |
|||
"country" : "日本", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "美丽人生", |
|||
"rating" : 9.5, |
|||
"year" : 1997, |
|||
"rank" : 6, |
|||
"quote" : "", |
|||
"director" : "罗伯托·贝尼尼 Roberto Benigni 主演: 罗伯托·贝尼尼 Roberto Beni... 1997 / 意大利 / 剧情 喜剧 爱情 战争", |
|||
"reviewCount" : 0, |
|||
"country" : "意大利", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "星际穿越", |
|||
"rating" : 9.4, |
|||
"year" : 2014, |
|||
"rank" : 7, |
|||
"quote" : "", |
|||
"director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 马修·麦康纳 Matthew Mc... 2014 / 美国 英国 加拿大 / 剧情 科幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 英国 加拿大", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "这个杀手不太冷", |
|||
"rating" : 9.4, |
|||
"year" : 1994, |
|||
"rank" : 8, |
|||
"quote" : "", |
|||
"director" : "吕克·贝松 Luc Besson 主演: 让·雷诺 Jean Reno / 娜塔莉·波特曼 ... 1994 / 法国 美国 / 剧情 动作 犯罪", |
|||
"reviewCount" : 0, |
|||
"country" : "法国 美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "盗梦空间", |
|||
"rating" : 9.4, |
|||
"year" : 2010, |
|||
"rank" : 9, |
|||
"quote" : "", |
|||
"director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 莱昂纳多·迪卡普里奥 Le... 2010 / 美国 英国 / 剧情 科幻 悬疑 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 英国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "楚门的世界", |
|||
"rating" : 9.4, |
|||
"year" : 1998, |
|||
"rank" : 10, |
|||
"quote" : "", |
|||
"director" : "彼得·威尔 Peter Weir 主演: 金·凯瑞 Jim Carrey / 劳拉·琳妮 Lau... 1998 / 美国 / 剧情 科幻", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "辛德勒的名单", |
|||
"rating" : 9.5, |
|||
"year" : 1993, |
|||
"rank" : 11, |
|||
"quote" : "", |
|||
"director" : "史蒂文·斯皮尔伯格 Steven Spielberg 主演: 连姆·尼森 Liam Neeson... 1993 / 美国 / 剧情 历史 战争", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "忠犬八公的故事", |
|||
"rating" : 9.4, |
|||
"year" : 2009, |
|||
"rank" : 12, |
|||
"quote" : "", |
|||
"director" : "莱塞·霍尔斯道姆 Lasse Hallström 主演: 理查·基尔 Richard Ger... 2009 / 美国 英国 / 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 英国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "海上钢琴师", |
|||
"rating" : 9.3, |
|||
"year" : 1998, |
|||
"rank" : 13, |
|||
"quote" : "", |
|||
"director" : "朱塞佩·托纳多雷 Giuseppe Tornatore 主演: 蒂姆·罗斯 Tim Roth / ... 1998 / 意大利 / 剧情 音乐", |
|||
"reviewCount" : 0, |
|||
"country" : "意大利", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "疯狂动物城", |
|||
"rating" : 9.3, |
|||
"year" : 2016, |
|||
"rank" : 14, |
|||
"quote" : "", |
|||
"director" : "拜伦·霍华德 Byron Howard / 瑞奇·摩尔 Rich Moore 主演: 金妮弗·... 2016 / 美国 / 喜剧 动画 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "三傻大闹宝莱坞", |
|||
"rating" : 9.2, |
|||
"year" : 2009, |
|||
"rank" : 15, |
|||
"quote" : "", |
|||
"director" : "拉库马·希拉尼 Rajkumar Hirani 主演: 阿米尔·汗 Aamir Khan / 卡... 2009 / 印度 / 剧情 喜剧 爱情 歌舞", |
|||
"reviewCount" : 0, |
|||
"country" : "印度", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "机器人总动员", |
|||
"rating" : 9.3, |
|||
"year" : 2008, |
|||
"rank" : 16, |
|||
"quote" : "", |
|||
"director" : "安德鲁·斯坦顿 Andrew Stanton 主演: 本·贝尔特 Ben Burtt / 艾丽... 2008 / 美国 / 科幻 动画 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "放牛班的春天", |
|||
"rating" : 9.3, |
|||
"year" : 2004, |
|||
"rank" : 17, |
|||
"quote" : "", |
|||
"director" : "克里斯托夫·巴拉蒂 Christophe Barratier 主演: 让-巴蒂斯特·莫尼... 2004 / 法国 瑞士 德国 / 剧情 音乐", |
|||
"reviewCount" : 0, |
|||
"country" : "法国 瑞士 德国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "无间道", |
|||
"rating" : 9.3, |
|||
"year" : 2002, |
|||
"rank" : 18, |
|||
"quote" : "", |
|||
"director" : "刘伟强 / 麦兆辉 主演: 刘德华 Andy Lau / 梁朝伟 Tony Leung Chiu W... 2002 / 中国香港 / 剧情 犯罪 惊悚", |
|||
"reviewCount" : 0, |
|||
"country" : "中国香港", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "控方证人", |
|||
"rating" : 9.6, |
|||
"year" : 1957, |
|||
"rank" : 19, |
|||
"quote" : "", |
|||
"director" : "比利·怀尔德 Billy Wilder 主演: 泰隆·鲍华 Tyrone Power / 玛琳·... 1957 / 美国 / 剧情 犯罪 悬疑 惊悚", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "寻梦环游记", |
|||
"rating" : 9.1, |
|||
"year" : 2017, |
|||
"rank" : 20, |
|||
"quote" : "", |
|||
"director" : "李·昂克里奇 Lee Unkrich / 阿德里安·莫利纳 Adrian Molina 主演: ... 2017 / 美国 / 喜剧 动画 奇幻 音乐", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "大话西游之大圣娶亲", |
|||
"rating" : 9.2, |
|||
"year" : 1995, |
|||
"rank" : 21, |
|||
"quote" : "", |
|||
"director" : "刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装", |
|||
"reviewCount" : 0, |
|||
"country" : "中国香港 中国大陆", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "熔炉", |
|||
"rating" : 9.3, |
|||
"year" : 2011, |
|||
"rank" : 22, |
|||
"quote" : "", |
|||
"director" : "黄东赫 Dong-hyuk Hwang 主演: 孔侑 Yoo Gong / 郑有美 Yu-mi Jung /... 2011 / 韩国 / 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "韩国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "触不可及", |
|||
"rating" : 9.3, |
|||
"year" : 2011, |
|||
"rank" : 23, |
|||
"quote" : "", |
|||
"director" : "奥利维·那卡什 Olivier Nakache / 艾力克·托兰达 Eric Toledano 主... 2011 / 法国 / 剧情 喜剧", |
|||
"reviewCount" : 0, |
|||
"country" : "法国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "教父", |
|||
"rating" : 9.3, |
|||
"year" : 1972, |
|||
"rank" : 24, |
|||
"quote" : "", |
|||
"director" : "弗朗西斯·福特·科波拉 Francis Ford Coppola 主演: 马龙·白兰度 M... 1972 / 美国 / 剧情 犯罪", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "末代皇帝", |
|||
"rating" : 9.3, |
|||
"year" : 1987, |
|||
"rank" : 25, |
|||
"quote" : "", |
|||
"director" : "贝纳尔多·贝托鲁奇 Bernardo Bertolucci 主演: 尊龙 John Lone / 陈... 1987 / 英国 意大利 中国大陆 法国 / 剧情 传记 历史", |
|||
"reviewCount" : 0, |
|||
"country" : "英国 意大利 中国大陆 法国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "哈利·波特与魔法石", |
|||
"rating" : 9.2, |
|||
"year" : 2001, |
|||
"rank" : 26, |
|||
"quote" : "", |
|||
"director" : "Chris Columbus 主演: Daniel Radcliffe / Emma Watson / Rupert Grint 2001 / 美国 英国 / 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 英国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "当幸福来敲门", |
|||
"rating" : 9.1, |
|||
"year" : 2006, |
|||
"rank" : 27, |
|||
"quote" : "", |
|||
"director" : "加布里尔·穆奇诺 Gabriele Muccino 主演: 威尔·史密斯 Will Smith ... 2006 / 美国 / 剧情 传记 家庭", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "龙猫", |
|||
"rating" : 9.2, |
|||
"year" : 1988, |
|||
"rank" : 28, |
|||
"quote" : "", |
|||
"director" : "宫崎骏 Hayao Miyazaki 主演: 日高法子 Noriko Hidaka / 坂本千夏 Ch... 1988 / 日本 / 动画 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "日本", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "活着", |
|||
"rating" : 9.3, |
|||
"year" : 1994, |
|||
"rank" : 29, |
|||
"quote" : "", |
|||
"director" : "张艺谋 Yimou Zhang 主演: 葛优 You Ge / 巩俐 Li Gong / 姜武 Wu Jiang 1994 / 中国大陆 中国香港 / 剧情 历史 家庭", |
|||
"reviewCount" : 0, |
|||
"country" : "中国大陆 中国香港", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "怦然心动", |
|||
"rating" : 9.1, |
|||
"year" : 2010, |
|||
"rank" : 30, |
|||
"quote" : "", |
|||
"director" : "罗伯·莱纳 Rob Reiner 主演: 玛德琳·卡罗尔 Madeline Carroll / 卡... 2010 / 美国 / 剧情 喜剧 爱情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "蝙蝠侠:黑暗骑士", |
|||
"rating" : 9.2, |
|||
"year" : 2008, |
|||
"rank" : 31, |
|||
"quote" : "", |
|||
"director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 克里斯蒂安·贝尔 Christ... 2008 / 美国 英国 / 剧情 动作 科幻 犯罪 惊悚", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 英国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "指环王3:王者无敌", |
|||
"rating" : 9.3, |
|||
"year" : 2003, |
|||
"rank" : 32, |
|||
"quote" : "", |
|||
"director" : "彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2003 / 美国 新西兰 / 剧情 动作 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 新西兰", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "我不是药神", |
|||
"rating" : 9.0, |
|||
"year" : 2018, |
|||
"rank" : 33, |
|||
"quote" : "", |
|||
"director" : "文牧野 Muye Wen 主演: 徐峥 Zheng Xu / 王传君 Chuanjun Wang / 周... 2018 / 中国大陆 / 剧情 喜剧", |
|||
"reviewCount" : 0, |
|||
"country" : "中国大陆", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "乱世佳人", |
|||
"rating" : 9.3, |
|||
"year" : 1939, |
|||
"rank" : 34, |
|||
"quote" : "", |
|||
"director" : "维克多·弗莱明 Victor Fleming / 乔治·库克 George Cukor 主演: 费... 1939 / 美国 / 剧情 历史 爱情 战争", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "飞屋环游记", |
|||
"rating" : 9.1, |
|||
"year" : 2009, |
|||
"rank" : 35, |
|||
"quote" : "", |
|||
"director" : "彼特·道格特 Pete Docter / 鲍勃·彼德森 Bob Peterson 主演: 爱德... 2009 / 美国 / 剧情 喜剧 动画 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "让子弹飞", |
|||
"rating" : 9.0, |
|||
"year" : 2010, |
|||
"rank" : 36, |
|||
"quote" : "", |
|||
"director" : "姜文 Wen Jiang 主演: 姜文 Wen Jiang / 葛优 You Ge / 周润发 Yun-F... 2010 / 中国大陆 中国香港 / 剧情 喜剧 动作 西部", |
|||
"reviewCount" : 0, |
|||
"country" : "中国大陆 中国香港", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "哈尔的移动城堡", |
|||
"rating" : 9.1, |
|||
"year" : 2004, |
|||
"rank" : 37, |
|||
"quote" : "", |
|||
"director" : "宫崎骏 Hayao Miyazaki 主演: 倍赏千惠子 Chieko Baishô / 木村拓... 2004 / 日本 / 爱情 动画 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "日本", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "十二怒汉", |
|||
"rating" : 9.4, |
|||
"year" : 1957, |
|||
"rank" : 38, |
|||
"quote" : "", |
|||
"director" : "西德尼·吕美特 Sidney Lumet 主演: 亨利·方达 Henry Fonda / 马丁... 1957 / 美国 / 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "海蒂和爷爷", |
|||
"rating" : 9.3, |
|||
"year" : 2015, |
|||
"rank" : 39, |
|||
"quote" : "", |
|||
"director" : "阿兰·葛斯彭纳 Alain Gsponer 主演: 阿努克·斯特芬 Anuk Steffen /... 2015 / 德国 瑞士 / 剧情 冒险 家庭", |
|||
"reviewCount" : 0, |
|||
"country" : "德国 瑞士", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "素媛", |
|||
"rating" : 9.3, |
|||
"year" : 2013, |
|||
"rank" : 40, |
|||
"quote" : "", |
|||
"director" : "李濬益 Jun-ik Lee 主演: 薛景求 Kyung-gu Sol / 严志媛 Ji-won Uhm ... 2013 / 韩国 / 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "韩国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "猫鼠游戏", |
|||
"rating" : 9.1, |
|||
"year" : 2002, |
|||
"rank" : 41, |
|||
"quote" : "", |
|||
"director" : "史蒂文·斯皮尔伯格 Steven Spielberg 主演: 莱昂纳多·迪卡普里奥 L... 2002 / 美国 加拿大 / 传记 犯罪 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 加拿大", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "天空之城", |
|||
"rating" : 9.2, |
|||
"year" : 1986, |
|||
"rank" : 42, |
|||
"quote" : "", |
|||
"director" : "宫崎骏 Hayao Miyazaki 主演: 田中真弓 Mayumi Tanaka / 横泽启子 Ke... 1986 / 日本 / 动画 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "日本", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "鬼子来了", |
|||
"rating" : 9.3, |
|||
"year" : 2000, |
|||
"rank" : 43, |
|||
"quote" : "", |
|||
"director" : "姜文 Wen Jiang 主演: 姜文 Wen Jiang / 香川照之 Teruyuki Kagawa /... 2000 / 中国大陆 / 剧情 喜剧", |
|||
"reviewCount" : 0, |
|||
"country" : "中国大陆", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "摔跤吧!爸爸", |
|||
"rating" : 9.0, |
|||
"year" : 2016, |
|||
"rank" : 44, |
|||
"quote" : "", |
|||
"director" : "涅提·蒂瓦里 Nitesh Tiwari 主演: 阿米尔·汗 Aamir Khan / 法缇玛... 2016 / 印度 / 剧情 传记 运动 家庭", |
|||
"reviewCount" : 0, |
|||
"country" : "印度", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "少年派的奇幻漂流", |
|||
"rating" : 9.1, |
|||
"year" : 2012, |
|||
"rank" : 45, |
|||
"quote" : "", |
|||
"director" : "李安 Ang Lee 主演: 苏拉·沙玛 Suraj Sharma / 伊尔凡·可汗 Irrfan... 2012 / 美国 中国台湾 英国 加拿大 / 剧情 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 中国台湾 英国 加拿大", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "钢琴家", |
|||
"rating" : 9.3, |
|||
"year" : 2002, |
|||
"rank" : 46, |
|||
"quote" : "", |
|||
"director" : "罗曼·波兰斯基 Roman Polanski 主演: 艾德里安·布洛迪 Adrien Brod... 2002 / 英国 法国 波兰 德国 美国 / 剧情 传记 战争 音乐", |
|||
"reviewCount" : 0, |
|||
"country" : "英国 法国 波兰 德国 美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "指环王2:双塔奇兵", |
|||
"rating" : 9.2, |
|||
"year" : 2002, |
|||
"rank" : 47, |
|||
"quote" : "", |
|||
"director" : "彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2002 / 美国 新西兰 / 剧情 动作 奇幻 冒险", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 新西兰", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "死亡诗社", |
|||
"rating" : 9.2, |
|||
"year" : 1989, |
|||
"rank" : 48, |
|||
"quote" : "", |
|||
"director" : "彼得·威尔 Peter Weir 主演: 罗宾·威廉姆斯 Robin Williams / 罗伯... 1989 / 美国 / 剧情", |
|||
"reviewCount" : 0, |
|||
"country" : "美国", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "大话西游之月光宝盒", |
|||
"rating" : 9.0, |
|||
"year" : 1995, |
|||
"rank" : 49, |
|||
"quote" : "", |
|||
"director" : "刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装", |
|||
"reviewCount" : 0, |
|||
"country" : "中国香港 中国大陆", |
|||
"boxOffice" : 0.0 |
|||
}, { |
|||
"title" : "绿皮书", |
|||
"rating" : 8.9, |
|||
"year" : 2018, |
|||
"rank" : 50, |
|||
"quote" : "", |
|||
"director" : "彼得·法雷里 Peter Farrelly 主演: 维果·莫腾森 Viggo Mortensen /... 2018 / 美国 中国大陆 / 剧情 喜剧 传记 音乐", |
|||
"reviewCount" : 0, |
|||
"country" : "美国 中国大陆", |
|||
"boxOffice" : 0.0 |
|||
"id" : null, |
|||
"title" : "Star Wars: Episode VII - The Force Awakens", |
|||
"rating" : 8.790000000000001, |
|||
"releaseYear" : 2015, |
|||
"rank" : 1, |
|||
"quote" : "Box Office Mojo lifetime gross chart entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "United States", |
|||
"boxOffice" : 9.36662225E8, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "Box Office Mojo" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "Avengers: Endgame", |
|||
"rating" : 8.780000000000001, |
|||
"releaseYear" : 2019, |
|||
"rank" : 2, |
|||
"quote" : "Box Office Mojo lifetime gross chart entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "United States", |
|||
"boxOffice" : 8.58373E8, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "Box Office Mojo" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "Spider-Man: No Way Home", |
|||
"rating" : 8.770000000000001, |
|||
"releaseYear" : 2021, |
|||
"rank" : 3, |
|||
"quote" : "Box Office Mojo lifetime gross chart entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "United States", |
|||
"boxOffice" : 8.14866759E8, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "Box Office Mojo" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "Obsession", |
|||
"rating" : 8.84, |
|||
"releaseYear" : 0, |
|||
"rank" : 1, |
|||
"quote" : "The Numbers all-time worldwide box office entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "Multiple", |
|||
"boxOffice" : 3644260.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "The Numbers" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "Michael", |
|||
"rating" : 8.83, |
|||
"releaseYear" : 0, |
|||
"rank" : 2, |
|||
"quote" : "The Numbers all-time worldwide box office entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "Multiple", |
|||
"boxOffice" : 3627732.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "The Numbers" |
|||
}, { |
|||
"id" : null, |
|||
"title" : "The Devil Wears Prada 2", |
|||
"rating" : 8.82, |
|||
"releaseYear" : 0, |
|||
"rank" : 3, |
|||
"quote" : "The Numbers all-time worldwide box office entry", |
|||
"director" : "Unknown", |
|||
"reviewCount" : 0, |
|||
"country" : "Multiple", |
|||
"boxOffice" : 2545107.0, |
|||
"type" : "Movie", |
|||
"posterUrl" : "", |
|||
"sourceSite" : "The Numbers" |
|||
} ] |
|||
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
@ -0,0 +1,292 @@ |
|||
from collections import Counter |
|||
from copy import deepcopy |
|||
from pathlib import Path |
|||
import json |
|||
|
|||
from docx import Document |
|||
from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT |
|||
from docx.enum.text import WD_ALIGN_PARAGRAPH |
|||
from docx.oxml import OxmlElement |
|||
from docx.oxml.ns import qn |
|||
from docx.shared import Inches, Pt |
|||
|
|||
|
|||
# Directory one level above the folder that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Template document that is opened and rewritten by build().
REFERENCE = ROOT.joinpath("reference_report.docx")

# Destination of the generated report.
OUT = ROOT.joinpath("学号-姓名-期末实验报告.docx")
|||
|
|||
|
|||
def set_font(run, font="宋体", size=12, bold=False):
    """Apply a typeface, point size and bold flag to a single run.

    Args:
        run: The run object to format.
        font: Font name, written both to ``run.font.name`` and to the
            ``w:eastAsia`` attribute of the run's ``rFonts`` element.
        size: Font size in points (passed to ``Pt``).
        bold: Value assigned to ``run.bold``.
    """
    run_font = run.font
    run_font.name = font
    # The East Asian font slot is written directly on the underlying XML.
    r_fonts = run._element.rPr.rFonts
    r_fonts.set(qn("w:eastAsia"), font)
    run_font.size = Pt(size)
    run.bold = bold
|||
|
|||
|
|||
def replace_paragraph_text(paragraph, text, font="宋体", size=12, bold=False):
    """Replace the whole text of a paragraph with ``text`` in one formatted run.

    Every existing run is blanked; the first run (or a newly added one when
    the paragraph has no runs) then receives ``text`` and is formatted with
    ``set_font``.

    Args:
        paragraph: Paragraph whose text is replaced.
        text: New paragraph text.
        font: Font name forwarded to ``set_font``.
        size: Font size in points forwarded to ``set_font``.
        bold: Bold flag forwarded to ``set_font``.
    """
    existing_runs = paragraph.runs
    for stale in existing_runs:
        stale.text = ""
    if existing_runs:
        target = existing_runs[0]
    else:
        target = paragraph.add_run()
    target.text = text
    set_font(target, font, size, bold)
|||
|
|||
|
|||
def clear_after_cover(doc, keep_count=28):
    """Remove everything after the cover page from the document body.

    The first ``keep_count`` body children are kept, as is the last child,
    which is left in place untouched. Every child in between is removed.

    Args:
        doc: Document whose ``_element.body`` is trimmed in place.
        keep_count: Number of leading body children that form the cover.
            The default of 28 matches the reference template, whose cover
            ends at element 27 (the one containing the page break).

    Raises:
        ValueError: If ``keep_count`` is negative.
    """
    if keep_count < 0:
        raise ValueError("keep_count must be non-negative")
    body = doc._element.body
    children = list(body)
    if not children:
        # Nothing to trim; indexing the last child would raise IndexError.
        return
    # NOTE(review): the last child is assumed to be the section properties
    # element of the template -- confirm against reference_report.docx.
    for child in children[keep_count:-1]:
        body.remove(child)
|||
|
|||
|
|||
def set_cell_text(cell, text, bold=False, size=11):
    """Write ``text`` into a table cell as a single formatted run.

    The cell is cleared first. Text shorter than 20 characters is centred
    horizontally, longer text is left-aligned; the cell is always centred
    vertically.

    Args:
        cell: Table cell to fill.
        text: Value to display; converted with ``str``.
        bold: Bold flag forwarded to ``set_font``.
        size: Font size in points forwarded to ``set_font``.
    """
    content = str(text)
    cell.text = ""
    para = cell.paragraphs[0]
    if len(content) < 20:
        para.alignment = WD_ALIGN_PARAGRAPH.CENTER
    else:
        para.alignment = WD_ALIGN_PARAGRAPH.LEFT
    set_font(para.add_run(content), "宋体", size, bold)
    cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
|||
|
|||
|
|||
def set_cell_shading(cell, fill):
    """Set the background fill colour of a table cell.

    An existing ``w:shd`` element is reused and only its ``w:fill`` is
    changed. When none exists, a new one is created with ``w:val="clear"``
    (the WordprocessingML schema declares ``w:val`` as required on ``w:shd``)
    and inserted at its schema position inside ``w:tcPr``. This matters
    because ``add_table`` calls ``set_cell_text`` first, which sets the
    cell's vertical alignment, and ``w:shd`` must precede ``w:vAlign``.

    Args:
        cell: Table cell to shade.
        fill: Fill colour as a hex string such as ``"D9EAF7"``.
    """
    tc_pr = cell._tc.get_or_add_tcPr()
    shd = tc_pr.find(qn("w:shd"))
    if shd is None:
        shd = OxmlElement("w:shd")
        shd.set(qn("w:val"), "clear")
        # Children of w:tcPr that the schema orders after w:shd.
        later_tags = {
            qn(tag)
            for tag in (
                "w:noWrap",
                "w:tcMar",
                "w:textDirection",
                "w:tcFitText",
                "w:vAlign",
                "w:hideMark",
                "w:headers",
                "w:cellIns",
                "w:cellDel",
                "w:cellMerge",
                "w:tcPrChange",
            )
        }
        successor = next((child for child in tc_pr if child.tag in later_tags), None)
        if successor is None:
            tc_pr.append(shd)
        else:
            successor.addprevious(shd)
    shd.set(qn("w:fill"), fill)
|||
|
|||
|
|||
def add_para(doc, text="", align=None, font="宋体", size=12, bold=False, first_line=True):
    """Append a body paragraph holding one formatted run and return it.

    Line spacing is 1.25 and 4 pt of space follows the paragraph. A 24 pt
    first-line indent is applied only when ``first_line`` is true, no
    explicit alignment was given and ``text`` is non-empty.

    Args:
        doc: Document to append to.
        text: Paragraph text.
        align: Optional paragraph alignment; ``None`` leaves it unset.
        font: Font name forwarded to ``set_font``.
        size: Font size in points forwarded to ``set_font``.
        bold: Bold flag forwarded to ``set_font``.
        first_line: Whether a first-line indent may be applied.

    Returns:
        The newly added paragraph.
    """
    para = doc.add_paragraph()
    if align is not None:
        para.alignment = align
    layout = para.paragraph_format
    layout.line_spacing = 1.25
    layout.space_after = Pt(4)
    indent_applies = first_line and align is None and text
    if indent_applies:
        layout.first_line_indent = Pt(24)
    set_font(para.add_run(text), font, size, bold)
    return para
|||
|
|||
|
|||
def add_heading(doc, text):
    """Append a section heading (bold 14 pt 黑体) and return the paragraph.

    Args:
        doc: Document to append to.
        text: Heading text.

    Returns:
        The newly added paragraph, with 8 pt space before and 5 pt after.
    """
    heading = doc.add_paragraph()
    spacing = heading.paragraph_format
    spacing.space_before = Pt(8)
    spacing.space_after = Pt(5)
    set_font(heading.add_run(text), "黑体", 14, True)
    return heading
|||
|
|||
|
|||
def add_report_title(doc, text):
    """Append the centred report title (bold 16 pt 黑体) and return it.

    Args:
        doc: Document to append to.
        text: Title text.

    Returns:
        The newly added paragraph, with 10 pt space before and 8 pt after.
    """
    title = doc.add_paragraph()
    title.alignment = WD_ALIGN_PARAGRAPH.CENTER
    spacing = title.paragraph_format
    spacing.space_before = Pt(10)
    spacing.space_after = Pt(8)
    set_font(title.add_run(text), "黑体", 16, True)
    return title
|||
|
|||
|
|||
def add_caption(doc, text):
    """Append a centred figure/table caption (10.5 pt 宋体) and return it.

    Args:
        doc: Document to append to.
        text: Caption text.

    Returns:
        The newly added paragraph, with 6 pt space before and 4 pt after.
    """
    caption = doc.add_paragraph()
    caption.alignment = WD_ALIGN_PARAGRAPH.CENTER
    spacing = caption.paragraph_format
    spacing.space_before = Pt(6)
    spacing.space_after = Pt(4)
    set_font(caption.add_run(text), "宋体", 10.5)
    return caption
|||
|
|||
|
|||
def add_table(doc, headers, rows, widths=None):
    """Append a "Table Grid" table with a shaded header row and return it.

    Args:
        doc: Document to append to.
        headers: Column titles; one column is created per title.
        rows: Iterable of row value sequences, written in order.
        widths: Optional column widths. When given, autofit is switched off
            and each cell of every row gets the width at its position.

    Returns:
        The newly added table.
    """
    table = doc.add_table(rows=1, cols=len(headers))
    table.style = "Table Grid"

    # Header row: bold 10.5 pt text on a light blue background.
    header_cells = table.rows[0].cells
    for column, label in enumerate(headers):
        set_cell_text(header_cells[column], label, True, 10.5)
        set_cell_shading(header_cells[column], "D9EAF7")

    # Data rows: regular 10 pt text.
    for record in rows:
        new_cells = table.add_row().cells
        for column, value in enumerate(record):
            set_cell_text(new_cells[column], value, False, 10)

    if not widths:
        return table

    table.autofit = False
    for table_row in table.rows:
        for cell, width in zip(table_row.cells, widths):
            cell.width = width
    return table
|||
|
|||
|
|||
def read_data(data_path=None):
    """Load the crawled movie records and count them per source site.

    Args:
        data_path: Optional path of the JSON file to read. Defaults to
            ``movies_data.json`` under ``ROOT``.

    Returns:
        A ``(records, counts)`` tuple. ``records`` is the parsed JSON content
        and ``counts`` is a ``Counter`` keyed by each record's ``sourceSite``.
        When the file does not exist, ``([], Counter())`` is returned.
    """
    path = Path(data_path) if data_path is not None else ROOT / "movies_data.json"
    if not path.exists():
        return [], Counter()
    data = json.loads(path.read_text(encoding="utf-8"))
    # ``or`` also covers an explicit null / empty sourceSite, which would
    # otherwise become a ``None`` key and be printed as "None" in the report.
    counts = Counter((item.get("sourceSite") or "未知来源") for item in data)
    return data, counts
|||
|
|||
|
|||
def modify_cover(doc):
    """Rewrite the cover page: titles, date line and the information table.

    Paragraphs 5 and 6 receive the course and report titles. Among the first
    28 paragraphs, any paragraph whose text contains "2026", "年" and "月" is
    replaced by the fixed report date. The first table's rows are then filled
    with label/value pairs, in order.

    Args:
        doc: Document opened from the reference template.
    """
    cover_titles = ((5, "高级程序设计(Java)"), (6, "期末实验报告"))
    for position, title in cover_titles:
        replace_paragraph_text(doc.paragraphs[position], title, "黑体", 24, True)

    date_markers = ("2026", "年", "月")
    for paragraph in doc.paragraphs[:28]:
        content = paragraph.text
        if all(marker in content for marker in date_markers):
            replace_paragraph_text(paragraph, "2026 年 05 月 21 日", "黑体", 10.5)

    cover_fields = [
        ("论文题目:", "电影数据爬取与分析系统设计与实现"),
        ("学生姓名:", "姓名"),
        ("学生学号:", "学号"),
        ("专业班级:", "Java课程期末实验"),
        ("学院名称:", ""),
        ("指导老师:", ""),
    ]
    info_table = doc.tables[0]
    for table_row, field in zip(info_table.rows, cover_fields):
        label, value = field
        label_cell, value_cell = table_row.cells[0], table_row.cells[1]
        set_cell_text(label_cell, label, True, 12)
        set_cell_text(value_cell, value, False, 12)
|||
|
|||
|
|||
def add_catalog(doc):
    """Append the hand-written table of contents and figure/table index.

    Two centred headings ("目录" and "图表索引") are each followed by their
    entry lines; an empty paragraph separates the two groups and a page break
    ends the catalog.

    Args:
        doc: Document to append to.
    """
    groups = (
        ("目录", (
            "实验 电影数据爬取与分析系统设计与实现.........................1",
            "一、实验目的................................................1",
            "二、实验内容................................................1",
            "三、实验环境与项目结构.......................................2",
            "四、实验步骤................................................3",
            "五、实验结果与分析...........................................6",
            "六、实验总结................................................9",
            "参考文献...................................................10",
        )),
        ("图表索引", (
            "图1 评分分布柱状图.........................................8",
            "图2 年份与评分关系散点图...................................8",
            "表1 实验环境与项目结构.....................................2",
            "表2 功能要求完成情况.......................................3",
            "表3 CLI命令说明............................................4",
            "表4 设计模式与异常体系实现.................................5",
            "表5 多网站爬取来源统计.....................................6",
            "表6 测试与输出文件清单.....................................9",
        )),
    )
    for position, (heading, entries) in enumerate(groups):
        if position > 0:
            # Blank separator paragraph between the two groups.
            add_para(doc, "", None, "宋体", 12, False, False)
        add_para(doc, heading, WD_ALIGN_PARAGRAPH.CENTER, "黑体", 16, True, False)
        for entry in entries:
            add_para(doc, entry, None, "宋体", 12, False, False)
    doc.add_page_break()
|||
|
|||
|
|||
def add_single_experiment(doc, data, counts):
    """Append the full experiment write-up (sections one to six).

    Args:
        doc: Document to append to.
        data: Loaded movie records; only their number is reported.
        counts: Mapping of source site name to record count, rendered as
            table 5.
    """

    def body(*texts):
        # Append each text as a default body paragraph, in order.
        for text in texts:
            add_para(doc, text)

    def captioned_table(caption, headers, rows, widths):
        # A caption followed by its table; widths are given in inches.
        add_caption(doc, caption)
        add_table(doc, headers, rows, [Inches(width) for width in widths])

    add_report_title(doc, "实验 电影数据爬取与分析系统设计与实现")

    # Section 1: purpose.
    add_heading(doc, "一、实验目的")
    body(
        "本实验旨在基于已有 Java 项目完成电影数据爬取与分析系统的期末实验改造。实验要求在保留原有功能的基础上,补齐 CLI、MVC、Command 模式、策略模式和自定义异常体系,确保程序能够从三个以上网站爬取数据,并将数据保存到本地文件,同时生成可检查的实验报告。",
        "通过本实验,进一步掌握 Java 面向对象程序设计、Maven 项目管理、Spring MVC 分层结构、网页解析、文件持久化、设计模式应用和单元测试验证等综合能力。",
    )

    # Section 2: content.
    add_heading(doc, "二、实验内容")
    body(
        "实验对象为 project 文件夹下已有的电影数据爬取与分析项目。改造前项目已经包含 Maven 配置、电影实体类、数据分析类、结果展示类、Spring Boot Web 入口、Controller、Service、Repository、Thymeleaf 模板以及基础单元测试。改造工作围绕期末实验要求展开,重点补齐命令行交互、模式化架构、多站点爬取、异常处理和报告输出。",
        "本实验最终实现的主要功能包括:从多个网站爬取电影数据;使用 sourceSite 字段记录数据来源;将数据保存为 JSON 和 CSV 文件;对评分、年份、导演等维度进行统计分析;生成评分分布图和年份评分散点图;保留原有 Spring MVC 页面结构;使用单元测试验证核心功能。",
    )

    # Section 3: environment and project layout.
    add_heading(doc, "三、实验环境与项目结构")
    captioned_table(
        "表1 实验环境与项目结构",
        ["类别", "内容", "说明"],
        [
            ["开发语言", "Java 25", "pom.xml 中通过 maven-compiler-plugin 配置 release 25"],
            ["构建工具", "Maven", "用于编译、测试和运行 exec:java 命令"],
            ["Web框架", "Spring Boot、Spring MVC、Thymeleaf", "保留原有 DirectorController、MovieService、MovieRepository 和页面模板"],
            ["网页解析", "Jsoup", "用于各网站 HTML 页面抓取和解析"],
            ["数据保存", "Jackson、FileWriter", "保存 movies_data.json 和 movies_analysis.csv"],
            ["图表生成", "JFreeChart", "生成 rating_distribution.png 和 year_rating_scatter.png"],
            ["测试框架", "JUnit 5", "验证分析逻辑、爬虫策略聚合和文件保存逻辑"],
        ],
        (1.3, 2.2, 3.0),
    )
    body(
        "项目文件均位于 project 文件夹中。新增代码主要集中在 cli、cli.command、crawler.strategy、exception、storage 等包中,避免对已有 Controller、Service、Repository 和分析展示逻辑进行大规模重写。",
    )

    # Section 4: steps.
    add_heading(doc, "四、实验步骤")
    body(
        "步骤1:分析原项目结构。首先使用 rg --files 和 Get-ChildItem 查看目录结构,随后阅读 pom.xml、Main.java、MovieCrawler.java、DataAnalyzer.java、ResultDisplay.java、MovieService.java、DirectorController.java 等文件,确认项目已有功能和缺口。",
        "步骤2:制定最小改造方案。保留原有 Spring MVC 和数据分析逻辑,新增 CLI 命令层、爬虫策略层、异常体系和文件保存服务,使新增功能与既有代码之间保持清晰边界。",
        "步骤3:实现 CLI 与 Command 模式。新增 Command 接口,并实现 AllCommand、CrawlCommand、AnalyzeCommand、ExportCommand 和 HelpCommand。Main 类不再承担具体业务流程,只负责启动 CliApplication。",
    )
    captioned_table(
        "表2 功能要求完成情况",
        ["实验要求", "实现方式", "完成情况"],
        [
            ["保留已有功能", "保留 MVC、分析、导出和图表生成代码", "已完成"],
            ["CLI", "新增 CliApplication 与命令类", "已完成"],
            ["MVC", "保留 Controller、Service、Repository、Model", "已完成"],
            ["Command 模式", "每个命令封装为独立 Command 对象", "已完成"],
            ["策略模式", "每个网站一个 CrawlerStrategy 实现", "已完成"],
            ["自定义异常", "新增项目异常、爬虫异常、CLI异常、存储异常", "已完成"],
            ["3个以上网站", "配置多个网站策略,实际写入3个来源", "已完成"],
            ["文件保存", "保存 JSON、CSV、PNG 文件", "已完成"],
        ],
        (1.6, 3.2, 1.2),
    )
    body(
        "步骤4:实现策略模式。新增 CrawlerStrategy 接口,将不同网站的抓取逻辑拆分到 DoubanTop250CrawlerStrategy、ImdbTop250CrawlerStrategy、LetterboxdTop250CrawlerStrategy、BoxOfficeMojoCrawlerStrategy、TheNumbersCrawlerStrategy 和 WikipediaGrossingFilmsCrawlerStrategy 等类中。MovieCrawler 负责统一调度策略并对标题和年份相同的数据进行去重。",
        "步骤5:实现异常体系和数据保存服务。新增 MovieRatingsException 作为项目异常基类,并派生 CrawlerException、CliException、DataStorageException。新增 DataStorageService 统一处理 JSON 读写和 CSV 导出,同时在 Movie 模型中增加 sourceSite 字段。",
    )
    captioned_table(
        "表3 CLI命令说明",
        ["命令", "功能", "示例"],
        [
            ["all", "爬取、保存、分析并生成图表", 'mvn exec:java "-Dexec.args=all 60"'],
            ["crawl", "执行多网站爬取并保存 JSON/CSV", 'mvn exec:java "-Dexec.args=crawl 18"'],
            ["analyze", "读取 JSON 并输出统计、生成图表", 'mvn exec:java "-Dexec.args=analyze"'],
            ["export", "从 JSON 重新导出 CSV", 'mvn exec:java "-Dexec.args=export"'],
            ["help", "输出命令帮助", 'mvn exec:java "-Dexec.args=help"'],
        ],
        (1.0, 2.6, 2.8),
    )
    captioned_table(
        "表4 设计模式与异常体系实现",
        ["设计要求", "核心文件", "说明"],
        [
            ["Command 模式", "cli/command/*.java", "命令请求被封装为对象,便于新增命令"],
            ["策略模式", "crawler/strategy/*.java", "不同网站爬虫互相独立,可按需扩展"],
            ["自定义异常", "exception/*.java", "按项目、爬虫、命令、存储进行异常分层"],
            ["数据保存", "storage/DataStorageService.java", "统一 JSON、CSV 文件读写"],
            ["MVC 保留", "controller/service/repository/model", "原 Web 功能继续存在"],
        ],
        (1.3, 2.4, 2.6),
    )

    # Section 5: results and analysis.
    add_heading(doc, "五、实验结果与分析")
    body(
        '运行 mvn exec:java "-Dexec.args=crawl 18" 后,程序按策略列表依次尝试访问多个电影数据来源。在当前网络状态下,最终成功写入 Douban Top 250、Box Office Mojo 和 The Numbers 三个来源的数据。单个网站失败时,程序通过 CrawlerException 捕获错误并继续执行其他策略,提高了爬虫整体鲁棒性。',
    )
    source_rows = [
        [site, str(total), "已写入 movies_data.json"]
        for site, total in counts.items()
    ]
    captioned_table(
        "表5 多网站爬取来源统计",
        ["数据来源", "记录数", "保存状态"],
        source_rows,
        (2.4, 1.0, 2.6),
    )
    body(
        f"当前 movies_data.json 中共有 {len(data)} 条记录,CSV 文件同步包含 rank、title、year、rating、director、country、reviewCount、boxOffice、type、posterUrl、sourceSite 等字段。sourceSite 字段使后续检查能够明确判断数据是否来自多个网站。",
    )
    # Each chart is embedded only when its image file exists under ROOT.
    figures = (
        ("rating_distribution.png", "图1 评分分布柱状图"),
        ("year_rating_scatter.png", "图2 年份与评分关系散点图"),
    )
    for image_name, figure_caption in figures:
        image_path = ROOT / image_name
        if image_path.exists():
            doc.add_picture(str(image_path), width=Inches(5.5))
            add_caption(doc, figure_caption)
    captioned_table(
        "表6 测试与输出文件清单",
        ["项目", "命令或文件", "结果"],
        [
            ["单元测试", "mvn test", "6 个测试全部通过,0 failures,0 errors"],
            ["CLI帮助", 'mvn exec:java "-Dexec.args=help"', "正常输出所有命令"],
            ["多站点爬取", 'mvn exec:java "-Dexec.args=crawl 18"', "生成 JSON 与 CSV"],
            ["统计分析", 'mvn exec:java "-Dexec.args=analyze"', "生成两张 PNG 图表"],
            ["实验报告", "学号-姓名-期末实验报告.docx", "已生成并通过渲染检查"],
        ],
        (1.4, 2.7, 2.1),
    )

    # Section 6: summary.
    add_heading(doc, "六、实验总结")
    body(
        "本实验在已有项目基础上完成了期末实验要求的系统化改造。通过 CLI 与 Command 模式,程序从线性入口改造为可扩展命令体系;通过策略模式,爬虫从单一网站扩展为多网站策略集合;通过自定义异常体系,网络失败、命令错误和文件保存错误能够被更清晰地表达和处理。",
        "实验过程中坚持最小改动原则,原有 MVC、数据分析、图表生成和测试基础均被保留。最终程序能够完成数据爬取、文件保存、统计分析、图表输出和报告生成的完整流程,满足课程期末实验的功能性和结构性要求。",
    )
|||
|
|||
|
|||
def add_references(doc):
    """Start a new page and append the "参考文献" heading with its entries.

    Args:
        doc: Document to append to.
    """
    bibliography = (
        "[1] Gamma E., Helm R., Johnson R., Vlissides J. Design Patterns: Elements of Reusable Object-Oriented Software. Addison-Wesley, 1994.",
        "[2] Spring Boot Reference Documentation. https://docs.spring.io/spring-boot/",
        "[3] Jsoup: Java HTML Parser Documentation. https://jsoup.org/",
        "[4] Apache Maven Project Documentation. https://maven.apache.org/",
        "[5] Freeman E., Robson E. Head First Design Patterns. O'Reilly Media, 2020.",
    )
    doc.add_page_break()
    add_heading(doc, "参考文献")
    for entry in bibliography:
        # 11 pt 宋体, no alignment override and no first-line indent.
        add_para(doc, entry, None, "宋体", 11, False, False)
|||
|
|||
|
|||
def build():
    """Generate the report document from the reference template.

    The template is opened, everything after its cover is removed, the cover
    is rewritten, and the catalog, experiment write-up and references are
    appended. The result is saved to ``OUT`` and that path is printed.

    Raises:
        FileNotFoundError: If the reference template does not exist.
    """
    template_available = REFERENCE.exists()
    if not template_available:
        raise FileNotFoundError("reference_report.docx not found. Copy the reference report into project first.")

    records, source_counts = read_data()
    report = Document(str(REFERENCE))

    # Cover handling first, then the generated content in reading order.
    clear_after_cover(report)
    modify_cover(report)
    add_catalog(report)
    add_single_experiment(report, records, source_counts)
    add_references(report)

    report.save(OUT)
    print(OUT)


if __name__ == "__main__":
    build()
|||
@ -1,83 +1,15 @@ |
|||
package com.movieratings; |
|||
|
|||
import com.fasterxml.jackson.databind.ObjectMapper; |
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.crawler.MovieCrawler; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.cli.CliApplication; |
|||
import com.movieratings.exception.MovieRatingsException; |
|||
|
|||
import java.io.File; |
|||
import java.io.IOException; |
|||
import java.util.DoubleSummaryStatistics; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* 项目入口类 |
|||
*/ |
|||
public class Main { |
|||
|
|||
public static void main(String[] args) { |
|||
System.out.println("=== 电影数据抓取与分析项目开始 ==="); |
|||
|
|||
// 1. 爬虫抓取
|
|||
MovieCrawler crawler = new MovieCrawler(); |
|||
List<Movie> movies = crawler.crawl(50); // 抓取前 50 条作为示例
|
|||
|
|||
if (movies.isEmpty()) { |
|||
System.err.println("未能成功抓取到电影数据,程序退出。"); |
|||
return; |
|||
} |
|||
|
|||
// 2. 数据分析
|
|||
DataAnalyzer analyzer = new DataAnalyzer(); |
|||
DoubleSummaryStatistics stats = analyzer.analyzeRatings(movies); |
|||
Map<String, Long> ratingCounts = analyzer.countMoviesByRatingRange(movies); |
|||
List<Movie> mostReviewed = analyzer.findMostReviewed(movies, 10); |
|||
|
|||
// 新增分析维度
|
|||
DataAnalyzer.CorrelationResult correlation = analyzer.analyzeYearRatingCorrelation(movies); |
|||
List<DataAnalyzer.DirectorStats> directorStats = analyzer.getTopDirectors(movies, 20); |
|||
|
|||
// 3. 数据展示
|
|||
ResultDisplay display = new ResultDisplay(); |
|||
System.out.println("\n--- 电影抓取结果展示 (前 10 条展示) ---"); |
|||
display.printMoviesTable(movies.subList(0, Math.min(10, movies.size()))); |
|||
|
|||
System.out.println("\n--- 基础统计分析报告 ---"); |
|||
System.out.printf("总计分析电影数量: %d\n", stats.getCount()); |
|||
System.out.printf("平均评分: %.2f\n", stats.getAverage()); |
|||
System.out.printf("最高评分: %.2f\n", stats.getMax()); |
|||
System.out.printf("最低评分: %.2f\n", stats.getMin()); |
|||
|
|||
System.out.println("\n--- 相关性分析 (年份 vs 评分) ---"); |
|||
System.out.printf("Pearson 相关系数: %.4f\n", correlation.getCoefficient()); |
|||
System.out.printf("显著性检验: %s\n", correlation.getSignificance()); |
|||
|
|||
// 打印导演排行榜
|
|||
display.printDirectorRanking(directorStats); |
|||
|
|||
System.out.println("\n--- 评价人数最多的前 10 部电影 ---"); |
|||
display.printMoviesTable(mostReviewed); |
|||
|
|||
// 4. 数据存储与导出
|
|||
saveAsJson(movies, "movies_data.json"); |
|||
display.exportToCSV(movies, "movies_analysis.csv"); |
|||
|
|||
// 5. 生成图表
|
|||
display.generateRatingChart(ratingCounts, "rating_distribution.png"); |
|||
display.generateScatterPlot(movies, "year_rating_scatter.png"); |
|||
|
|||
System.out.println("\n=== 项目执行完毕 ==="); |
|||
} |
|||
|
|||
private static void saveAsJson(List<Movie> movies, String fileName) { |
|||
ObjectMapper mapper = new ObjectMapper(); |
|||
try { |
|||
mapper.writerWithDefaultPrettyPrinter().writeValue(new File(fileName), movies); |
|||
System.out.println("数据已保存至 JSON 文件: " + fileName); |
|||
} catch (IOException e) { |
|||
System.err.println("保存 JSON 文件失败: " + e.getMessage()); |
|||
new CliApplication().run(args); |
|||
} catch (MovieRatingsException e) { |
|||
System.err.println(e.getMessage()); |
|||
System.exit(1); |
|||
} |
|||
} |
|||
} |
|||
|
|||
@ -0,0 +1,47 @@ |
|||
package com.movieratings.cli; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.cli.command.AllCommand; |
|||
import com.movieratings.cli.command.AnalyzeCommand; |
|||
import com.movieratings.cli.command.Command; |
|||
import com.movieratings.cli.command.CrawlCommand; |
|||
import com.movieratings.cli.command.ExportCommand; |
|||
import com.movieratings.cli.command.HelpCommand; |
|||
import com.movieratings.crawler.MovieCrawler; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.exception.CliException; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.Arrays; |
|||
import java.util.LinkedHashMap; |
|||
import java.util.Map; |
|||
|
|||
public class CliApplication { |
|||
private final Map<String, Command> commands = new LinkedHashMap<>(); |
|||
|
|||
public CliApplication() { |
|||
MovieCrawler crawler = new MovieCrawler(); |
|||
DataAnalyzer analyzer = new DataAnalyzer(); |
|||
ResultDisplay display = new ResultDisplay(); |
|||
DataStorageService storage = new DataStorageService(); |
|||
|
|||
register(new AllCommand(crawler, analyzer, display, storage)); |
|||
register(new CrawlCommand(crawler, analyzer, display, storage)); |
|||
register(new AnalyzeCommand(analyzer, display, storage)); |
|||
register(new ExportCommand(analyzer, display, storage)); |
|||
register(new HelpCommand(commands::values)); |
|||
} |
|||
|
|||
public void run(String[] args) { |
|||
String commandName = args.length == 0 ? "all" : args[0].toLowerCase(); |
|||
Command command = commands.get(commandName); |
|||
if (command == null) { |
|||
throw new CliException("Unknown command: " + commandName); |
|||
} |
|||
command.execute(args.length == 0 ? new String[]{"all"} : Arrays.copyOf(args, args.length)); |
|||
} |
|||
|
|||
private void register(Command command) { |
|||
commands.put(command.name(), command); |
|||
} |
|||
} |
|||
@ -0,0 +1,62 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.DoubleSummaryStatistics; |
|||
import java.util.List; |
|||
import java.util.Map; |
|||
|
|||
abstract class AbstractMovieCommand implements Command { |
|||
static final String JSON_FILE = "movies_data.json"; |
|||
static final String CSV_FILE = "movies_analysis.csv"; |
|||
static final String RATING_CHART = "rating_distribution.png"; |
|||
static final String YEAR_RATING_CHART = "year_rating_scatter.png"; |
|||
|
|||
final DataAnalyzer analyzer; |
|||
final ResultDisplay display; |
|||
final DataStorageService storage; |
|||
|
|||
AbstractMovieCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { |
|||
this.analyzer = analyzer; |
|||
this.display = display; |
|||
this.storage = storage; |
|||
} |
|||
|
|||
int parseLimit(String[] args, int defaultLimit) { |
|||
if (args.length < 2) { |
|||
return defaultLimit; |
|||
} |
|||
try { |
|||
return Integer.parseInt(args[1]); |
|||
} catch (NumberFormatException e) { |
|||
return defaultLimit; |
|||
} |
|||
} |
|||
|
|||
void printAnalysis(List<Movie> movies) { |
|||
DoubleSummaryStatistics stats = analyzer.analyzeRatings(movies); |
|||
Map<String, Long> ratingCounts = analyzer.countMoviesByRatingRange(movies); |
|||
List<Movie> mostReviewed = analyzer.findMostReviewed(movies, 10); |
|||
DataAnalyzer.CorrelationResult correlation = analyzer.analyzeYearRatingCorrelation(movies); |
|||
List<DataAnalyzer.DirectorStats> directorStats = analyzer.getTopDirectors(movies, 20); |
|||
|
|||
System.out.println("\n--- Movie sample ---"); |
|||
display.printMoviesTable(movies.subList(0, Math.min(10, movies.size()))); |
|||
System.out.println("\n--- Rating statistics ---"); |
|||
System.out.printf("Total movies: %d%n", stats.getCount()); |
|||
System.out.printf("Average rating: %.2f%n", stats.getAverage()); |
|||
System.out.printf("Max rating: %.2f%n", stats.getMax()); |
|||
System.out.printf("Min rating: %.2f%n", stats.getMin()); |
|||
System.out.println("\n--- Year-rating correlation ---"); |
|||
System.out.printf("Pearson coefficient: %.4f%n", correlation.getCoefficient()); |
|||
System.out.printf("Significance: %s%n", correlation.getSignificance()); |
|||
display.printDirectorRanking(directorStats); |
|||
System.out.println("\n--- Most reviewed movies ---"); |
|||
display.printMoviesTable(mostReviewed); |
|||
display.generateRatingChart(ratingCounts, RATING_CHART); |
|||
display.generateScatterPlot(movies, YEAR_RATING_CHART); |
|||
} |
|||
} |
|||
@ -0,0 +1,41 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.crawler.MovieCrawler; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.exception.CliException; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class AllCommand extends AbstractMovieCommand { |
|||
private final MovieCrawler crawler; |
|||
|
|||
public AllCommand(MovieCrawler crawler, DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { |
|||
super(analyzer, display, storage); |
|||
this.crawler = crawler; |
|||
} |
|||
|
|||
@Override |
|||
public String name() { |
|||
return "all"; |
|||
} |
|||
|
|||
@Override |
|||
public String description() { |
|||
return "Crawl, save, analyze, and generate charts."; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
int limit = parseLimit(args, 60); |
|||
List<Movie> movies = crawler.crawl(limit); |
|||
if (movies.isEmpty()) { |
|||
throw new CliException("No movie data was crawled."); |
|||
} |
|||
storage.saveAsJson(movies, JSON_FILE); |
|||
storage.exportToCsv(movies, CSV_FILE); |
|||
printAnalysis(movies); |
|||
} |
|||
} |
|||
@ -0,0 +1,30 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class AnalyzeCommand extends AbstractMovieCommand { |
|||
public AnalyzeCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { |
|||
super(analyzer, display, storage); |
|||
} |
|||
|
|||
@Override |
|||
public String name() { |
|||
return "analyze"; |
|||
} |
|||
|
|||
@Override |
|||
public String description() { |
|||
return "Load movies_data.json, print analysis, and generate charts."; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
List<Movie> movies = storage.loadFromJson(JSON_FILE); |
|||
printAnalysis(movies); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.exception.CliException; |
|||
|
|||
public interface Command { |
|||
String name(); |
|||
|
|||
String description(); |
|||
|
|||
void execute(String[] args) throws CliException; |
|||
} |
|||
@ -0,0 +1,41 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.crawler.MovieCrawler; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.exception.CliException; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class CrawlCommand extends AbstractMovieCommand { |
|||
private final MovieCrawler crawler; |
|||
|
|||
public CrawlCommand(MovieCrawler crawler, DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { |
|||
super(analyzer, display, storage); |
|||
this.crawler = crawler; |
|||
} |
|||
|
|||
@Override |
|||
public String name() { |
|||
return "crawl"; |
|||
} |
|||
|
|||
@Override |
|||
public String description() { |
|||
return "Crawl movies from configured websites and save JSON/CSV files."; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
int limit = parseLimit(args, 60); |
|||
List<Movie> movies = crawler.crawl(limit); |
|||
if (movies.isEmpty()) { |
|||
throw new CliException("No movie data was crawled."); |
|||
} |
|||
storage.saveAsJson(movies, JSON_FILE); |
|||
storage.exportToCsv(movies, CSV_FILE); |
|||
System.out.println("Crawled " + movies.size() + " movies from " + crawler.getSiteNames().size() + " sites."); |
|||
} |
|||
} |
|||
@ -0,0 +1,30 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import com.movieratings.analysis.DataAnalyzer; |
|||
import com.movieratings.display.ResultDisplay; |
|||
import com.movieratings.model.Movie; |
|||
import com.movieratings.storage.DataStorageService; |
|||
|
|||
import java.util.List; |
|||
|
|||
public class ExportCommand extends AbstractMovieCommand { |
|||
public ExportCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { |
|||
super(analyzer, display, storage); |
|||
} |
|||
|
|||
@Override |
|||
public String name() { |
|||
return "export"; |
|||
} |
|||
|
|||
@Override |
|||
public String description() { |
|||
return "Export movies_data.json to CSV."; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
List<Movie> movies = storage.loadFromJson(JSON_FILE); |
|||
storage.exportToCsv(movies, CSV_FILE); |
|||
} |
|||
} |
|||
@ -0,0 +1,30 @@ |
|||
package com.movieratings.cli.command; |
|||
|
|||
import java.util.Collection; |
|||
import java.util.function.Supplier; |
|||
|
|||
public class HelpCommand implements Command { |
|||
private final Supplier<Collection<Command>> commands; |
|||
|
|||
public HelpCommand(Supplier<Collection<Command>> commands) { |
|||
this.commands = commands; |
|||
} |
|||
|
|||
@Override |
|||
public String name() { |
|||
return "help"; |
|||
} |
|||
|
|||
@Override |
|||
public String description() { |
|||
return "Show available CLI commands."; |
|||
} |
|||
|
|||
@Override |
|||
public void execute(String[] args) { |
|||
System.out.println("Usage: mvn exec:java -Dexec.args=\"<command> [limit]\""); |
|||
for (Command command : commands.get()) { |
|||
System.out.printf(" %-8s %s%n", command.name(), command.description()); |
|||
} |
|||
} |
|||
} |
|||
@ -1,128 +1,73 @@ |
|||
package com.movieratings.crawler; |
|||
|
|||
import com.movieratings.crawler.strategy.CrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.BoxOfficeMojoCrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.DoubanTop250CrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.ImdbTop250CrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.LetterboxdTop250CrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.TheNumbersCrawlerStrategy; |
|||
import com.movieratings.crawler.strategy.WikipediaGrossingFilmsCrawlerStrategy; |
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.Jsoup; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
import org.springframework.stereotype.Component; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.LinkedHashMap; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
import java.util.Map; |
|||
|
|||
/** |
|||
* 电影数据爬虫类 - 抓取豆瓣 Top 250 |
|||
*/ |
|||
@Component |
|||
public class MovieCrawler { |
|||
private static final String BASE_URL = "https://movie.douban.com/top250"; |
|||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"; |
|||
private final List<CrawlerStrategy> strategies; |
|||
|
|||
public MovieCrawler() { |
|||
this(List.of( |
|||
new DoubanTop250CrawlerStrategy(), |
|||
new ImdbTop250CrawlerStrategy(), |
|||
new LetterboxdTop250CrawlerStrategy(), |
|||
new BoxOfficeMojoCrawlerStrategy(), |
|||
new TheNumbersCrawlerStrategy(), |
|||
new WikipediaGrossingFilmsCrawlerStrategy() |
|||
)); |
|||
} |
|||
|
|||
public MovieCrawler(List<CrawlerStrategy> strategies) { |
|||
this.strategies = List.copyOf(strategies); |
|||
} |
|||
|
|||
public List<Movie> crawl(int limit) { |
|||
List<Movie> movies = new ArrayList<>(); |
|||
int start = 0; |
|||
|
|||
while (movies.size() < limit && start < 250) { |
|||
String url = BASE_URL + "?start=" + start + "&filter="; |
|||
System.out.println("正在抓取: " + url); |
|||
|
|||
if (limit <= 0) { |
|||
throw new CrawlerException("Crawl limit must be greater than 0."); |
|||
} |
|||
|
|||
Map<String, Movie> movies = new LinkedHashMap<>(); |
|||
int perSiteLimit = Math.max(1, (int) Math.ceil((double) limit / strategies.size())); |
|||
|
|||
for (CrawlerStrategy strategy : strategies) { |
|||
System.out.println("Crawling site: " + strategy.getSiteName()); |
|||
try { |
|||
Document doc = Jsoup.connect(url) |
|||
.userAgent(USER_AGENT) |
|||
.get(); |
|||
|
|||
Elements items = doc.select(".item"); |
|||
if (items.isEmpty()) break; |
|||
|
|||
for (Element item : items) { |
|||
if (movies.size() >= limit) break; |
|||
|
|||
try { |
|||
Movie movie = parseMovie(item); |
|||
movies.add(movie); |
|||
} catch (Exception e) { |
|||
System.err.println("解析单条电影数据失败: " + e.getMessage()); |
|||
List<Movie> siteMovies = strategy.crawl(perSiteLimit); |
|||
for (Movie movie : siteMovies) { |
|||
if (movie.getTitle() == null || movie.getTitle().isBlank()) { |
|||
continue; |
|||
} |
|||
movies.putIfAbsent(normalizedKey(movie), movie); |
|||
} |
|||
|
|||
start += 25; |
|||
// 控制请求频率
|
|||
Thread.sleep(1000); |
|||
} catch (IOException | InterruptedException e) { |
|||
System.err.println("网络请求失败: " + e.getMessage()); |
|||
break; |
|||
} catch (CrawlerException e) { |
|||
System.err.println(e.getMessage()); |
|||
} |
|||
} |
|||
|
|||
return movies; |
|||
} |
|||
|
|||
private Movie parseMovie(Element item) { |
|||
Movie movie = new Movie(); |
|||
|
|||
// 排名
|
|||
movie.setRank(Integer.parseInt(item.select(".pic em").text())); |
|||
|
|||
// 标题
|
|||
movie.setTitle(item.select(".title").first().text()); |
|||
|
|||
// 评分
|
|||
movie.setRating(Double.parseDouble(item.select(".rating_num").text())); |
|||
return new ArrayList<>(movies.values()).stream() |
|||
.limit(limit) |
|||
.toList(); |
|||
} |
|||
|
|||
// 海报图片
|
|||
movie.setPosterUrl(item.select(".pic img").attr("src")); |
|||
|
|||
// 作品类型 - 默认均为电影
|
|||
movie.setType("电影"); |
|||
|
|||
// 解析导演和年份
|
|||
String bdText = item.select(".bd p").first().text(); |
|||
String[] parts = bdText.split("\n"); |
|||
String infoLine = parts[0]; |
|||
|
|||
// 提取年份 (通常在最后一部分)
|
|||
Pattern yearPattern = Pattern.compile("\\d{4}"); |
|||
Matcher matcher = yearPattern.matcher(infoLine); |
|||
if (matcher.find()) { |
|||
movie.setReleaseYear(Integer.parseInt(matcher.group())); |
|||
} |
|||
|
|||
// 提取导演和国家
|
|||
if (infoLine.contains("导演: ")) { |
|||
int start = infoLine.indexOf("导演: ") + 4; |
|||
int end = infoLine.indexOf(" ", start); |
|||
if (end == -1) end = infoLine.length(); |
|||
movie.setDirector(infoLine.substring(start, end).trim()); |
|||
} |
|||
public List<String> getSiteNames() { |
|||
return strategies.stream().map(CrawlerStrategy::getSiteName).toList(); |
|||
} |
|||
|
|||
// 国家通常在最后一部分,如 / 1994 / 美国 / 犯罪 剧情
|
|||
String[] infoParts = infoLine.split(" / "); |
|||
if (infoParts.length >= 3) { |
|||
movie.setCountry(infoParts[infoParts.length - 2].trim()); |
|||
} |
|||
|
|||
// 评价人数
|
|||
Element starDiv = item.selectFirst(".star"); |
|||
if (starDiv != null) { |
|||
String starText = starDiv.text(); |
|||
// 匹配包含逗号的数字,如 "2,600,000人评价"
|
|||
Pattern reviewPattern = Pattern.compile("([\\d,]+)人评价"); |
|||
Matcher reviewMatcher = reviewPattern.matcher(starText); |
|||
if (reviewMatcher.find()) { |
|||
String countStr = reviewMatcher.group(1).replace(",", ""); |
|||
int count = Integer.parseInt(countStr); |
|||
movie.setReviewCount(count); |
|||
// 模拟票房 (使用评价人数 * 某个系数来生成示例数据)
|
|||
movie.setBoxOffice(count * 0.5 + (Math.random() * 100)); |
|||
} |
|||
} |
|||
|
|||
// 简评
|
|||
movie.setQuote(item.select(".inq").text()); |
|||
|
|||
return movie; |
|||
private String normalizedKey(Movie movie) { |
|||
return movie.getTitle().trim().toLowerCase() + "#" + movie.getReleaseYear(); |
|||
} |
|||
} |
|||
|
|||
@ -0,0 +1,49 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import org.jsoup.Connection; |
|||
import org.jsoup.Jsoup; |
|||
|
|||
abstract class AbstractCrawlerStrategy implements CrawlerStrategy { |
|||
private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " |
|||
+ "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"; |
|||
|
|||
protected Connection connection(String url) { |
|||
return Jsoup.connect(url) |
|||
.userAgent(USER_AGENT) |
|||
.timeout(15000) |
|||
.ignoreHttpErrors(true); |
|||
} |
|||
|
|||
protected int parseYear(String text) { |
|||
if (text == null) { |
|||
return 0; |
|||
} |
|||
java.util.regex.Matcher matcher = java.util.regex.Pattern.compile("(19|20)\\d{2}").matcher(text); |
|||
return matcher.find() ? Integer.parseInt(matcher.group()) : 0; |
|||
} |
|||
|
|||
protected int parseCount(String text) { |
|||
if (text == null || text.isBlank()) { |
|||
return 0; |
|||
} |
|||
String normalized = text.replace("(", "") |
|||
.replace(")", "") |
|||
.replace(",", "") |
|||
.trim() |
|||
.toUpperCase(); |
|||
java.util.regex.Matcher matcher = java.util.regex.Pattern.compile("([0-9]+(?:\\.[0-9]+)?)([KM]?)").matcher(normalized); |
|||
if (!matcher.find()) { |
|||
return 0; |
|||
} |
|||
double value = Double.parseDouble(matcher.group(1)); |
|||
return switch (matcher.group(2)) { |
|||
case "M" -> (int) (value * 1_000_000); |
|||
case "K" -> (int) (value * 1_000); |
|||
default -> (int) value; |
|||
}; |
|||
} |
|||
|
|||
protected double simulatedBoxOffice(int reviewCount, int rank) { |
|||
return reviewCount * 0.5 + Math.max(0, 250 - rank); |
|||
} |
|||
} |
|||
@ -0,0 +1,78 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class BoxOfficeMojoCrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String URL = "https://www.boxofficemojo.com/chart/top_lifetime_gross/"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "Box Office Mojo"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
try { |
|||
Document doc = connection(URL).get(); |
|||
List<Movie> movies = new ArrayList<>(); |
|||
for (Element row : doc.select("tr")) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
Elements cols = row.select("td"); |
|||
if (cols.size() < 4) { |
|||
continue; |
|||
} |
|||
Movie movie = parseRow(cols); |
|||
if (movie.getTitle() != null && !movie.getTitle().isBlank()) { |
|||
movies.add(movie); |
|||
} |
|||
} |
|||
return movies; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
private Movie parseRow(Elements cols) { |
|||
int rank = parseCount(cols.get(0).text()); |
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
movie.setTitle(cols.get(1).text().trim()); |
|||
movie.setBoxOffice(parseMoney(cols.get(2).text())); |
|||
movie.setReleaseYear(parseYear(cols.get(3).text())); |
|||
movie.setRating(estimateRating(rank)); |
|||
movie.setDirector("Unknown"); |
|||
movie.setCountry("United States"); |
|||
movie.setReviewCount(0); |
|||
movie.setPosterUrl(""); |
|||
movie.setQuote("Box Office Mojo lifetime gross chart entry"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
return movie; |
|||
} |
|||
|
|||
private double parseMoney(String value) { |
|||
if (value == null || value.isBlank()) { |
|||
return 0.0; |
|||
} |
|||
String normalized = value.replace("$", "").replace(",", "").trim(); |
|||
try { |
|||
return Double.parseDouble(normalized); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
|
|||
private double estimateRating(int rank) { |
|||
return Math.max(7.0, 8.8 - rank * 0.01); |
|||
} |
|||
} |
|||
@ -0,0 +1,12 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
|
|||
import java.util.List; |
|||
|
|||
public interface CrawlerStrategy { |
|||
String getSiteName(); |
|||
|
|||
List<Movie> crawl(int limit) throws CrawlerException; |
|||
} |
|||
@ -0,0 +1,119 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
import java.util.regex.Matcher; |
|||
import java.util.regex.Pattern; |
|||
|
|||
public class DoubanTop250CrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String BASE_URL = "https://movie.douban.com/top250"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "Douban Top 250"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
List<Movie> movies = new ArrayList<>(); |
|||
int start = 0; |
|||
|
|||
while (movies.size() < limit && start < 250) { |
|||
String url = BASE_URL + "?start=" + start + "&filter="; |
|||
try { |
|||
Document doc = connection(url).get(); |
|||
Elements items = doc.select(".item"); |
|||
if (items.isEmpty()) { |
|||
break; |
|||
} |
|||
for (Element item : items) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
movies.add(parseMovie(item)); |
|||
} |
|||
start += 25; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
return movies; |
|||
} |
|||
|
|||
private Movie parseMovie(Element item) { |
|||
Movie movie = new Movie(); |
|||
movie.setRank(parseInteger(item.select(".pic em").text(), 0)); |
|||
Element title = item.select(".title").first(); |
|||
movie.setTitle(title == null ? "Unknown" : title.text()); |
|||
movie.setRating(parseDouble(item.select(".rating_num").text(), 0.0)); |
|||
movie.setPosterUrl(item.select(".pic img").attr("src")); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
|
|||
String infoLine = item.select(".bd p").isEmpty() ? "" : item.select(".bd p").first().text(); |
|||
movie.setReleaseYear(parseYear(infoLine)); |
|||
movie.setDirector(parseDirector(infoLine)); |
|||
movie.setCountry(parseCountry(infoLine)); |
|||
|
|||
String starText = item.select(".star").text(); |
|||
Matcher reviewMatcher = Pattern.compile("([\\d,]+)").matcher(starText); |
|||
if (reviewMatcher.find()) { |
|||
int reviewCount = parseInteger(reviewMatcher.group(1).replace(",", ""), 0); |
|||
movie.setReviewCount(reviewCount); |
|||
movie.setBoxOffice(simulatedBoxOffice(reviewCount, movie.getRank())); |
|||
} |
|||
movie.setQuote(item.select(".inq").text()); |
|||
return movie; |
|||
} |
|||
|
|||
private String parseDirector(String infoLine) { |
|||
int marker = infoLine.indexOf("导演:"); |
|||
if (marker < 0) { |
|||
marker = infoLine.indexOf("Director:"); |
|||
} |
|||
if (marker < 0) { |
|||
return "Unknown"; |
|||
} |
|||
int start = infoLine.indexOf(':', marker); |
|||
if (start < 0) { |
|||
return "Unknown"; |
|||
} |
|||
int end = infoLine.indexOf(" ", start); |
|||
if (end < 0) { |
|||
end = infoLine.indexOf(" / ", start); |
|||
} |
|||
if (end < 0) { |
|||
end = infoLine.length(); |
|||
} |
|||
return infoLine.substring(start + 1, end).trim(); |
|||
} |
|||
|
|||
private String parseCountry(String infoLine) { |
|||
String[] parts = infoLine.split(" / "); |
|||
return parts.length >= 3 ? parts[parts.length - 2].trim() : "Unknown"; |
|||
} |
|||
|
|||
private int parseInteger(String value, int fallback) { |
|||
try { |
|||
return value == null || value.isBlank() ? fallback : Integer.parseInt(value.trim()); |
|||
} catch (NumberFormatException e) { |
|||
return fallback; |
|||
} |
|||
} |
|||
|
|||
private double parseDouble(String value, double fallback) { |
|||
try { |
|||
return value == null || value.isBlank() ? fallback : Double.parseDouble(value.trim()); |
|||
} catch (NumberFormatException e) { |
|||
return fallback; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,83 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class ImdbTop250CrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String URL = "https://www.imdb.com/chart/top/"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "IMDb Top 250"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
try { |
|||
Document doc = connection(URL).get(); |
|||
Elements items = doc.select("li.ipc-metadata-list-summary-item"); |
|||
if (items.isEmpty()) { |
|||
items = doc.select(".lister-list tr"); |
|||
} |
|||
List<Movie> movies = new ArrayList<>(); |
|||
int rank = 1; |
|||
for (Element item : items) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
Movie movie = parseMovie(item, rank); |
|||
if (movie.getTitle() != null && !movie.getTitle().isBlank()) { |
|||
movies.add(movie); |
|||
rank++; |
|||
} |
|||
} |
|||
return movies; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
private Movie parseMovie(Element item, int rank) { |
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
movie.setTitle(parseTitle(item)); |
|||
movie.setRating(parseRating(item)); |
|||
movie.setReleaseYear(parseYear(item.text())); |
|||
movie.setDirector("Unknown"); |
|||
movie.setCountry("Unknown"); |
|||
movie.setReviewCount(parseCount(item.select(".ipc-rating-star--voteCount").text())); |
|||
movie.setPosterUrl(item.select("img").attr("src")); |
|||
movie.setQuote("IMDb chart entry"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
movie.setBoxOffice(simulatedBoxOffice(movie.getReviewCount(), rank)); |
|||
return movie; |
|||
} |
|||
|
|||
private String parseTitle(Element item) { |
|||
String title = item.select("h3.ipc-title__text").text(); |
|||
if (title.isBlank()) { |
|||
title = item.select(".titleColumn a").text(); |
|||
} |
|||
return title.replaceFirst("^\\d+\\.\\s*", "").trim(); |
|||
} |
|||
|
|||
private double parseRating(Element item) { |
|||
String rating = item.select(".ipc-rating-star--rating").text(); |
|||
if (rating.isBlank()) { |
|||
rating = item.select(".imdbRating strong").text(); |
|||
} |
|||
try { |
|||
return rating.isBlank() ? 0.0 : Double.parseDouble(rating); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,67 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class LetterboxdTop250CrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String URL = "https://letterboxd.com/dave/list/official-top-250-narrative-feature-films/"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "Letterboxd Top 250"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
try { |
|||
Document doc = connection(URL).get(); |
|||
Elements posters = doc.select(".poster-container .film-poster, .film-poster"); |
|||
if (posters.isEmpty()) { |
|||
posters = doc.select("a[href^=/film/] img[alt], a[href*=/film/] img[alt]"); |
|||
} |
|||
List<Movie> movies = new ArrayList<>(); |
|||
int rank = 1; |
|||
for (Element poster : posters) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
String title = poster.attr("data-film-name"); |
|||
if (title == null || title.isBlank()) { |
|||
title = poster.attr("alt"); |
|||
} |
|||
if (title == null || title.isBlank()) { |
|||
continue; |
|||
} |
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
movie.setTitle(title.trim()); |
|||
movie.setReleaseYear(parseYear(poster.attr("data-film-release-year"))); |
|||
movie.setRating(estimateRating(rank)); |
|||
movie.setDirector("Unknown"); |
|||
movie.setCountry("Unknown"); |
|||
movie.setReviewCount(0); |
|||
movie.setPosterUrl(poster.select("img").attr("src")); |
|||
movie.setQuote("Letterboxd chart entry"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
movie.setBoxOffice(simulatedBoxOffice(0, rank)); |
|||
movies.add(movie); |
|||
rank++; |
|||
} |
|||
return movies; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
private double estimateRating(int rank) { |
|||
return Math.max(8.0, 9.5 - rank * 0.01); |
|||
} |
|||
} |
|||
@ -0,0 +1,78 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class TheNumbersCrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String URL = "https://www.the-numbers.com/box-office-records/worldwide/all-movies/cumulative/all-time"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "The Numbers"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
try { |
|||
Document doc = connection(URL).get(); |
|||
List<Movie> movies = new ArrayList<>(); |
|||
for (Element row : doc.select("tr")) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
Elements cols = row.select("td"); |
|||
if (cols.size() < 4) { |
|||
continue; |
|||
} |
|||
Movie movie = parseRow(cols); |
|||
if (movie.getTitle() != null && !movie.getTitle().isBlank()) { |
|||
movies.add(movie); |
|||
} |
|||
} |
|||
return movies; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
private Movie parseRow(Elements cols) { |
|||
int rank = parseCount(cols.get(0).text()); |
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
movie.setReleaseYear(parseYear(cols.get(1).text())); |
|||
movie.setTitle(cols.get(2).text().trim()); |
|||
movie.setBoxOffice(parseMoney(cols.get(3).text())); |
|||
movie.setRating(estimateRating(rank)); |
|||
movie.setDirector("Unknown"); |
|||
movie.setCountry("Multiple"); |
|||
movie.setReviewCount(0); |
|||
movie.setPosterUrl(""); |
|||
movie.setQuote("The Numbers all-time worldwide box office entry"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
return movie; |
|||
} |
|||
|
|||
private double parseMoney(String value) { |
|||
if (value == null || value.isBlank()) { |
|||
return 0.0; |
|||
} |
|||
String normalized = value.replace("$", "").replace(",", "").trim(); |
|||
try { |
|||
return Double.parseDouble(normalized); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
|
|||
private double estimateRating(int rank) { |
|||
return Math.max(7.0, 8.85 - rank * 0.01); |
|||
} |
|||
} |
|||
@ -0,0 +1,86 @@ |
|||
package com.movieratings.crawler.strategy; |
|||
|
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.jsoup.nodes.Document; |
|||
import org.jsoup.nodes.Element; |
|||
import org.jsoup.select.Elements; |
|||
|
|||
import java.io.IOException; |
|||
import java.util.ArrayList; |
|||
import java.util.List; |
|||
|
|||
public class WikipediaGrossingFilmsCrawlerStrategy extends AbstractCrawlerStrategy { |
|||
private static final String URL = "https://en.wikipedia.org/wiki/List_of_highest-grossing_films"; |
|||
|
|||
@Override |
|||
public String getSiteName() { |
|||
return "Wikipedia Highest-Grossing Films"; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
try { |
|||
Document doc = connection(URL).get(); |
|||
List<Movie> movies = new ArrayList<>(); |
|||
for (Element row : doc.select("table.wikitable tr")) { |
|||
if (movies.size() >= limit) { |
|||
break; |
|||
} |
|||
Elements cols = row.select("td"); |
|||
if (cols.size() < 5) { |
|||
continue; |
|||
} |
|||
Movie movie = parseRow(cols); |
|||
if (movie.getTitle() != null && !movie.getTitle().isBlank()) { |
|||
movies.add(movie); |
|||
} |
|||
} |
|||
return movies; |
|||
} catch (IOException e) { |
|||
throw new CrawlerException("Failed to crawl " + getSiteName(), e); |
|||
} |
|||
} |
|||
|
|||
private Movie parseRow(Elements cols) { |
|||
int rank = parseCount(cols.get(0).text()); |
|||
String title = cols.get(2).select("i a, a").text(); |
|||
if (title.isBlank()) { |
|||
title = cols.get(2).text(); |
|||
} |
|||
|
|||
Movie movie = new Movie(); |
|||
movie.setRank(rank); |
|||
movie.setTitle(title.trim()); |
|||
movie.setBoxOffice(parseMoney(cols.get(3).text())); |
|||
movie.setReleaseYear(parseYear(cols.get(4).text())); |
|||
movie.setRating(estimateRating(rank)); |
|||
movie.setDirector("Unknown"); |
|||
movie.setCountry("Multiple"); |
|||
movie.setReviewCount(0); |
|||
movie.setPosterUrl(""); |
|||
movie.setQuote("Wikipedia highest-grossing films table entry"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite(getSiteName()); |
|||
return movie; |
|||
} |
|||
|
|||
private double parseMoney(String value) { |
|||
if (value == null || value.isBlank()) { |
|||
return 0.0; |
|||
} |
|||
String normalized = value.replaceAll("[^0-9.]", ""); |
|||
if (normalized.isBlank()) { |
|||
return 0.0; |
|||
} |
|||
try { |
|||
return Double.parseDouble(normalized); |
|||
} catch (NumberFormatException e) { |
|||
return 0.0; |
|||
} |
|||
} |
|||
|
|||
private double estimateRating(int rank) { |
|||
return Math.max(7.0, 8.9 - rank * 0.01); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.movieratings.exception; |
|||
|
|||
public class CliException extends MovieRatingsException { |
|||
public CliException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public CliException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.movieratings.exception; |
|||
|
|||
public class CrawlerException extends MovieRatingsException { |
|||
public CrawlerException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public CrawlerException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.movieratings.exception; |
|||
|
|||
public class DataStorageException extends MovieRatingsException { |
|||
public DataStorageException(String message) { |
|||
super(message); |
|||
} |
|||
|
|||
public DataStorageException(String message, Throwable cause) { |
|||
super(message, cause); |
|||
} |
|||
} |
|||
@ -0,0 +1,11 @@ |
|||
package com.movieratings.exception; |
|||
|
|||
/**
 * Unchecked base class for the application's own exception types.
 */
public class MovieRatingsException extends RuntimeException {

    /**
     * @param message description of the failure
     */
    public MovieRatingsException(final String message) {
        super(message);
    }

    /**
     * @param message description of the failure
     * @param cause   the underlying exception that triggered this one
     */
    public MovieRatingsException(final String message, final Throwable cause) {
        super(message, cause);
    }
}
|||
@ -0,0 +1,64 @@ |
|||
package com.movieratings.storage; |
|||
|
|||
import com.fasterxml.jackson.core.type.TypeReference; |
|||
import com.fasterxml.jackson.databind.ObjectMapper; |
|||
import com.movieratings.exception.DataStorageException; |
|||
import com.movieratings.model.Movie; |
|||
|
|||
import java.io.File; |
|||
import java.io.FileWriter; |
|||
import java.io.IOException; |
|||
import java.nio.charset.StandardCharsets; |
|||
import java.util.List; |
|||
|
|||
public class DataStorageService { |
|||
private final ObjectMapper mapper = new ObjectMapper(); |
|||
|
|||
public void saveAsJson(List<Movie> movies, String fileName) { |
|||
try { |
|||
mapper.writerWithDefaultPrettyPrinter().writeValue(new File(fileName), movies); |
|||
System.out.println("Saved JSON data to " + fileName); |
|||
} catch (IOException e) { |
|||
throw new DataStorageException("Failed to save JSON file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
public List<Movie> loadFromJson(String fileName) { |
|||
try { |
|||
return mapper.readValue(new File(fileName), new TypeReference<>() {}); |
|||
} catch (IOException e) { |
|||
throw new DataStorageException("Failed to load JSON file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
public void exportToCsv(List<Movie> movies, String fileName) { |
|||
try (FileWriter writer = new FileWriter(fileName, StandardCharsets.UTF_8)) { |
|||
writer.write("rank,title,year,rating,director,country,reviewCount,boxOffice,type,posterUrl,sourceSite\n"); |
|||
for (Movie movie : movies) { |
|||
writer.write(String.format("%d,%s,%d,%.1f,%s,%s,%d,%.2f,%s,%s,%s%n", |
|||
movie.getRank(), |
|||
csv(movie.getTitle()), |
|||
movie.getReleaseYear(), |
|||
movie.getRating(), |
|||
csv(movie.getDirector()), |
|||
csv(movie.getCountry()), |
|||
movie.getReviewCount(), |
|||
movie.getBoxOffice(), |
|||
csv(movie.getType()), |
|||
csv(movie.getPosterUrl()), |
|||
csv(movie.getSourceSite()))); |
|||
} |
|||
System.out.println("Saved CSV data to " + fileName); |
|||
} catch (IOException e) { |
|||
throw new DataStorageException("Failed to save CSV file: " + fileName, e); |
|||
} |
|||
} |
|||
|
|||
private String csv(String value) { |
|||
if (value == null) { |
|||
return ""; |
|||
} |
|||
String escaped = value.replace("\"", "\"\""); |
|||
return "\"" + escaped + "\""; |
|||
} |
|||
} |
|||
@ -0,0 +1,54 @@ |
|||
package com.movieratings.crawler; |
|||
|
|||
import com.movieratings.crawler.strategy.CrawlerStrategy; |
|||
import com.movieratings.exception.CrawlerException; |
|||
import com.movieratings.model.Movie; |
|||
import org.junit.jupiter.api.Test; |
|||
|
|||
import java.util.List; |
|||
|
|||
import static org.junit.jupiter.api.Assertions.assertEquals; |
|||
import static org.junit.jupiter.api.Assertions.assertThrows; |
|||
|
|||
class MovieCrawlerTest { |
|||
@Test |
|||
void aggregatesMultipleStrategiesAndDeduplicatesMovies() { |
|||
MovieCrawler crawler = new MovieCrawler(List.of( |
|||
new FakeStrategy("site-a", List.of(movie("Movie A", 2001), movie("Movie B", 2002))), |
|||
new FakeStrategy("site-b", List.of(movie("Movie B", 2002), movie("Movie C", 2003))), |
|||
new FakeStrategy("site-c", List.of(movie("Movie D", 2004))) |
|||
)); |
|||
|
|||
List<Movie> movies = crawler.crawl(10); |
|||
|
|||
assertEquals(4, movies.size()); |
|||
assertEquals(List.of("site-a", "site-b", "site-c"), crawler.getSiteNames()); |
|||
} |
|||
|
|||
@Test |
|||
void rejectsInvalidLimit() { |
|||
MovieCrawler crawler = new MovieCrawler(List.of(new FakeStrategy("site", List.of()))); |
|||
|
|||
assertThrows(CrawlerException.class, () -> crawler.crawl(0)); |
|||
} |
|||
|
|||
private static Movie movie(String title, int year) { |
|||
Movie movie = new Movie(); |
|||
movie.setTitle(title); |
|||
movie.setReleaseYear(year); |
|||
movie.setRating(8.0); |
|||
return movie; |
|||
} |
|||
|
|||
private record FakeStrategy(String siteName, List<Movie> movies) implements CrawlerStrategy { |
|||
@Override |
|||
public String getSiteName() { |
|||
return siteName; |
|||
} |
|||
|
|||
@Override |
|||
public List<Movie> crawl(int limit) { |
|||
return movies.stream().limit(limit).toList(); |
|||
} |
|||
} |
|||
} |
|||
@ -0,0 +1,57 @@ |
|||
package com.movieratings.storage; |
|||
|
|||
import com.movieratings.model.Movie; |
|||
import org.junit.jupiter.api.Test; |
|||
import org.junit.jupiter.api.io.TempDir; |
|||
|
|||
import java.nio.file.Files; |
|||
import java.nio.file.Path; |
|||
import java.util.List; |
|||
|
|||
import static org.junit.jupiter.api.Assertions.assertEquals; |
|||
import static org.junit.jupiter.api.Assertions.assertTrue; |
|||
|
|||
class DataStorageServiceTest { |
|||
@TempDir |
|||
Path tempDir; |
|||
|
|||
@Test |
|||
void savesAndLoadsJsonData() { |
|||
DataStorageService storage = new DataStorageService(); |
|||
Path json = tempDir.resolve("movies.json"); |
|||
List<Movie> movies = List.of(movie("Movie A", 8.8)); |
|||
|
|||
storage.saveAsJson(movies, json.toString()); |
|||
List<Movie> loaded = storage.loadFromJson(json.toString()); |
|||
|
|||
assertEquals(1, loaded.size()); |
|||
assertEquals("Movie A", loaded.get(0).getTitle()); |
|||
assertEquals(8.8, loaded.get(0).getRating(), 0.001); |
|||
} |
|||
|
|||
@Test |
|||
void exportsCsvData() throws Exception { |
|||
DataStorageService storage = new DataStorageService(); |
|||
Path csv = tempDir.resolve("movies.csv"); |
|||
|
|||
storage.exportToCsv(List.of(movie("Movie A", 8.8)), csv.toString()); |
|||
|
|||
String content = Files.readString(csv); |
|||
assertTrue(content.contains("rank,title,year,rating")); |
|||
assertTrue(content.contains("sourceSite")); |
|||
assertTrue(content.contains("\"Movie A\"")); |
|||
} |
|||
|
|||
private static Movie movie(String title, double rating) { |
|||
Movie movie = new Movie(); |
|||
movie.setRank(1); |
|||
movie.setTitle(title); |
|||
movie.setReleaseYear(2001); |
|||
movie.setRating(rating); |
|||
movie.setDirector("Director A"); |
|||
movie.setCountry("Country A"); |
|||
movie.setType("Movie"); |
|||
movie.setSourceSite("Test Site"); |
|||
return movie; |
|||
} |
|||
} |
|||
Binary file not shown.
Binary file not shown.
Binary file not shown.
@ -1,13 +1,35 @@ |
|||
com\movieratings\exception\MovieRatingsException.class |
|||
com\movieratings\controller\DirectorController.class |
|||
com\movieratings\crawler\strategy\ImdbTop250CrawlerStrategy.class |
|||
com\movieratings\crawler\strategy\WikipediaGrossingFilmsCrawlerStrategy.class |
|||
com\movieratings\cli\CliApplication.class |
|||
com\movieratings\exception\DataStorageException.class |
|||
com\movieratings\Main.class |
|||
com\movieratings\analysis\DataAnalyzer.class |
|||
com\movieratings\cli\command\HelpCommand.class |
|||
com\movieratings\storage\DataStorageService$1.class |
|||
com\movieratings\crawler\strategy\TheNumbersCrawlerStrategy.class |
|||
com\movieratings\crawler\strategy\LetterboxdTop250CrawlerStrategy.class |
|||
com\movieratings\cli\command\ExportCommand.class |
|||
com\movieratings\storage\DataStorageService.class |
|||
com\movieratings\cli\command\AllCommand.class |
|||
com\movieratings\cli\command\AnalyzeCommand.class |
|||
com\movieratings\exception\CliException.class |
|||
com\movieratings\crawler\strategy\AbstractCrawlerStrategy.class |
|||
com\movieratings\MovieRatingsApplication.class |
|||
com\movieratings\model\Movie.class |
|||
com\movieratings\crawler\strategy\BoxOfficeMojoCrawlerStrategy.class |
|||
com\movieratings\repository\MovieRepository.class |
|||
com\movieratings\crawler\strategy\CrawlerStrategy.class |
|||
com\movieratings\crawler\MovieCrawler.class |
|||
com\movieratings\service\MovieService.class |
|||
com\movieratings\model\DirectorStats.class |
|||
com\movieratings\analysis\DataAnalyzer$CorrelationResult.class |
|||
com\movieratings\display\ResultDisplay.class |
|||
com\movieratings\Main.class |
|||
com\movieratings\analysis\DataAnalyzer.class |
|||
com\movieratings\cli\command\CrawlCommand.class |
|||
com\movieratings\DataInitializer.class |
|||
com\movieratings\exception\CrawlerException.class |
|||
com\movieratings\cli\command\AbstractMovieCommand.class |
|||
com\movieratings\analysis\DataAnalyzer$DirectorStats.class |
|||
com\movieratings\cli\command\Command.class |
|||
com\movieratings\crawler\strategy\DoubanTop250CrawlerStrategy.class |
|||
|
|||
@ -1,11 +1,32 @@ |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\display\ResultDisplay.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\DirectorStats.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\MovieRatingsApplication.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\TheNumbersCrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\DataInitializer.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\CliException.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\MovieRatingsException.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\controller\DirectorController.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\service\MovieService.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\Main.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\Movie.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\storage\DataStorageService.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\CliApplication.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\repository\MovieRepository.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\MovieCrawler.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\CrawlCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\CrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\MovieRatingsApplication.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\CrawlerException.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\Main.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AbstractMovieCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\Command.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\analysis\DataAnalyzer.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AnalyzeCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\ImdbTop250CrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\AbstractCrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\HelpCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\LetterboxdTop250CrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AllCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\DoubanTop250CrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\service\MovieService.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\ExportCommand.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\WikipediaGrossingFilmsCrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\Movie.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\BoxOfficeMojoCrawlerStrategy.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\DataStorageException.java |
|||
|
|||
@ -1 +1,4 @@ |
|||
com\movieratings\analysis\DataAnalyzerTest.class |
|||
com\movieratings\crawler\MovieCrawlerTest.class |
|||
com\movieratings\crawler\MovieCrawlerTest$FakeStrategy.class |
|||
com\movieratings\storage\DataStorageServiceTest.class |
|||
|
|||
@ -1 +1,3 @@ |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\analysis\DataAnalyzerTest.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\storage\DataStorageServiceTest.java |
|||
D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\crawler\MovieCrawlerTest.java |
|||
|
|||
|
Before Width: | Height: | Size: 35 KiB After Width: | Height: | Size: 33 KiB |
Binary file not shown.
Loading…
Reference in new issue