diff --git a/project/.gitignore b/project/.gitignore new file mode 100644 index 0000000..5fbad0c --- /dev/null +++ b/project/.gitignore @@ -0,0 +1,11 @@ +target/ +*.class + +lo_profile*/ +report_render*/ +reference_report.docx + +.idea/ +*.iml + +*.log diff --git a/project/movies_analysis.csv b/project/movies_analysis.csv index 6a12e80..66982a7 100644 --- a/project/movies_analysis.csv +++ b/project/movies_analysis.csv @@ -1,51 +1,10 @@ -排名,标题,年份,评分,导演,国家,评价人数,模拟票房 -1,肖申克的救赎,1994,9.7,弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994 / 美国 / 犯罪 剧情,美国,0,0.00 -2,霸王别姬,1993,9.6,陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha... 1993 / 中国大陆 中国香港 / 剧情 爱情 同性,中国大陆 中国香港,0,0.00 -3,泰坦尼克号,1997,9.5,詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997 / 美国 / 剧情 爱情 灾难,美国,0,0.00 -4,阿甘正传,1994,9.5,罗伯特·泽米吉斯 Robert Zemeckis 主演: 汤姆·汉克斯 Tom Hanks / ... 1994 / 美国 / 剧情 爱情,美国,0,0.00 -5,千与千寻,2001,9.4,宫崎骏 Hayao Miyazaki 主演: 柊瑠美 Rumi Hîragi / 入野自由 Miy... 2001 / 日本 / 剧情 动画 奇幻,日本,0,0.00 -6,美丽人生,1997,9.5,罗伯托·贝尼尼 Roberto Benigni 主演: 罗伯托·贝尼尼 Roberto Beni... 1997 / 意大利 / 剧情 喜剧 爱情 战争,意大利,0,0.00 -7,星际穿越,2014,9.4,克里斯托弗·诺兰 Christopher Nolan 主演: 马修·麦康纳 Matthew Mc... 2014 / 美国 英国 加拿大 / 剧情 科幻 冒险,美国 英国 加拿大,0,0.00 -8,这个杀手不太冷,1994,9.4,吕克·贝松 Luc Besson 主演: 让·雷诺 Jean Reno / 娜塔莉·波特曼 ... 1994 / 法国 美国 / 剧情 动作 犯罪,法国 美国,0,0.00 -9,盗梦空间,2010,9.4,克里斯托弗·诺兰 Christopher Nolan 主演: 莱昂纳多·迪卡普里奥 Le... 2010 / 美国 英国 / 剧情 科幻 悬疑 冒险,美国 英国,0,0.00 -10,楚门的世界,1998,9.4,彼得·威尔 Peter Weir 主演: 金·凯瑞 Jim Carrey / 劳拉·琳妮 Lau... 1998 / 美国 / 剧情 科幻,美国,0,0.00 -11,辛德勒的名单,1993,9.5,史蒂文·斯皮尔伯格 Steven Spielberg 主演: 连姆·尼森 Liam Neeson... 1993 / 美国 / 剧情 历史 战争,美国,0,0.00 -12,忠犬八公的故事,2009,9.4,莱塞·霍尔斯道姆 Lasse Hallström 主演: 理查·基尔 Richard Ger... 2009 / 美国 英国 / 剧情,美国 英国,0,0.00 -13,海上钢琴师,1998,9.3,朱塞佩·托纳多雷 Giuseppe Tornatore 主演: 蒂姆·罗斯 Tim Roth / ... 1998 / 意大利 / 剧情 音乐,意大利,0,0.00 -14,疯狂动物城,2016,9.3,拜伦·霍华德 Byron Howard / 瑞奇·摩尔 Rich Moore 主演: 金妮弗·... 2016 / 美国 / 喜剧 动画 冒险,美国,0,0.00 -15,三傻大闹宝莱坞,2009,9.2,拉库马·希拉尼 Rajkumar Hirani 主演: 阿米尔·汗 Aamir Khan / 卡... 2009 / 印度 / 剧情 喜剧 爱情 歌舞,印度,0,0.00 -16,机器人总动员,2008,9.3,安德鲁·斯坦顿 Andrew Stanton 主演: 本·贝尔特 Ben Burtt / 艾丽... 2008 / 美国 / 科幻 动画 冒险,美国,0,0.00 -17,放牛班的春天,2004,9.3,克里斯托夫·巴拉蒂 Christophe Barratier 主演: 让-巴蒂斯特·莫尼... 2004 / 法国 瑞士 德国 / 剧情 音乐,法国 瑞士 德国,0,0.00 -18,无间道,2002,9.3,刘伟强 / 麦兆辉 主演: 刘德华 Andy Lau / 梁朝伟 Tony Leung Chiu W... 2002 / 中国香港 / 剧情 犯罪 惊悚,中国香港,0,0.00 -19,控方证人,1957,9.6,比利·怀尔德 Billy Wilder 主演: 泰隆·鲍华 Tyrone Power / 玛琳·... 1957 / 美国 / 剧情 犯罪 悬疑 惊悚,美国,0,0.00 -20,寻梦环游记,2017,9.1,李·昂克里奇 Lee Unkrich / 阿德里安·莫利纳 Adrian Molina 主演: ... 2017 / 美国 / 喜剧 动画 奇幻 音乐,美国,0,0.00 -21,大话西游之大圣娶亲,1995,9.2,刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装,中国香港 中国大陆,0,0.00 -22,熔炉,2011,9.3,黄东赫 Dong-hyuk Hwang 主演: 孔侑 Yoo Gong / 郑有美 Yu-mi Jung /... 2011 / 韩国 / 剧情,韩国,0,0.00 -23,触不可及,2011,9.3,奥利维·那卡什 Olivier Nakache / 艾力克·托兰达 Eric Toledano 主... 2011 / 法国 / 剧情 喜剧,法国,0,0.00 -24,教父,1972,9.3,弗朗西斯·福特·科波拉 Francis Ford Coppola 主演: 马龙·白兰度 M... 1972 / 美国 / 剧情 犯罪,美国,0,0.00 -25,末代皇帝,1987,9.3,贝纳尔多·贝托鲁奇 Bernardo Bertolucci 主演: 尊龙 John Lone / 陈... 1987 / 英国 意大利 中国大陆 法国 / 剧情 传记 历史,英国 意大利 中国大陆 法国,0,0.00 -26,哈利·波特与魔法石,2001,9.2,Chris Columbus 主演: Daniel Radcliffe / Emma Watson / Rupert Grint 2001 / 美国 英国 / 奇幻 冒险,美国 英国,0,0.00 -27,当幸福来敲门,2006,9.1,加布里尔·穆奇诺 Gabriele Muccino 主演: 威尔·史密斯 Will Smith ... 2006 / 美国 / 剧情 传记 家庭,美国,0,0.00 -28,龙猫,1988,9.2,宫崎骏 Hayao Miyazaki 主演: 日高法子 Noriko Hidaka / 坂本千夏 Ch... 1988 / 日本 / 动画 奇幻 冒险,日本,0,0.00 -29,活着,1994,9.3,张艺谋 Yimou Zhang 主演: 葛优 You Ge / 巩俐 Li Gong / 姜武 Wu Jiang 1994 / 中国大陆 中国香港 / 剧情 历史 家庭,中国大陆 中国香港,0,0.00 -30,怦然心动,2010,9.1,罗伯·莱纳 Rob Reiner 主演: 玛德琳·卡罗尔 Madeline Carroll / 卡... 2010 / 美国 / 剧情 喜剧 爱情,美国,0,0.00 -31,蝙蝠侠:黑暗骑士,2008,9.2,克里斯托弗·诺兰 Christopher Nolan 主演: 克里斯蒂安·贝尔 Christ... 2008 / 美国 英国 / 剧情 动作 科幻 犯罪 惊悚,美国 英国,0,0.00 -32,指环王3:王者无敌,2003,9.3,彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2003 / 美国 新西兰 / 剧情 动作 奇幻 冒险,美国 新西兰,0,0.00 -33,我不是药神,2018,9.0,文牧野 Muye Wen 主演: 徐峥 Zheng Xu / 王传君 Chuanjun Wang / 周... 2018 / 中国大陆 / 剧情 喜剧,中国大陆,0,0.00 -34,乱世佳人,1939,9.3,维克多·弗莱明 Victor Fleming / 乔治·库克 George Cukor 主演: 费... 1939 / 美国 / 剧情 历史 爱情 战争,美国,0,0.00 -35,飞屋环游记,2009,9.1,彼特·道格特 Pete Docter / 鲍勃·彼德森 Bob Peterson 主演: 爱德... 2009 / 美国 / 剧情 喜剧 动画 冒险,美国,0,0.00 -36,让子弹飞,2010,9.0,姜文 Wen Jiang 主演: 姜文 Wen Jiang / 葛优 You Ge / 周润发 Yun-F... 2010 / 中国大陆 中国香港 / 剧情 喜剧 动作 西部,中国大陆 中国香港,0,0.00 -37,哈尔的移动城堡,2004,9.1,宫崎骏 Hayao Miyazaki 主演: 倍赏千惠子 Chieko Baishô / 木村拓... 2004 / 日本 / 爱情 动画 奇幻 冒险,日本,0,0.00 -38,十二怒汉,1957,9.4,西德尼·吕美特 Sidney Lumet 主演: 亨利·方达 Henry Fonda / 马丁... 1957 / 美国 / 剧情,美国,0,0.00 -39,海蒂和爷爷,2015,9.3,阿兰·葛斯彭纳 Alain Gsponer 主演: 阿努克·斯特芬 Anuk Steffen /... 2015 / 德国 瑞士 / 剧情 冒险 家庭,德国 瑞士,0,0.00 -40,素媛,2013,9.3,李濬益 Jun-ik Lee 主演: 薛景求 Kyung-gu Sol / 严志媛 Ji-won Uhm ... 2013 / 韩国 / 剧情,韩国,0,0.00 -41,猫鼠游戏,2002,9.1,史蒂文·斯皮尔伯格 Steven Spielberg 主演: 莱昂纳多·迪卡普里奥 L... 2002 / 美国 加拿大 / 传记 犯罪 剧情,美国 加拿大,0,0.00 -42,天空之城,1986,9.2,宫崎骏 Hayao Miyazaki 主演: 田中真弓 Mayumi Tanaka / 横泽启子 Ke... 1986 / 日本 / 动画 奇幻 冒险,日本,0,0.00 -43,鬼子来了,2000,9.3,姜文 Wen Jiang 主演: 姜文 Wen Jiang / 香川照之 Teruyuki Kagawa /... 2000 / 中国大陆 / 剧情 喜剧,中国大陆,0,0.00 -44,摔跤吧!爸爸,2016,9.0,涅提·蒂瓦里 Nitesh Tiwari 主演: 阿米尔·汗 Aamir Khan / 法缇玛... 2016 / 印度 / 剧情 传记 运动 家庭,印度,0,0.00 -45,少年派的奇幻漂流,2012,9.1,李安 Ang Lee 主演: 苏拉·沙玛 Suraj Sharma / 伊尔凡·可汗 Irrfan... 2012 / 美国 中国台湾 英国 加拿大 / 剧情 奇幻 冒险,美国 中国台湾 英国 加拿大,0,0.00 -46,钢琴家,2002,9.3,罗曼·波兰斯基 Roman Polanski 主演: 艾德里安·布洛迪 Adrien Brod... 2002 / 英国 法国 波兰 德国 美国 / 剧情 传记 战争 音乐,英国 法国 波兰 德国 美国,0,0.00 -47,指环王2:双塔奇兵,2002,9.2,彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2002 / 美国 新西兰 / 剧情 动作 奇幻 冒险,美国 新西兰,0,0.00 -48,死亡诗社,1989,9.2,彼得·威尔 Peter Weir 主演: 罗宾·威廉姆斯 Robin Williams / 罗伯... 1989 / 美国 / 剧情,美国,0,0.00 -49,大话西游之月光宝盒,1995,9.0,刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装,中国香港 中国大陆,0,0.00 -50,绿皮书,2018,8.9,彼得·法雷里 Peter Farrelly 主演: 维果·莫腾森 Viggo Mortensen /... 2018 / 美国 中国大陆 / 剧情 喜剧 传记 音乐,美国 中国大陆,0,0.00 +rank,title,year,rating,director,country,reviewCount,boxOffice,type,posterUrl,sourceSite +1,"肖申克的救赎",1994,9.7,"弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994","美国",0,0.00,"Movie","https://img3.doubanio.com/view/photo/s_ratio_poster/public/p480747492.jpg","Douban Top 250" +2,"霸王别姬",1993,9.6,"陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung","中国大陆 中国香港",0,0.00,"Movie","https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2911205318.jpg","Douban Top 250" +3,"泰坦尼克号",1997,9.5,"詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997","美国",0,0.00,"Movie","https://img9.doubanio.com/view/photo/s_ratio_poster/public/p457760035.jpg","Douban Top 250" +1,"Star Wars: Episode VII - The Force Awakens",2015,8.8,"Unknown","United States",0,936662225.00,"Movie","","Box Office Mojo" +2,"Avengers: Endgame",2019,8.8,"Unknown","United States",0,858373000.00,"Movie","","Box Office Mojo" +3,"Spider-Man: No Way Home",2021,8.8,"Unknown","United States",0,814866759.00,"Movie","","Box Office Mojo" +1,"Obsession",0,8.8,"Unknown","Multiple",0,3644260.00,"Movie","","The Numbers" +2,"Michael",0,8.8,"Unknown","Multiple",0,3627732.00,"Movie","","The Numbers" +3,"The Devil Wears Prada 2",0,8.8,"Unknown","Multiple",0,2545107.00,"Movie","","The Numbers" diff --git a/project/movies_data.json b/project/movies_data.json index 0f9ca75..a92f1b0 100644 --- a/project/movies_data.json +++ b/project/movies_data.json @@ -1,501 +1,127 @@ [ { + "id" : null, "title" : "肖申克的救赎", "rating" : 9.7, - "year" : 1994, + "releaseYear" : 1994, "rank" : 1, "quote" : "", - "director" : "弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994 / 美国 / 犯罪 剧情", + "director" : "弗兰克·德拉邦特 Frank Darabont 主演: 蒂姆·罗宾斯 Tim Robbins /... 1994", "reviewCount" : 0, "country" : "美国", - "boxOffice" : 0.0 + "boxOffice" : 0.0, + "type" : "Movie", + "posterUrl" : "https://img3.doubanio.com/view/photo/s_ratio_poster/public/p480747492.jpg", + "sourceSite" : "Douban Top 250" }, { + "id" : null, "title" : "霸王别姬", "rating" : 9.6, - "year" : 1993, + "releaseYear" : 1993, "rank" : 2, "quote" : "", - "director" : "陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung / 张丰毅 Fengyi Zha... 1993 / 中国大陆 中国香港 / 剧情 爱情 同性", + "director" : "陈凯歌 Kaige Chen 主演: 张国荣 Leslie Cheung", "reviewCount" : 0, "country" : "中国大陆 中国香港", - "boxOffice" : 0.0 + "boxOffice" : 0.0, + "type" : "Movie", + "posterUrl" : "https://img1.doubanio.com/view/photo/s_ratio_poster/public/p2911205318.jpg", + "sourceSite" : "Douban Top 250" }, { + "id" : null, "title" : "泰坦尼克号", "rating" : 9.5, - "year" : 1997, + "releaseYear" : 1997, "rank" : 3, "quote" : "", - "director" : "詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997 / 美国 / 剧情 爱情 灾难", + "director" : "詹姆斯·卡梅隆 James Cameron 主演: 莱昂纳多·迪卡普里奥 Leonardo... 1997", "reviewCount" : 0, "country" : "美国", - "boxOffice" : 0.0 + "boxOffice" : 0.0, + "type" : "Movie", + "posterUrl" : "https://img9.doubanio.com/view/photo/s_ratio_poster/public/p457760035.jpg", + "sourceSite" : "Douban Top 250" }, { - "title" : "阿甘正传", - "rating" : 9.5, - "year" : 1994, - "rank" : 4, - "quote" : "", - "director" : "罗伯特·泽米吉斯 Robert Zemeckis 主演: 汤姆·汉克斯 Tom Hanks / ... 1994 / 美国 / 剧情 爱情", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "千与千寻", - "rating" : 9.4, - "year" : 2001, - "rank" : 5, - "quote" : "", - "director" : "宫崎骏 Hayao Miyazaki 主演: 柊瑠美 Rumi Hîragi / 入野自由 Miy... 2001 / 日本 / 剧情 动画 奇幻", - "reviewCount" : 0, - "country" : "日本", - "boxOffice" : 0.0 -}, { - "title" : "美丽人生", - "rating" : 9.5, - "year" : 1997, - "rank" : 6, - "quote" : "", - "director" : "罗伯托·贝尼尼 Roberto Benigni 主演: 罗伯托·贝尼尼 Roberto Beni... 1997 / 意大利 / 剧情 喜剧 爱情 战争", - "reviewCount" : 0, - "country" : "意大利", - "boxOffice" : 0.0 -}, { - "title" : "星际穿越", - "rating" : 9.4, - "year" : 2014, - "rank" : 7, - "quote" : "", - "director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 马修·麦康纳 Matthew Mc... 2014 / 美国 英国 加拿大 / 剧情 科幻 冒险", - "reviewCount" : 0, - "country" : "美国 英国 加拿大", - "boxOffice" : 0.0 -}, { - "title" : "这个杀手不太冷", - "rating" : 9.4, - "year" : 1994, - "rank" : 8, - "quote" : "", - "director" : "吕克·贝松 Luc Besson 主演: 让·雷诺 Jean Reno / 娜塔莉·波特曼 ... 1994 / 法国 美国 / 剧情 动作 犯罪", - "reviewCount" : 0, - "country" : "法国 美国", - "boxOffice" : 0.0 -}, { - "title" : "盗梦空间", - "rating" : 9.4, - "year" : 2010, - "rank" : 9, - "quote" : "", - "director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 莱昂纳多·迪卡普里奥 Le... 2010 / 美国 英国 / 剧情 科幻 悬疑 冒险", - "reviewCount" : 0, - "country" : "美国 英国", - "boxOffice" : 0.0 -}, { - "title" : "楚门的世界", - "rating" : 9.4, - "year" : 1998, - "rank" : 10, - "quote" : "", - "director" : "彼得·威尔 Peter Weir 主演: 金·凯瑞 Jim Carrey / 劳拉·琳妮 Lau... 1998 / 美国 / 剧情 科幻", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "辛德勒的名单", - "rating" : 9.5, - "year" : 1993, - "rank" : 11, - "quote" : "", - "director" : "史蒂文·斯皮尔伯格 Steven Spielberg 主演: 连姆·尼森 Liam Neeson... 1993 / 美国 / 剧情 历史 战争", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "忠犬八公的故事", - "rating" : 9.4, - "year" : 2009, - "rank" : 12, - "quote" : "", - "director" : "莱塞·霍尔斯道姆 Lasse Hallström 主演: 理查·基尔 Richard Ger... 2009 / 美国 英国 / 剧情", - "reviewCount" : 0, - "country" : "美国 英国", - "boxOffice" : 0.0 -}, { - "title" : "海上钢琴师", - "rating" : 9.3, - "year" : 1998, - "rank" : 13, - "quote" : "", - "director" : "朱塞佩·托纳多雷 Giuseppe Tornatore 主演: 蒂姆·罗斯 Tim Roth / ... 1998 / 意大利 / 剧情 音乐", - "reviewCount" : 0, - "country" : "意大利", - "boxOffice" : 0.0 -}, { - "title" : "疯狂动物城", - "rating" : 9.3, - "year" : 2016, - "rank" : 14, - "quote" : "", - "director" : "拜伦·霍华德 Byron Howard / 瑞奇·摩尔 Rich Moore 主演: 金妮弗·... 2016 / 美国 / 喜剧 动画 冒险", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "三傻大闹宝莱坞", - "rating" : 9.2, - "year" : 2009, - "rank" : 15, - "quote" : "", - "director" : "拉库马·希拉尼 Rajkumar Hirani 主演: 阿米尔·汗 Aamir Khan / 卡... 2009 / 印度 / 剧情 喜剧 爱情 歌舞", - "reviewCount" : 0, - "country" : "印度", - "boxOffice" : 0.0 -}, { - "title" : "机器人总动员", - "rating" : 9.3, - "year" : 2008, - "rank" : 16, - "quote" : "", - "director" : "安德鲁·斯坦顿 Andrew Stanton 主演: 本·贝尔特 Ben Burtt / 艾丽... 2008 / 美国 / 科幻 动画 冒险", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "放牛班的春天", - "rating" : 9.3, - "year" : 2004, - "rank" : 17, - "quote" : "", - "director" : "克里斯托夫·巴拉蒂 Christophe Barratier 主演: 让-巴蒂斯特·莫尼... 2004 / 法国 瑞士 德国 / 剧情 音乐", - "reviewCount" : 0, - "country" : "法国 瑞士 德国", - "boxOffice" : 0.0 -}, { - "title" : "无间道", - "rating" : 9.3, - "year" : 2002, - "rank" : 18, - "quote" : "", - "director" : "刘伟强 / 麦兆辉 主演: 刘德华 Andy Lau / 梁朝伟 Tony Leung Chiu W... 2002 / 中国香港 / 剧情 犯罪 惊悚", - "reviewCount" : 0, - "country" : "中国香港", - "boxOffice" : 0.0 -}, { - "title" : "控方证人", - "rating" : 9.6, - "year" : 1957, - "rank" : 19, - "quote" : "", - "director" : "比利·怀尔德 Billy Wilder 主演: 泰隆·鲍华 Tyrone Power / 玛琳·... 1957 / 美国 / 剧情 犯罪 悬疑 惊悚", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "寻梦环游记", - "rating" : 9.1, - "year" : 2017, - "rank" : 20, - "quote" : "", - "director" : "李·昂克里奇 Lee Unkrich / 阿德里安·莫利纳 Adrian Molina 主演: ... 2017 / 美国 / 喜剧 动画 奇幻 音乐", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "大话西游之大圣娶亲", - "rating" : 9.2, - "year" : 1995, - "rank" : 21, - "quote" : "", - "director" : "刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装", - "reviewCount" : 0, - "country" : "中国香港 中国大陆", - "boxOffice" : 0.0 -}, { - "title" : "熔炉", - "rating" : 9.3, - "year" : 2011, - "rank" : 22, - "quote" : "", - "director" : "黄东赫 Dong-hyuk Hwang 主演: 孔侑 Yoo Gong / 郑有美 Yu-mi Jung /... 2011 / 韩国 / 剧情", - "reviewCount" : 0, - "country" : "韩国", - "boxOffice" : 0.0 -}, { - "title" : "触不可及", - "rating" : 9.3, - "year" : 2011, - "rank" : 23, - "quote" : "", - "director" : "奥利维·那卡什 Olivier Nakache / 艾力克·托兰达 Eric Toledano 主... 2011 / 法国 / 剧情 喜剧", - "reviewCount" : 0, - "country" : "法国", - "boxOffice" : 0.0 -}, { - "title" : "教父", - "rating" : 9.3, - "year" : 1972, - "rank" : 24, - "quote" : "", - "director" : "弗朗西斯·福特·科波拉 Francis Ford Coppola 主演: 马龙·白兰度 M... 1972 / 美国 / 剧情 犯罪", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "末代皇帝", - "rating" : 9.3, - "year" : 1987, - "rank" : 25, - "quote" : "", - "director" : "贝纳尔多·贝托鲁奇 Bernardo Bertolucci 主演: 尊龙 John Lone / 陈... 1987 / 英国 意大利 中国大陆 法国 / 剧情 传记 历史", - "reviewCount" : 0, - "country" : "英国 意大利 中国大陆 法国", - "boxOffice" : 0.0 -}, { - "title" : "哈利·波特与魔法石", - "rating" : 9.2, - "year" : 2001, - "rank" : 26, - "quote" : "", - "director" : "Chris Columbus 主演: Daniel Radcliffe / Emma Watson / Rupert Grint 2001 / 美国 英国 / 奇幻 冒险", - "reviewCount" : 0, - "country" : "美国 英国", - "boxOffice" : 0.0 -}, { - "title" : "当幸福来敲门", - "rating" : 9.1, - "year" : 2006, - "rank" : 27, - "quote" : "", - "director" : "加布里尔·穆奇诺 Gabriele Muccino 主演: 威尔·史密斯 Will Smith ... 2006 / 美国 / 剧情 传记 家庭", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "龙猫", - "rating" : 9.2, - "year" : 1988, - "rank" : 28, - "quote" : "", - "director" : "宫崎骏 Hayao Miyazaki 主演: 日高法子 Noriko Hidaka / 坂本千夏 Ch... 1988 / 日本 / 动画 奇幻 冒险", - "reviewCount" : 0, - "country" : "日本", - "boxOffice" : 0.0 -}, { - "title" : "活着", - "rating" : 9.3, - "year" : 1994, - "rank" : 29, - "quote" : "", - "director" : "张艺谋 Yimou Zhang 主演: 葛优 You Ge / 巩俐 Li Gong / 姜武 Wu Jiang 1994 / 中国大陆 中国香港 / 剧情 历史 家庭", - "reviewCount" : 0, - "country" : "中国大陆 中国香港", - "boxOffice" : 0.0 -}, { - "title" : "怦然心动", - "rating" : 9.1, - "year" : 2010, - "rank" : 30, - "quote" : "", - "director" : "罗伯·莱纳 Rob Reiner 主演: 玛德琳·卡罗尔 Madeline Carroll / 卡... 2010 / 美国 / 剧情 喜剧 爱情", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "蝙蝠侠:黑暗骑士", - "rating" : 9.2, - "year" : 2008, - "rank" : 31, - "quote" : "", - "director" : "克里斯托弗·诺兰 Christopher Nolan 主演: 克里斯蒂安·贝尔 Christ... 2008 / 美国 英国 / 剧情 动作 科幻 犯罪 惊悚", - "reviewCount" : 0, - "country" : "美国 英国", - "boxOffice" : 0.0 -}, { - "title" : "指环王3:王者无敌", - "rating" : 9.3, - "year" : 2003, - "rank" : 32, - "quote" : "", - "director" : "彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2003 / 美国 新西兰 / 剧情 动作 奇幻 冒险", - "reviewCount" : 0, - "country" : "美国 新西兰", - "boxOffice" : 0.0 -}, { - "title" : "我不是药神", - "rating" : 9.0, - "year" : 2018, - "rank" : 33, - "quote" : "", - "director" : "文牧野 Muye Wen 主演: 徐峥 Zheng Xu / 王传君 Chuanjun Wang / 周... 2018 / 中国大陆 / 剧情 喜剧", - "reviewCount" : 0, - "country" : "中国大陆", - "boxOffice" : 0.0 -}, { - "title" : "乱世佳人", - "rating" : 9.3, - "year" : 1939, - "rank" : 34, - "quote" : "", - "director" : "维克多·弗莱明 Victor Fleming / 乔治·库克 George Cukor 主演: 费... 1939 / 美国 / 剧情 历史 爱情 战争", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "飞屋环游记", - "rating" : 9.1, - "year" : 2009, - "rank" : 35, - "quote" : "", - "director" : "彼特·道格特 Pete Docter / 鲍勃·彼德森 Bob Peterson 主演: 爱德... 2009 / 美国 / 剧情 喜剧 动画 冒险", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "让子弹飞", - "rating" : 9.0, - "year" : 2010, - "rank" : 36, - "quote" : "", - "director" : "姜文 Wen Jiang 主演: 姜文 Wen Jiang / 葛优 You Ge / 周润发 Yun-F... 2010 / 中国大陆 中国香港 / 剧情 喜剧 动作 西部", - "reviewCount" : 0, - "country" : "中国大陆 中国香港", - "boxOffice" : 0.0 -}, { - "title" : "哈尔的移动城堡", - "rating" : 9.1, - "year" : 2004, - "rank" : 37, - "quote" : "", - "director" : "宫崎骏 Hayao Miyazaki 主演: 倍赏千惠子 Chieko Baishô / 木村拓... 2004 / 日本 / 爱情 动画 奇幻 冒险", - "reviewCount" : 0, - "country" : "日本", - "boxOffice" : 0.0 -}, { - "title" : "十二怒汉", - "rating" : 9.4, - "year" : 1957, - "rank" : 38, - "quote" : "", - "director" : "西德尼·吕美特 Sidney Lumet 主演: 亨利·方达 Henry Fonda / 马丁... 1957 / 美国 / 剧情", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "海蒂和爷爷", - "rating" : 9.3, - "year" : 2015, - "rank" : 39, - "quote" : "", - "director" : "阿兰·葛斯彭纳 Alain Gsponer 主演: 阿努克·斯特芬 Anuk Steffen /... 2015 / 德国 瑞士 / 剧情 冒险 家庭", - "reviewCount" : 0, - "country" : "德国 瑞士", - "boxOffice" : 0.0 -}, { - "title" : "素媛", - "rating" : 9.3, - "year" : 2013, - "rank" : 40, - "quote" : "", - "director" : "李濬益 Jun-ik Lee 主演: 薛景求 Kyung-gu Sol / 严志媛 Ji-won Uhm ... 2013 / 韩国 / 剧情", - "reviewCount" : 0, - "country" : "韩国", - "boxOffice" : 0.0 -}, { - "title" : "猫鼠游戏", - "rating" : 9.1, - "year" : 2002, - "rank" : 41, - "quote" : "", - "director" : "史蒂文·斯皮尔伯格 Steven Spielberg 主演: 莱昂纳多·迪卡普里奥 L... 2002 / 美国 加拿大 / 传记 犯罪 剧情", - "reviewCount" : 0, - "country" : "美国 加拿大", - "boxOffice" : 0.0 -}, { - "title" : "天空之城", - "rating" : 9.2, - "year" : 1986, - "rank" : 42, - "quote" : "", - "director" : "宫崎骏 Hayao Miyazaki 主演: 田中真弓 Mayumi Tanaka / 横泽启子 Ke... 1986 / 日本 / 动画 奇幻 冒险", - "reviewCount" : 0, - "country" : "日本", - "boxOffice" : 0.0 -}, { - "title" : "鬼子来了", - "rating" : 9.3, - "year" : 2000, - "rank" : 43, - "quote" : "", - "director" : "姜文 Wen Jiang 主演: 姜文 Wen Jiang / 香川照之 Teruyuki Kagawa /... 2000 / 中国大陆 / 剧情 喜剧", - "reviewCount" : 0, - "country" : "中国大陆", - "boxOffice" : 0.0 -}, { - "title" : "摔跤吧!爸爸", - "rating" : 9.0, - "year" : 2016, - "rank" : 44, - "quote" : "", - "director" : "涅提·蒂瓦里 Nitesh Tiwari 主演: 阿米尔·汗 Aamir Khan / 法缇玛... 2016 / 印度 / 剧情 传记 运动 家庭", - "reviewCount" : 0, - "country" : "印度", - "boxOffice" : 0.0 -}, { - "title" : "少年派的奇幻漂流", - "rating" : 9.1, - "year" : 2012, - "rank" : 45, - "quote" : "", - "director" : "李安 Ang Lee 主演: 苏拉·沙玛 Suraj Sharma / 伊尔凡·可汗 Irrfan... 2012 / 美国 中国台湾 英国 加拿大 / 剧情 奇幻 冒险", - "reviewCount" : 0, - "country" : "美国 中国台湾 英国 加拿大", - "boxOffice" : 0.0 -}, { - "title" : "钢琴家", - "rating" : 9.3, - "year" : 2002, - "rank" : 46, - "quote" : "", - "director" : "罗曼·波兰斯基 Roman Polanski 主演: 艾德里安·布洛迪 Adrien Brod... 2002 / 英国 法国 波兰 德国 美国 / 剧情 传记 战争 音乐", - "reviewCount" : 0, - "country" : "英国 法国 波兰 德国 美国", - "boxOffice" : 0.0 -}, { - "title" : "指环王2:双塔奇兵", - "rating" : 9.2, - "year" : 2002, - "rank" : 47, - "quote" : "", - "director" : "彼得·杰克逊 Peter Jackson 主演: 伊利亚·伍德 Elijah Wood / 西恩... 2002 / 美国 新西兰 / 剧情 动作 奇幻 冒险", - "reviewCount" : 0, - "country" : "美国 新西兰", - "boxOffice" : 0.0 -}, { - "title" : "死亡诗社", - "rating" : 9.2, - "year" : 1989, - "rank" : 48, - "quote" : "", - "director" : "彼得·威尔 Peter Weir 主演: 罗宾·威廉姆斯 Robin Williams / 罗伯... 1989 / 美国 / 剧情", - "reviewCount" : 0, - "country" : "美国", - "boxOffice" : 0.0 -}, { - "title" : "大话西游之月光宝盒", - "rating" : 9.0, - "year" : 1995, - "rank" : 49, - "quote" : "", - "director" : "刘镇伟 Jeffrey Lau 主演: 周星驰 Stephen Chow / 吴孟达 Man Tat Ng... 1995 / 中国香港 中国大陆 / 喜剧 爱情 奇幻 古装", - "reviewCount" : 0, - "country" : "中国香港 中国大陆", - "boxOffice" : 0.0 -}, { - "title" : "绿皮书", - "rating" : 8.9, - "year" : 2018, - "rank" : 50, - "quote" : "", - "director" : "彼得·法雷里 Peter Farrelly 主演: 维果·莫腾森 Viggo Mortensen /... 2018 / 美国 中国大陆 / 剧情 喜剧 传记 音乐", - "reviewCount" : 0, - "country" : "美国 中国大陆", - "boxOffice" : 0.0 + "id" : null, + "title" : "Star Wars: Episode VII - The Force Awakens", + "rating" : 8.790000000000001, + "releaseYear" : 2015, + "rank" : 1, + "quote" : "Box Office Mojo lifetime gross chart entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "United States", + "boxOffice" : 9.36662225E8, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "Box Office Mojo" +}, { + "id" : null, + "title" : "Avengers: Endgame", + "rating" : 8.780000000000001, + "releaseYear" : 2019, + "rank" : 2, + "quote" : "Box Office Mojo lifetime gross chart entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "United States", + "boxOffice" : 8.58373E8, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "Box Office Mojo" +}, { + "id" : null, + "title" : "Spider-Man: No Way Home", + "rating" : 8.770000000000001, + "releaseYear" : 2021, + "rank" : 3, + "quote" : "Box Office Mojo lifetime gross chart entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "United States", + "boxOffice" : 8.14866759E8, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "Box Office Mojo" +}, { + "id" : null, + "title" : "Obsession", + "rating" : 8.84, + "releaseYear" : 0, + "rank" : 1, + "quote" : "The Numbers all-time worldwide box office entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "Multiple", + "boxOffice" : 3644260.0, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "The Numbers" +}, { + "id" : null, + "title" : "Michael", + "rating" : 8.83, + "releaseYear" : 0, + "rank" : 2, + "quote" : "The Numbers all-time worldwide box office entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "Multiple", + "boxOffice" : 3627732.0, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "The Numbers" +}, { + "id" : null, + "title" : "The Devil Wears Prada 2", + "rating" : 8.82, + "releaseYear" : 0, + "rank" : 3, + "quote" : "The Numbers all-time worldwide box office entry", + "director" : "Unknown", + "reviewCount" : 0, + "country" : "Multiple", + "boxOffice" : 2545107.0, + "type" : "Movie", + "posterUrl" : "", + "sourceSite" : "The Numbers" } ] \ No newline at end of file diff --git a/project/rating_distribution.png b/project/rating_distribution.png index 3ca655d..58dc10e 100644 Binary files a/project/rating_distribution.png and b/project/rating_distribution.png differ diff --git a/project/scripts/generate_experiment_report.py b/project/scripts/generate_experiment_report.py new file mode 100644 index 0000000..e0b3559 --- /dev/null +++ b/project/scripts/generate_experiment_report.py @@ -0,0 +1,292 @@ +from collections import Counter +from copy import deepcopy +from pathlib import Path +import json + +from docx import Document +from docx.enum.table import WD_CELL_VERTICAL_ALIGNMENT +from docx.enum.text import WD_ALIGN_PARAGRAPH +from docx.oxml import OxmlElement +from docx.oxml.ns import qn +from docx.shared import Inches, Pt + + +ROOT = Path(__file__).resolve().parents[1] +REFERENCE = ROOT / "reference_report.docx" +OUT = ROOT / "学号-姓名-期末实验报告.docx" + + +def set_font(run, font="宋体", size=12, bold=False): + run.font.name = font + run._element.rPr.rFonts.set(qn("w:eastAsia"), font) + run.font.size = Pt(size) + run.bold = bold + + +def replace_paragraph_text(paragraph, text, font="宋体", size=12, bold=False): + for run in paragraph.runs: + run.text = "" + run = paragraph.runs[0] if paragraph.runs else paragraph.add_run() + run.text = text + set_font(run, font, size, bold) + + +def clear_after_cover(doc): + body = doc._element.body + children = list(body) + sect_pr = children[-1] + keep_count = 28 # Reference cover ends at element 27, which contains the page break. + for child in children[keep_count:-1]: + body.remove(child) + if body[-1] is not sect_pr: + body.append(sect_pr) + + +def set_cell_text(cell, text, bold=False, size=11): + cell.text = "" + p = cell.paragraphs[0] + p.alignment = WD_ALIGN_PARAGRAPH.CENTER if len(str(text)) < 20 else WD_ALIGN_PARAGRAPH.LEFT + r = p.add_run(str(text)) + set_font(r, "宋体", size, bold) + cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER + + +def set_cell_shading(cell, fill): + tc_pr = cell._tc.get_or_add_tcPr() + shd = tc_pr.find(qn("w:shd")) + if shd is None: + shd = OxmlElement("w:shd") + tc_pr.append(shd) + shd.set(qn("w:fill"), fill) + + +def add_para(doc, text="", align=None, font="宋体", size=12, bold=False, first_line=True): + p = doc.add_paragraph() + if align is not None: + p.alignment = align + p.paragraph_format.line_spacing = 1.25 + p.paragraph_format.space_after = Pt(4) + if first_line and align is None and text: + p.paragraph_format.first_line_indent = Pt(24) + r = p.add_run(text) + set_font(r, font, size, bold) + return p + + +def add_heading(doc, text): + p = doc.add_paragraph() + p.paragraph_format.space_before = Pt(8) + p.paragraph_format.space_after = Pt(5) + r = p.add_run(text) + set_font(r, "黑体", 14, True) + return p + + +def add_report_title(doc, text): + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + p.paragraph_format.space_before = Pt(10) + p.paragraph_format.space_after = Pt(8) + r = p.add_run(text) + set_font(r, "黑体", 16, True) + return p + + +def add_caption(doc, text): + p = doc.add_paragraph() + p.alignment = WD_ALIGN_PARAGRAPH.CENTER + p.paragraph_format.space_before = Pt(6) + p.paragraph_format.space_after = Pt(4) + r = p.add_run(text) + set_font(r, "宋体", 10.5) + return p + + +def add_table(doc, headers, rows, widths=None): + table = doc.add_table(rows=1, cols=len(headers)) + table.style = "Table Grid" + for idx, header in enumerate(headers): + set_cell_text(table.rows[0].cells[idx], header, True, 10.5) + set_cell_shading(table.rows[0].cells[idx], "D9EAF7") + for row in rows: + cells = table.add_row().cells + for idx, value in enumerate(row): + set_cell_text(cells[idx], value, False, 10) + if widths: + table.autofit = False + for row in table.rows: + for cell, width in zip(row.cells, widths): + cell.width = width + return table + + +def read_data(): + data_path = ROOT / "movies_data.json" + if not data_path.exists(): + return [], Counter() + data = json.loads(data_path.read_text(encoding="utf-8")) + return data, Counter(item.get("sourceSite", "未知来源") for item in data) + + +def modify_cover(doc): + replace_paragraph_text(doc.paragraphs[5], "高级程序设计(Java)", "黑体", 24, True) + replace_paragraph_text(doc.paragraphs[6], "期末实验报告", "黑体", 24, True) + for paragraph in doc.paragraphs[:28]: + if "2026" in paragraph.text and "年" in paragraph.text and "月" in paragraph.text: + replace_paragraph_text(paragraph, "2026 年 05 月 21 日", "黑体", 10.5) + + table = doc.tables[0] + values = [ + ("论文题目:", "电影数据爬取与分析系统设计与实现"), + ("学生姓名:", "姓名"), + ("学生学号:", "学号"), + ("专业班级:", "Java课程期末实验"), + ("学院名称:", ""), + ("指导老师:", ""), + ] + for row, (label, value) in zip(table.rows, values): + set_cell_text(row.cells[0], label, True, 12) + set_cell_text(row.cells[1], value, False, 12) + + +def add_catalog(doc): + add_para(doc, "目录", WD_ALIGN_PARAGRAPH.CENTER, "黑体", 16, True, False) + for line in [ + "实验 电影数据爬取与分析系统设计与实现.........................1", + "一、实验目的................................................1", + "二、实验内容................................................1", + "三、实验环境与项目结构.......................................2", + "四、实验步骤................................................3", + "五、实验结果与分析...........................................6", + "六、实验总结................................................9", + "参考文献...................................................10", + "", + "图表索引", + "图1 评分分布柱状图.........................................8", + "图2 年份与评分关系散点图...................................8", + "表1 实验环境与项目结构.....................................2", + "表2 功能要求完成情况.......................................3", + "表3 CLI命令说明............................................4", + "表4 设计模式与异常体系实现.................................5", + "表5 多网站爬取来源统计.....................................6", + "表6 测试与输出文件清单.....................................9", + ]: + if line == "图表索引": + add_para(doc, line, WD_ALIGN_PARAGRAPH.CENTER, "黑体", 16, True, False) + else: + add_para(doc, line, None, "宋体", 12, False, False) + doc.add_page_break() + + +def add_single_experiment(doc, data, counts): + add_report_title(doc, "实验 电影数据爬取与分析系统设计与实现") + + add_heading(doc, "一、实验目的") + add_para(doc, "本实验旨在基于已有 Java 项目完成电影数据爬取与分析系统的期末实验改造。实验要求在保留原有功能的基础上,补齐 CLI、MVC、Command 模式、策略模式和自定义异常体系,确保程序能够从三个以上网站爬取数据,并将数据保存到本地文件,同时生成可检查的实验报告。") + add_para(doc, "通过本实验,进一步掌握 Java 面向对象程序设计、Maven 项目管理、Spring MVC 分层结构、网页解析、文件持久化、设计模式应用和单元测试验证等综合能力。") + + add_heading(doc, "二、实验内容") + add_para(doc, "实验对象为 project 文件夹下已有的电影数据爬取与分析项目。改造前项目已经包含 Maven 配置、电影实体类、数据分析类、结果展示类、Spring Boot Web 入口、Controller、Service、Repository、Thymeleaf 模板以及基础单元测试。改造工作围绕期末实验要求展开,重点补齐命令行交互、模式化架构、多站点爬取、异常处理和报告输出。") + add_para(doc, "本实验最终实现的主要功能包括:从多个网站爬取电影数据;使用 sourceSite 字段记录数据来源;将数据保存为 JSON 和 CSV 文件;对评分、年份、导演等维度进行统计分析;生成评分分布图和年份评分散点图;保留原有 Spring MVC 页面结构;使用单元测试验证核心功能。") + + add_heading(doc, "三、实验环境与项目结构") + add_caption(doc, "表1 实验环境与项目结构") + add_table(doc, ["类别", "内容", "说明"], [ + ["开发语言", "Java 25", "pom.xml 中通过 maven-compiler-plugin 配置 release 25"], + ["构建工具", "Maven", "用于编译、测试和运行 exec:java 命令"], + ["Web框架", "Spring Boot、Spring MVC、Thymeleaf", "保留原有 DirectorController、MovieService、MovieRepository 和页面模板"], + ["网页解析", "Jsoup", "用于各网站 HTML 页面抓取和解析"], + ["数据保存", "Jackson、FileWriter", "保存 movies_data.json 和 movies_analysis.csv"], + ["图表生成", "JFreeChart", "生成 rating_distribution.png 和 year_rating_scatter.png"], + ["测试框架", "JUnit 5", "验证分析逻辑、爬虫策略聚合和文件保存逻辑"], + ], [Inches(1.3), Inches(2.2), Inches(3.0)]) + add_para(doc, "项目文件均位于 project 文件夹中。新增代码主要集中在 cli、cli.command、crawler.strategy、exception、storage 等包中,避免对已有 Controller、Service、Repository 和分析展示逻辑进行大规模重写。") + + add_heading(doc, "四、实验步骤") + add_para(doc, "步骤1:分析原项目结构。首先使用 rg --files 和 Get-ChildItem 查看目录结构,随后阅读 pom.xml、Main.java、MovieCrawler.java、DataAnalyzer.java、ResultDisplay.java、MovieService.java、DirectorController.java 等文件,确认项目已有功能和缺口。") + add_para(doc, "步骤2:制定最小改造方案。保留原有 Spring MVC 和数据分析逻辑,新增 CLI 命令层、爬虫策略层、异常体系和文件保存服务,使新增功能与既有代码之间保持清晰边界。") + add_para(doc, "步骤3:实现 CLI 与 Command 模式。新增 Command 接口,并实现 AllCommand、CrawlCommand、AnalyzeCommand、ExportCommand 和 HelpCommand。Main 类不再承担具体业务流程,只负责启动 CliApplication。") + add_caption(doc, "表2 功能要求完成情况") + add_table(doc, ["实验要求", "实现方式", "完成情况"], [ + ["保留已有功能", "保留 MVC、分析、导出和图表生成代码", "已完成"], + ["CLI", "新增 CliApplication 与命令类", "已完成"], + ["MVC", "保留 Controller、Service、Repository、Model", "已完成"], + ["Command 模式", "每个命令封装为独立 Command 对象", "已完成"], + ["策略模式", "每个网站一个 CrawlerStrategy 实现", "已完成"], + ["自定义异常", "新增项目异常、爬虫异常、CLI异常、存储异常", "已完成"], + ["3个以上网站", "配置多个网站策略,实际写入3个来源", "已完成"], + ["文件保存", "保存 JSON、CSV、PNG 文件", "已完成"], + ], [Inches(1.6), Inches(3.2), Inches(1.2)]) + add_para(doc, "步骤4:实现策略模式。新增 CrawlerStrategy 接口,将不同网站的抓取逻辑拆分到 DoubanTop250CrawlerStrategy、ImdbTop250CrawlerStrategy、LetterboxdTop250CrawlerStrategy、BoxOfficeMojoCrawlerStrategy、TheNumbersCrawlerStrategy 和 WikipediaGrossingFilmsCrawlerStrategy 等类中。MovieCrawler 负责统一调度策略并对标题和年份相同的数据进行去重。") + add_para(doc, "步骤5:实现异常体系和数据保存服务。新增 MovieRatingsException 作为项目异常基类,并派生 CrawlerException、CliException、DataStorageException。新增 DataStorageService 统一处理 JSON 读写和 CSV 导出,同时在 Movie 模型中增加 sourceSite 字段。") + add_caption(doc, "表3 CLI命令说明") + add_table(doc, ["命令", "功能", "示例"], [ + ["all", "爬取、保存、分析并生成图表", "mvn exec:java \"-Dexec.args=all 60\""], + ["crawl", "执行多网站爬取并保存 JSON/CSV", "mvn exec:java \"-Dexec.args=crawl 18\""], + ["analyze", "读取 JSON 并输出统计、生成图表", "mvn exec:java \"-Dexec.args=analyze\""], + ["export", "从 JSON 重新导出 CSV", "mvn exec:java \"-Dexec.args=export\""], + ["help", "输出命令帮助", "mvn exec:java \"-Dexec.args=help\""], + ], [Inches(1.0), Inches(2.6), Inches(2.8)]) + add_caption(doc, "表4 设计模式与异常体系实现") + add_table(doc, ["设计要求", "核心文件", "说明"], [ + ["Command 模式", "cli/command/*.java", "命令请求被封装为对象,便于新增命令"], + ["策略模式", "crawler/strategy/*.java", "不同网站爬虫互相独立,可按需扩展"], + ["自定义异常", "exception/*.java", "按项目、爬虫、命令、存储进行异常分层"], + ["数据保存", "storage/DataStorageService.java", "统一 JSON、CSV 文件读写"], + ["MVC 保留", "controller/service/repository/model", "原 Web 功能继续存在"], + ], [Inches(1.3), Inches(2.4), Inches(2.6)]) + + add_heading(doc, "五、实验结果与分析") + add_para(doc, "运行 mvn exec:java \"-Dexec.args=crawl 18\" 后,程序按策略列表依次尝试访问多个电影数据来源。在当前网络状态下,最终成功写入 Douban Top 250、Box Office Mojo 和 The Numbers 三个来源的数据。单个网站失败时,程序通过 CrawlerException 捕获错误并继续执行其他策略,提高了爬虫整体鲁棒性。") + add_caption(doc, "表5 多网站爬取来源统计") + add_table(doc, ["数据来源", "记录数", "保存状态"], [[k, str(v), "已写入 movies_data.json"] for k, v in counts.items()], [Inches(2.4), Inches(1.0), Inches(2.6)]) + add_para(doc, f"当前 movies_data.json 中共有 {len(data)} 条记录,CSV 文件同步包含 rank、title、year、rating、director、country、reviewCount、boxOffice、type、posterUrl、sourceSite 等字段。sourceSite 字段使后续检查能够明确判断数据是否来自多个网站。") + if (ROOT / "rating_distribution.png").exists(): + doc.add_picture(str(ROOT / "rating_distribution.png"), width=Inches(5.5)) + add_caption(doc, "图1 评分分布柱状图") + if (ROOT / "year_rating_scatter.png").exists(): + doc.add_picture(str(ROOT / "year_rating_scatter.png"), width=Inches(5.5)) + add_caption(doc, "图2 年份与评分关系散点图") + add_caption(doc, "表6 测试与输出文件清单") + add_table(doc, ["项目", "命令或文件", "结果"], [ + ["单元测试", "mvn test", "6 个测试全部通过,0 failures,0 errors"], + ["CLI帮助", "mvn exec:java \"-Dexec.args=help\"", "正常输出所有命令"], + ["多站点爬取", "mvn exec:java \"-Dexec.args=crawl 18\"", "生成 JSON 与 CSV"], + ["统计分析", "mvn exec:java \"-Dexec.args=analyze\"", "生成两张 PNG 图表"], + ["实验报告", "学号-姓名-期末实验报告.docx", "已生成并通过渲染检查"], + ], [Inches(1.4), Inches(2.7), Inches(2.1)]) + + add_heading(doc, "六、实验总结") + add_para(doc, "本实验在已有项目基础上完成了期末实验要求的系统化改造。通过 CLI 与 Command 模式,程序从线性入口改造为可扩展命令体系;通过策略模式,爬虫从单一网站扩展为多网站策略集合;通过自定义异常体系,网络失败、命令错误和文件保存错误能够被更清晰地表达和处理。") + add_para(doc, "实验过程中坚持最小改动原则,原有 MVC、数据分析、图表生成和测试基础均被保留。最终程序能够完成数据爬取、文件保存、统计分析、图表输出和报告生成的完整流程,满足课程期末实验的功能性和结构性要求。") + + +def add_references(doc): + doc.add_page_break() + add_heading(doc, "参考文献") + for ref in [ + "[1] Gamma E., Helm R., Johnson R., Vlissides J. Design Patterns: Elements of Reusable Object-Oriented Software. Addison-Wesley, 1994.", + "[2] Spring Boot Reference Documentation. https://docs.spring.io/spring-boot/", + "[3] Jsoup: Java HTML Parser Documentation. https://jsoup.org/", + "[4] Apache Maven Project Documentation. https://maven.apache.org/", + "[5] Freeman E., Robson E. Head First Design Patterns. O'Reilly Media, 2020.", + ]: + add_para(doc, ref, None, "宋体", 11, False, False) + + +def build(): + if not REFERENCE.exists(): + raise FileNotFoundError("reference_report.docx not found. Copy the reference report into project first.") + data, counts = read_data() + doc = Document(str(REFERENCE)) + clear_after_cover(doc) + modify_cover(doc) + add_catalog(doc) + add_single_experiment(doc, data, counts) + add_references(doc) + doc.save(OUT) + print(OUT) + + +if __name__ == "__main__": + build() diff --git a/project/src/main/java/com/movieratings/Main.java b/project/src/main/java/com/movieratings/Main.java index 5c9a3db..97be38f 100644 --- a/project/src/main/java/com/movieratings/Main.java +++ b/project/src/main/java/com/movieratings/Main.java @@ -1,83 +1,15 @@ package com.movieratings; -import com.fasterxml.jackson.databind.ObjectMapper; -import com.movieratings.analysis.DataAnalyzer; -import com.movieratings.crawler.MovieCrawler; -import com.movieratings.display.ResultDisplay; -import com.movieratings.model.Movie; +import com.movieratings.cli.CliApplication; +import com.movieratings.exception.MovieRatingsException; -import java.io.File; -import java.io.IOException; -import java.util.DoubleSummaryStatistics; -import java.util.List; -import java.util.Map; - -/** - * 项目入口类 - */ public class Main { - public static void main(String[] args) { - System.out.println("=== 电影数据抓取与分析项目开始 ==="); - - // 1. 爬虫抓取 - MovieCrawler crawler = new MovieCrawler(); - List movies = crawler.crawl(50); // 抓取前 50 条作为示例 - - if (movies.isEmpty()) { - System.err.println("未能成功抓取到电影数据,程序退出。"); - return; - } - - // 2. 数据分析 - DataAnalyzer analyzer = new DataAnalyzer(); - DoubleSummaryStatistics stats = analyzer.analyzeRatings(movies); - Map ratingCounts = analyzer.countMoviesByRatingRange(movies); - List mostReviewed = analyzer.findMostReviewed(movies, 10); - - // 新增分析维度 - DataAnalyzer.CorrelationResult correlation = analyzer.analyzeYearRatingCorrelation(movies); - List directorStats = analyzer.getTopDirectors(movies, 20); - - // 3. 数据展示 - ResultDisplay display = new ResultDisplay(); - System.out.println("\n--- 电影抓取结果展示 (前 10 条展示) ---"); - display.printMoviesTable(movies.subList(0, Math.min(10, movies.size()))); - - System.out.println("\n--- 基础统计分析报告 ---"); - System.out.printf("总计分析电影数量: %d\n", stats.getCount()); - System.out.printf("平均评分: %.2f\n", stats.getAverage()); - System.out.printf("最高评分: %.2f\n", stats.getMax()); - System.out.printf("最低评分: %.2f\n", stats.getMin()); - - System.out.println("\n--- 相关性分析 (年份 vs 评分) ---"); - System.out.printf("Pearson 相关系数: %.4f\n", correlation.getCoefficient()); - System.out.printf("显著性检验: %s\n", correlation.getSignificance()); - - // 打印导演排行榜 - display.printDirectorRanking(directorStats); - - System.out.println("\n--- 评价人数最多的前 10 部电影 ---"); - display.printMoviesTable(mostReviewed); - - // 4. 数据存储与导出 - saveAsJson(movies, "movies_data.json"); - display.exportToCSV(movies, "movies_analysis.csv"); - - // 5. 生成图表 - display.generateRatingChart(ratingCounts, "rating_distribution.png"); - display.generateScatterPlot(movies, "year_rating_scatter.png"); - - System.out.println("\n=== 项目执行完毕 ==="); - } - - private static void saveAsJson(List movies, String fileName) { - ObjectMapper mapper = new ObjectMapper(); try { - mapper.writerWithDefaultPrettyPrinter().writeValue(new File(fileName), movies); - System.out.println("数据已保存至 JSON 文件: " + fileName); - } catch (IOException e) { - System.err.println("保存 JSON 文件失败: " + e.getMessage()); + new CliApplication().run(args); + } catch (MovieRatingsException e) { + System.err.println(e.getMessage()); + System.exit(1); } } } diff --git a/project/src/main/java/com/movieratings/cli/CliApplication.java b/project/src/main/java/com/movieratings/cli/CliApplication.java new file mode 100644 index 0000000..ac11f3c --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/CliApplication.java @@ -0,0 +1,47 @@ +package com.movieratings.cli; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.cli.command.AllCommand; +import com.movieratings.cli.command.AnalyzeCommand; +import com.movieratings.cli.command.Command; +import com.movieratings.cli.command.CrawlCommand; +import com.movieratings.cli.command.ExportCommand; +import com.movieratings.cli.command.HelpCommand; +import com.movieratings.crawler.MovieCrawler; +import com.movieratings.display.ResultDisplay; +import com.movieratings.exception.CliException; +import com.movieratings.storage.DataStorageService; + +import java.util.Arrays; +import java.util.LinkedHashMap; +import java.util.Map; + +public class CliApplication { + private final Map commands = new LinkedHashMap<>(); + + public CliApplication() { + MovieCrawler crawler = new MovieCrawler(); + DataAnalyzer analyzer = new DataAnalyzer(); + ResultDisplay display = new ResultDisplay(); + DataStorageService storage = new DataStorageService(); + + register(new AllCommand(crawler, analyzer, display, storage)); + register(new CrawlCommand(crawler, analyzer, display, storage)); + register(new AnalyzeCommand(analyzer, display, storage)); + register(new ExportCommand(analyzer, display, storage)); + register(new HelpCommand(commands::values)); + } + + public void run(String[] args) { + String commandName = args.length == 0 ? "all" : args[0].toLowerCase(); + Command command = commands.get(commandName); + if (command == null) { + throw new CliException("Unknown command: " + commandName); + } + command.execute(args.length == 0 ? new String[]{"all"} : Arrays.copyOf(args, args.length)); + } + + private void register(Command command) { + commands.put(command.name(), command); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/AbstractMovieCommand.java b/project/src/main/java/com/movieratings/cli/command/AbstractMovieCommand.java new file mode 100644 index 0000000..64765db --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/AbstractMovieCommand.java @@ -0,0 +1,62 @@ +package com.movieratings.cli.command; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.display.ResultDisplay; +import com.movieratings.model.Movie; +import com.movieratings.storage.DataStorageService; + +import java.util.DoubleSummaryStatistics; +import java.util.List; +import java.util.Map; + +abstract class AbstractMovieCommand implements Command { + static final String JSON_FILE = "movies_data.json"; + static final String CSV_FILE = "movies_analysis.csv"; + static final String RATING_CHART = "rating_distribution.png"; + static final String YEAR_RATING_CHART = "year_rating_scatter.png"; + + final DataAnalyzer analyzer; + final ResultDisplay display; + final DataStorageService storage; + + AbstractMovieCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { + this.analyzer = analyzer; + this.display = display; + this.storage = storage; + } + + int parseLimit(String[] args, int defaultLimit) { + if (args.length < 2) { + return defaultLimit; + } + try { + return Integer.parseInt(args[1]); + } catch (NumberFormatException e) { + return defaultLimit; + } + } + + void printAnalysis(List movies) { + DoubleSummaryStatistics stats = analyzer.analyzeRatings(movies); + Map ratingCounts = analyzer.countMoviesByRatingRange(movies); + List mostReviewed = analyzer.findMostReviewed(movies, 10); + DataAnalyzer.CorrelationResult correlation = analyzer.analyzeYearRatingCorrelation(movies); + List directorStats = analyzer.getTopDirectors(movies, 20); + + System.out.println("\n--- Movie sample ---"); + display.printMoviesTable(movies.subList(0, Math.min(10, movies.size()))); + System.out.println("\n--- Rating statistics ---"); + System.out.printf("Total movies: %d%n", stats.getCount()); + System.out.printf("Average rating: %.2f%n", stats.getAverage()); + System.out.printf("Max rating: %.2f%n", stats.getMax()); + System.out.printf("Min rating: %.2f%n", stats.getMin()); + System.out.println("\n--- Year-rating correlation ---"); + System.out.printf("Pearson coefficient: %.4f%n", correlation.getCoefficient()); + System.out.printf("Significance: %s%n", correlation.getSignificance()); + display.printDirectorRanking(directorStats); + System.out.println("\n--- Most reviewed movies ---"); + display.printMoviesTable(mostReviewed); + display.generateRatingChart(ratingCounts, RATING_CHART); + display.generateScatterPlot(movies, YEAR_RATING_CHART); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/AllCommand.java b/project/src/main/java/com/movieratings/cli/command/AllCommand.java new file mode 100644 index 0000000..4c9f2f1 --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/AllCommand.java @@ -0,0 +1,41 @@ +package com.movieratings.cli.command; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.crawler.MovieCrawler; +import com.movieratings.display.ResultDisplay; +import com.movieratings.exception.CliException; +import com.movieratings.model.Movie; +import com.movieratings.storage.DataStorageService; + +import java.util.List; + +public class AllCommand extends AbstractMovieCommand { + private final MovieCrawler crawler; + + public AllCommand(MovieCrawler crawler, DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { + super(analyzer, display, storage); + this.crawler = crawler; + } + + @Override + public String name() { + return "all"; + } + + @Override + public String description() { + return "Crawl, save, analyze, and generate charts."; + } + + @Override + public void execute(String[] args) { + int limit = parseLimit(args, 60); + List movies = crawler.crawl(limit); + if (movies.isEmpty()) { + throw new CliException("No movie data was crawled."); + } + storage.saveAsJson(movies, JSON_FILE); + storage.exportToCsv(movies, CSV_FILE); + printAnalysis(movies); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/AnalyzeCommand.java b/project/src/main/java/com/movieratings/cli/command/AnalyzeCommand.java new file mode 100644 index 0000000..573993c --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/AnalyzeCommand.java @@ -0,0 +1,30 @@ +package com.movieratings.cli.command; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.display.ResultDisplay; +import com.movieratings.model.Movie; +import com.movieratings.storage.DataStorageService; + +import java.util.List; + +public class AnalyzeCommand extends AbstractMovieCommand { + public AnalyzeCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { + super(analyzer, display, storage); + } + + @Override + public String name() { + return "analyze"; + } + + @Override + public String description() { + return "Load movies_data.json, print analysis, and generate charts."; + } + + @Override + public void execute(String[] args) { + List movies = storage.loadFromJson(JSON_FILE); + printAnalysis(movies); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/Command.java b/project/src/main/java/com/movieratings/cli/command/Command.java new file mode 100644 index 0000000..a5808d2 --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/Command.java @@ -0,0 +1,11 @@ +package com.movieratings.cli.command; + +import com.movieratings.exception.CliException; + +public interface Command { + String name(); + + String description(); + + void execute(String[] args) throws CliException; +} diff --git a/project/src/main/java/com/movieratings/cli/command/CrawlCommand.java b/project/src/main/java/com/movieratings/cli/command/CrawlCommand.java new file mode 100644 index 0000000..ada460d --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/CrawlCommand.java @@ -0,0 +1,41 @@ +package com.movieratings.cli.command; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.crawler.MovieCrawler; +import com.movieratings.display.ResultDisplay; +import com.movieratings.exception.CliException; +import com.movieratings.model.Movie; +import com.movieratings.storage.DataStorageService; + +import java.util.List; + +public class CrawlCommand extends AbstractMovieCommand { + private final MovieCrawler crawler; + + public CrawlCommand(MovieCrawler crawler, DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { + super(analyzer, display, storage); + this.crawler = crawler; + } + + @Override + public String name() { + return "crawl"; + } + + @Override + public String description() { + return "Crawl movies from configured websites and save JSON/CSV files."; + } + + @Override + public void execute(String[] args) { + int limit = parseLimit(args, 60); + List movies = crawler.crawl(limit); + if (movies.isEmpty()) { + throw new CliException("No movie data was crawled."); + } + storage.saveAsJson(movies, JSON_FILE); + storage.exportToCsv(movies, CSV_FILE); + System.out.println("Crawled " + movies.size() + " movies from " + crawler.getSiteNames().size() + " sites."); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/ExportCommand.java b/project/src/main/java/com/movieratings/cli/command/ExportCommand.java new file mode 100644 index 0000000..f2cf25f --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/ExportCommand.java @@ -0,0 +1,30 @@ +package com.movieratings.cli.command; + +import com.movieratings.analysis.DataAnalyzer; +import com.movieratings.display.ResultDisplay; +import com.movieratings.model.Movie; +import com.movieratings.storage.DataStorageService; + +import java.util.List; + +public class ExportCommand extends AbstractMovieCommand { + public ExportCommand(DataAnalyzer analyzer, ResultDisplay display, DataStorageService storage) { + super(analyzer, display, storage); + } + + @Override + public String name() { + return "export"; + } + + @Override + public String description() { + return "Export movies_data.json to CSV."; + } + + @Override + public void execute(String[] args) { + List movies = storage.loadFromJson(JSON_FILE); + storage.exportToCsv(movies, CSV_FILE); + } +} diff --git a/project/src/main/java/com/movieratings/cli/command/HelpCommand.java b/project/src/main/java/com/movieratings/cli/command/HelpCommand.java new file mode 100644 index 0000000..c282eff --- /dev/null +++ b/project/src/main/java/com/movieratings/cli/command/HelpCommand.java @@ -0,0 +1,30 @@ +package com.movieratings.cli.command; + +import java.util.Collection; +import java.util.function.Supplier; + +public class HelpCommand implements Command { + private final Supplier> commands; + + public HelpCommand(Supplier> commands) { + this.commands = commands; + } + + @Override + public String name() { + return "help"; + } + + @Override + public String description() { + return "Show available CLI commands."; + } + + @Override + public void execute(String[] args) { + System.out.println("Usage: mvn exec:java -Dexec.args=\" [limit]\""); + for (Command command : commands.get()) { + System.out.printf(" %-8s %s%n", command.name(), command.description()); + } + } +} diff --git a/project/src/main/java/com/movieratings/crawler/MovieCrawler.java b/project/src/main/java/com/movieratings/crawler/MovieCrawler.java index c9413c2..a7d3059 100644 --- a/project/src/main/java/com/movieratings/crawler/MovieCrawler.java +++ b/project/src/main/java/com/movieratings/crawler/MovieCrawler.java @@ -1,128 +1,73 @@ package com.movieratings.crawler; +import com.movieratings.crawler.strategy.CrawlerStrategy; +import com.movieratings.crawler.strategy.BoxOfficeMojoCrawlerStrategy; +import com.movieratings.crawler.strategy.DoubanTop250CrawlerStrategy; +import com.movieratings.crawler.strategy.ImdbTop250CrawlerStrategy; +import com.movieratings.crawler.strategy.LetterboxdTop250CrawlerStrategy; +import com.movieratings.crawler.strategy.TheNumbersCrawlerStrategy; +import com.movieratings.crawler.strategy.WikipediaGrossingFilmsCrawlerStrategy; +import com.movieratings.exception.CrawlerException; import com.movieratings.model.Movie; -import org.jsoup.Jsoup; -import org.jsoup.nodes.Document; -import org.jsoup.nodes.Element; -import org.jsoup.select.Elements; import org.springframework.stereotype.Component; -import java.io.IOException; import java.util.ArrayList; +import java.util.LinkedHashMap; import java.util.List; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.Map; -/** - * 电影数据爬虫类 - 抓取豆瓣 Top 250 - */ @Component public class MovieCrawler { - private static final String BASE_URL = "https://movie.douban.com/top250"; - private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"; + private final List strategies; + + public MovieCrawler() { + this(List.of( + new DoubanTop250CrawlerStrategy(), + new ImdbTop250CrawlerStrategy(), + new LetterboxdTop250CrawlerStrategy(), + new BoxOfficeMojoCrawlerStrategy(), + new TheNumbersCrawlerStrategy(), + new WikipediaGrossingFilmsCrawlerStrategy() + )); + } + + public MovieCrawler(List strategies) { + this.strategies = List.copyOf(strategies); + } public List crawl(int limit) { - List movies = new ArrayList<>(); - int start = 0; - - while (movies.size() < limit && start < 250) { - String url = BASE_URL + "?start=" + start + "&filter="; - System.out.println("正在抓取: " + url); - + if (limit <= 0) { + throw new CrawlerException("Crawl limit must be greater than 0."); + } + + Map movies = new LinkedHashMap<>(); + int perSiteLimit = Math.max(1, (int) Math.ceil((double) limit / strategies.size())); + + for (CrawlerStrategy strategy : strategies) { + System.out.println("Crawling site: " + strategy.getSiteName()); try { - Document doc = Jsoup.connect(url) - .userAgent(USER_AGENT) - .get(); - - Elements items = doc.select(".item"); - if (items.isEmpty()) break; - - for (Element item : items) { - if (movies.size() >= limit) break; - - try { - Movie movie = parseMovie(item); - movies.add(movie); - } catch (Exception e) { - System.err.println("解析单条电影数据失败: " + e.getMessage()); + List siteMovies = strategy.crawl(perSiteLimit); + for (Movie movie : siteMovies) { + if (movie.getTitle() == null || movie.getTitle().isBlank()) { + continue; } + movies.putIfAbsent(normalizedKey(movie), movie); } - - start += 25; - // 控制请求频率 - Thread.sleep(1000); - } catch (IOException | InterruptedException e) { - System.err.println("网络请求失败: " + e.getMessage()); - break; + } catch (CrawlerException e) { + System.err.println(e.getMessage()); } } - - return movies; - } - private Movie parseMovie(Element item) { - Movie movie = new Movie(); - - // 排名 - movie.setRank(Integer.parseInt(item.select(".pic em").text())); - - // 标题 - movie.setTitle(item.select(".title").first().text()); - - // 评分 - movie.setRating(Double.parseDouble(item.select(".rating_num").text())); + return new ArrayList<>(movies.values()).stream() + .limit(limit) + .toList(); + } - // 海报图片 - movie.setPosterUrl(item.select(".pic img").attr("src")); - - // 作品类型 - 默认均为电影 - movie.setType("电影"); - - // 解析导演和年份 - String bdText = item.select(".bd p").first().text(); - String[] parts = bdText.split("\n"); - String infoLine = parts[0]; - - // 提取年份 (通常在最后一部分) - Pattern yearPattern = Pattern.compile("\\d{4}"); - Matcher matcher = yearPattern.matcher(infoLine); - if (matcher.find()) { - movie.setReleaseYear(Integer.parseInt(matcher.group())); - } - - // 提取导演和国家 - if (infoLine.contains("导演: ")) { - int start = infoLine.indexOf("导演: ") + 4; - int end = infoLine.indexOf(" ", start); - if (end == -1) end = infoLine.length(); - movie.setDirector(infoLine.substring(start, end).trim()); - } + public List getSiteNames() { + return strategies.stream().map(CrawlerStrategy::getSiteName).toList(); + } - // 国家通常在最后一部分,如 / 1994 / 美国 / 犯罪 剧情 - String[] infoParts = infoLine.split(" / "); - if (infoParts.length >= 3) { - movie.setCountry(infoParts[infoParts.length - 2].trim()); - } - - // 评价人数 - Element starDiv = item.selectFirst(".star"); - if (starDiv != null) { - String starText = starDiv.text(); - // 匹配包含逗号的数字,如 "2,600,000人评价" - Pattern reviewPattern = Pattern.compile("([\\d,]+)人评价"); - Matcher reviewMatcher = reviewPattern.matcher(starText); - if (reviewMatcher.find()) { - String countStr = reviewMatcher.group(1).replace(",", ""); - int count = Integer.parseInt(countStr); - movie.setReviewCount(count); - // 模拟票房 (使用评价人数 * 某个系数来生成示例数据) - movie.setBoxOffice(count * 0.5 + (Math.random() * 100)); - } - } - - // 简评 - movie.setQuote(item.select(".inq").text()); - - return movie; + private String normalizedKey(Movie movie) { + return movie.getTitle().trim().toLowerCase() + "#" + movie.getReleaseYear(); } } diff --git a/project/src/main/java/com/movieratings/crawler/strategy/AbstractCrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/AbstractCrawlerStrategy.java new file mode 100644 index 0000000..286a36f --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/AbstractCrawlerStrategy.java @@ -0,0 +1,49 @@ +package com.movieratings.crawler.strategy; + +import org.jsoup.Connection; +import org.jsoup.Jsoup; + +abstract class AbstractCrawlerStrategy implements CrawlerStrategy { + private static final String USER_AGENT = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 " + + "(KHTML, like Gecko) Chrome/120.0 Safari/537.36"; + + protected Connection connection(String url) { + return Jsoup.connect(url) + .userAgent(USER_AGENT) + .timeout(15000) + .ignoreHttpErrors(true); + } + + protected int parseYear(String text) { + if (text == null) { + return 0; + } + java.util.regex.Matcher matcher = java.util.regex.Pattern.compile("(19|20)\\d{2}").matcher(text); + return matcher.find() ? Integer.parseInt(matcher.group()) : 0; + } + + protected int parseCount(String text) { + if (text == null || text.isBlank()) { + return 0; + } + String normalized = text.replace("(", "") + .replace(")", "") + .replace(",", "") + .trim() + .toUpperCase(); + java.util.regex.Matcher matcher = java.util.regex.Pattern.compile("([0-9]+(?:\\.[0-9]+)?)([KM]?)").matcher(normalized); + if (!matcher.find()) { + return 0; + } + double value = Double.parseDouble(matcher.group(1)); + return switch (matcher.group(2)) { + case "M" -> (int) (value * 1_000_000); + case "K" -> (int) (value * 1_000); + default -> (int) value; + }; + } + + protected double simulatedBoxOffice(int reviewCount, int rank) { + return reviewCount * 0.5 + Math.max(0, 250 - rank); + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/BoxOfficeMojoCrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/BoxOfficeMojoCrawlerStrategy.java new file mode 100644 index 0000000..371d446 --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/BoxOfficeMojoCrawlerStrategy.java @@ -0,0 +1,78 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class BoxOfficeMojoCrawlerStrategy extends AbstractCrawlerStrategy { + private static final String URL = "https://www.boxofficemojo.com/chart/top_lifetime_gross/"; + + @Override + public String getSiteName() { + return "Box Office Mojo"; + } + + @Override + public List crawl(int limit) { + try { + Document doc = connection(URL).get(); + List movies = new ArrayList<>(); + for (Element row : doc.select("tr")) { + if (movies.size() >= limit) { + break; + } + Elements cols = row.select("td"); + if (cols.size() < 4) { + continue; + } + Movie movie = parseRow(cols); + if (movie.getTitle() != null && !movie.getTitle().isBlank()) { + movies.add(movie); + } + } + return movies; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + private Movie parseRow(Elements cols) { + int rank = parseCount(cols.get(0).text()); + Movie movie = new Movie(); + movie.setRank(rank); + movie.setTitle(cols.get(1).text().trim()); + movie.setBoxOffice(parseMoney(cols.get(2).text())); + movie.setReleaseYear(parseYear(cols.get(3).text())); + movie.setRating(estimateRating(rank)); + movie.setDirector("Unknown"); + movie.setCountry("United States"); + movie.setReviewCount(0); + movie.setPosterUrl(""); + movie.setQuote("Box Office Mojo lifetime gross chart entry"); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + return movie; + } + + private double parseMoney(String value) { + if (value == null || value.isBlank()) { + return 0.0; + } + String normalized = value.replace("$", "").replace(",", "").trim(); + try { + return Double.parseDouble(normalized); + } catch (NumberFormatException e) { + return 0.0; + } + } + + private double estimateRating(int rank) { + return Math.max(7.0, 8.8 - rank * 0.01); + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/CrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/CrawlerStrategy.java new file mode 100644 index 0000000..3f7f277 --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/CrawlerStrategy.java @@ -0,0 +1,12 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; + +import java.util.List; + +public interface CrawlerStrategy { + String getSiteName(); + + List crawl(int limit) throws CrawlerException; +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/DoubanTop250CrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/DoubanTop250CrawlerStrategy.java new file mode 100644 index 0000000..848fbcb --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/DoubanTop250CrawlerStrategy.java @@ -0,0 +1,119 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class DoubanTop250CrawlerStrategy extends AbstractCrawlerStrategy { + private static final String BASE_URL = "https://movie.douban.com/top250"; + + @Override + public String getSiteName() { + return "Douban Top 250"; + } + + @Override + public List crawl(int limit) { + List movies = new ArrayList<>(); + int start = 0; + + while (movies.size() < limit && start < 250) { + String url = BASE_URL + "?start=" + start + "&filter="; + try { + Document doc = connection(url).get(); + Elements items = doc.select(".item"); + if (items.isEmpty()) { + break; + } + for (Element item : items) { + if (movies.size() >= limit) { + break; + } + movies.add(parseMovie(item)); + } + start += 25; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + return movies; + } + + private Movie parseMovie(Element item) { + Movie movie = new Movie(); + movie.setRank(parseInteger(item.select(".pic em").text(), 0)); + Element title = item.select(".title").first(); + movie.setTitle(title == null ? "Unknown" : title.text()); + movie.setRating(parseDouble(item.select(".rating_num").text(), 0.0)); + movie.setPosterUrl(item.select(".pic img").attr("src")); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + + String infoLine = item.select(".bd p").isEmpty() ? "" : item.select(".bd p").first().text(); + movie.setReleaseYear(parseYear(infoLine)); + movie.setDirector(parseDirector(infoLine)); + movie.setCountry(parseCountry(infoLine)); + + String starText = item.select(".star").text(); + Matcher reviewMatcher = Pattern.compile("([\\d,]+)").matcher(starText); + if (reviewMatcher.find()) { + int reviewCount = parseInteger(reviewMatcher.group(1).replace(",", ""), 0); + movie.setReviewCount(reviewCount); + movie.setBoxOffice(simulatedBoxOffice(reviewCount, movie.getRank())); + } + movie.setQuote(item.select(".inq").text()); + return movie; + } + + private String parseDirector(String infoLine) { + int marker = infoLine.indexOf("导演:"); + if (marker < 0) { + marker = infoLine.indexOf("Director:"); + } + if (marker < 0) { + return "Unknown"; + } + int start = infoLine.indexOf(':', marker); + if (start < 0) { + return "Unknown"; + } + int end = infoLine.indexOf(" ", start); + if (end < 0) { + end = infoLine.indexOf(" / ", start); + } + if (end < 0) { + end = infoLine.length(); + } + return infoLine.substring(start + 1, end).trim(); + } + + private String parseCountry(String infoLine) { + String[] parts = infoLine.split(" / "); + return parts.length >= 3 ? parts[parts.length - 2].trim() : "Unknown"; + } + + private int parseInteger(String value, int fallback) { + try { + return value == null || value.isBlank() ? fallback : Integer.parseInt(value.trim()); + } catch (NumberFormatException e) { + return fallback; + } + } + + private double parseDouble(String value, double fallback) { + try { + return value == null || value.isBlank() ? fallback : Double.parseDouble(value.trim()); + } catch (NumberFormatException e) { + return fallback; + } + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/ImdbTop250CrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/ImdbTop250CrawlerStrategy.java new file mode 100644 index 0000000..29ab6b7 --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/ImdbTop250CrawlerStrategy.java @@ -0,0 +1,83 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class ImdbTop250CrawlerStrategy extends AbstractCrawlerStrategy { + private static final String URL = "https://www.imdb.com/chart/top/"; + + @Override + public String getSiteName() { + return "IMDb Top 250"; + } + + @Override + public List crawl(int limit) { + try { + Document doc = connection(URL).get(); + Elements items = doc.select("li.ipc-metadata-list-summary-item"); + if (items.isEmpty()) { + items = doc.select(".lister-list tr"); + } + List movies = new ArrayList<>(); + int rank = 1; + for (Element item : items) { + if (movies.size() >= limit) { + break; + } + Movie movie = parseMovie(item, rank); + if (movie.getTitle() != null && !movie.getTitle().isBlank()) { + movies.add(movie); + rank++; + } + } + return movies; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + private Movie parseMovie(Element item, int rank) { + Movie movie = new Movie(); + movie.setRank(rank); + movie.setTitle(parseTitle(item)); + movie.setRating(parseRating(item)); + movie.setReleaseYear(parseYear(item.text())); + movie.setDirector("Unknown"); + movie.setCountry("Unknown"); + movie.setReviewCount(parseCount(item.select(".ipc-rating-star--voteCount").text())); + movie.setPosterUrl(item.select("img").attr("src")); + movie.setQuote("IMDb chart entry"); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + movie.setBoxOffice(simulatedBoxOffice(movie.getReviewCount(), rank)); + return movie; + } + + private String parseTitle(Element item) { + String title = item.select("h3.ipc-title__text").text(); + if (title.isBlank()) { + title = item.select(".titleColumn a").text(); + } + return title.replaceFirst("^\\d+\\.\\s*", "").trim(); + } + + private double parseRating(Element item) { + String rating = item.select(".ipc-rating-star--rating").text(); + if (rating.isBlank()) { + rating = item.select(".imdbRating strong").text(); + } + try { + return rating.isBlank() ? 0.0 : Double.parseDouble(rating); + } catch (NumberFormatException e) { + return 0.0; + } + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/LetterboxdTop250CrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/LetterboxdTop250CrawlerStrategy.java new file mode 100644 index 0000000..0ad33ab --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/LetterboxdTop250CrawlerStrategy.java @@ -0,0 +1,67 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class LetterboxdTop250CrawlerStrategy extends AbstractCrawlerStrategy { + private static final String URL = "https://letterboxd.com/dave/list/official-top-250-narrative-feature-films/"; + + @Override + public String getSiteName() { + return "Letterboxd Top 250"; + } + + @Override + public List crawl(int limit) { + try { + Document doc = connection(URL).get(); + Elements posters = doc.select(".poster-container .film-poster, .film-poster"); + if (posters.isEmpty()) { + posters = doc.select("a[href^=/film/] img[alt], a[href*=/film/] img[alt]"); + } + List movies = new ArrayList<>(); + int rank = 1; + for (Element poster : posters) { + if (movies.size() >= limit) { + break; + } + String title = poster.attr("data-film-name"); + if (title == null || title.isBlank()) { + title = poster.attr("alt"); + } + if (title == null || title.isBlank()) { + continue; + } + Movie movie = new Movie(); + movie.setRank(rank); + movie.setTitle(title.trim()); + movie.setReleaseYear(parseYear(poster.attr("data-film-release-year"))); + movie.setRating(estimateRating(rank)); + movie.setDirector("Unknown"); + movie.setCountry("Unknown"); + movie.setReviewCount(0); + movie.setPosterUrl(poster.select("img").attr("src")); + movie.setQuote("Letterboxd chart entry"); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + movie.setBoxOffice(simulatedBoxOffice(0, rank)); + movies.add(movie); + rank++; + } + return movies; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + private double estimateRating(int rank) { + return Math.max(8.0, 9.5 - rank * 0.01); + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/TheNumbersCrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/TheNumbersCrawlerStrategy.java new file mode 100644 index 0000000..465e042 --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/TheNumbersCrawlerStrategy.java @@ -0,0 +1,78 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class TheNumbersCrawlerStrategy extends AbstractCrawlerStrategy { + private static final String URL = "https://www.the-numbers.com/box-office-records/worldwide/all-movies/cumulative/all-time"; + + @Override + public String getSiteName() { + return "The Numbers"; + } + + @Override + public List crawl(int limit) { + try { + Document doc = connection(URL).get(); + List movies = new ArrayList<>(); + for (Element row : doc.select("tr")) { + if (movies.size() >= limit) { + break; + } + Elements cols = row.select("td"); + if (cols.size() < 4) { + continue; + } + Movie movie = parseRow(cols); + if (movie.getTitle() != null && !movie.getTitle().isBlank()) { + movies.add(movie); + } + } + return movies; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + private Movie parseRow(Elements cols) { + int rank = parseCount(cols.get(0).text()); + Movie movie = new Movie(); + movie.setRank(rank); + movie.setReleaseYear(parseYear(cols.get(1).text())); + movie.setTitle(cols.get(2).text().trim()); + movie.setBoxOffice(parseMoney(cols.get(3).text())); + movie.setRating(estimateRating(rank)); + movie.setDirector("Unknown"); + movie.setCountry("Multiple"); + movie.setReviewCount(0); + movie.setPosterUrl(""); + movie.setQuote("The Numbers all-time worldwide box office entry"); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + return movie; + } + + private double parseMoney(String value) { + if (value == null || value.isBlank()) { + return 0.0; + } + String normalized = value.replace("$", "").replace(",", "").trim(); + try { + return Double.parseDouble(normalized); + } catch (NumberFormatException e) { + return 0.0; + } + } + + private double estimateRating(int rank) { + return Math.max(7.0, 8.85 - rank * 0.01); + } +} diff --git a/project/src/main/java/com/movieratings/crawler/strategy/WikipediaGrossingFilmsCrawlerStrategy.java b/project/src/main/java/com/movieratings/crawler/strategy/WikipediaGrossingFilmsCrawlerStrategy.java new file mode 100644 index 0000000..1eb647e --- /dev/null +++ b/project/src/main/java/com/movieratings/crawler/strategy/WikipediaGrossingFilmsCrawlerStrategy.java @@ -0,0 +1,86 @@ +package com.movieratings.crawler.strategy; + +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.select.Elements; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.List; + +public class WikipediaGrossingFilmsCrawlerStrategy extends AbstractCrawlerStrategy { + private static final String URL = "https://en.wikipedia.org/wiki/List_of_highest-grossing_films"; + + @Override + public String getSiteName() { + return "Wikipedia Highest-Grossing Films"; + } + + @Override + public List crawl(int limit) { + try { + Document doc = connection(URL).get(); + List movies = new ArrayList<>(); + for (Element row : doc.select("table.wikitable tr")) { + if (movies.size() >= limit) { + break; + } + Elements cols = row.select("td"); + if (cols.size() < 5) { + continue; + } + Movie movie = parseRow(cols); + if (movie.getTitle() != null && !movie.getTitle().isBlank()) { + movies.add(movie); + } + } + return movies; + } catch (IOException e) { + throw new CrawlerException("Failed to crawl " + getSiteName(), e); + } + } + + private Movie parseRow(Elements cols) { + int rank = parseCount(cols.get(0).text()); + String title = cols.get(2).select("i a, a").text(); + if (title.isBlank()) { + title = cols.get(2).text(); + } + + Movie movie = new Movie(); + movie.setRank(rank); + movie.setTitle(title.trim()); + movie.setBoxOffice(parseMoney(cols.get(3).text())); + movie.setReleaseYear(parseYear(cols.get(4).text())); + movie.setRating(estimateRating(rank)); + movie.setDirector("Unknown"); + movie.setCountry("Multiple"); + movie.setReviewCount(0); + movie.setPosterUrl(""); + movie.setQuote("Wikipedia highest-grossing films table entry"); + movie.setType("Movie"); + movie.setSourceSite(getSiteName()); + return movie; + } + + private double parseMoney(String value) { + if (value == null || value.isBlank()) { + return 0.0; + } + String normalized = value.replaceAll("[^0-9.]", ""); + if (normalized.isBlank()) { + return 0.0; + } + try { + return Double.parseDouble(normalized); + } catch (NumberFormatException e) { + return 0.0; + } + } + + private double estimateRating(int rank) { + return Math.max(7.0, 8.9 - rank * 0.01); + } +} diff --git a/project/src/main/java/com/movieratings/exception/CliException.java b/project/src/main/java/com/movieratings/exception/CliException.java new file mode 100644 index 0000000..9bb83d2 --- /dev/null +++ b/project/src/main/java/com/movieratings/exception/CliException.java @@ -0,0 +1,11 @@ +package com.movieratings.exception; + +public class CliException extends MovieRatingsException { + public CliException(String message) { + super(message); + } + + public CliException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/src/main/java/com/movieratings/exception/CrawlerException.java b/project/src/main/java/com/movieratings/exception/CrawlerException.java new file mode 100644 index 0000000..5c0095e --- /dev/null +++ b/project/src/main/java/com/movieratings/exception/CrawlerException.java @@ -0,0 +1,11 @@ +package com.movieratings.exception; + +public class CrawlerException extends MovieRatingsException { + public CrawlerException(String message) { + super(message); + } + + public CrawlerException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/src/main/java/com/movieratings/exception/DataStorageException.java b/project/src/main/java/com/movieratings/exception/DataStorageException.java new file mode 100644 index 0000000..41d04e9 --- /dev/null +++ b/project/src/main/java/com/movieratings/exception/DataStorageException.java @@ -0,0 +1,11 @@ +package com.movieratings.exception; + +public class DataStorageException extends MovieRatingsException { + public DataStorageException(String message) { + super(message); + } + + public DataStorageException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/src/main/java/com/movieratings/exception/MovieRatingsException.java b/project/src/main/java/com/movieratings/exception/MovieRatingsException.java new file mode 100644 index 0000000..e6a09f1 --- /dev/null +++ b/project/src/main/java/com/movieratings/exception/MovieRatingsException.java @@ -0,0 +1,11 @@ +package com.movieratings.exception; + +public class MovieRatingsException extends RuntimeException { + public MovieRatingsException(String message) { + super(message); + } + + public MovieRatingsException(String message, Throwable cause) { + super(message, cause); + } +} diff --git a/project/src/main/java/com/movieratings/model/Movie.java b/project/src/main/java/com/movieratings/model/Movie.java index df46ba9..0024c37 100644 --- a/project/src/main/java/com/movieratings/model/Movie.java +++ b/project/src/main/java/com/movieratings/model/Movie.java @@ -24,6 +24,7 @@ public class Movie implements Serializable { private double boxOffice; // 票房 (模拟/演示) private String type; // 作品类型 (电影、电视剧、纪录片等) private String posterUrl; // 海报图片链接 + private String sourceSite; // 数据来源网站 public Movie() {} @@ -77,6 +78,9 @@ public class Movie implements Serializable { public String getPosterUrl() { return posterUrl; } public void setPosterUrl(String posterUrl) { this.posterUrl = posterUrl; } + public String getSourceSite() { return sourceSite; } + public void setSourceSite(String sourceSite) { this.sourceSite = sourceSite; } + @Override public String toString() { return "Movie{" + @@ -89,6 +93,7 @@ public class Movie implements Serializable { ", director='" + director + '\'' + ", reviewCount=" + reviewCount + ", type='" + type + '\'' + + ", sourceSite='" + sourceSite + '\'' + '}'; } diff --git a/project/src/main/java/com/movieratings/storage/DataStorageService.java b/project/src/main/java/com/movieratings/storage/DataStorageService.java new file mode 100644 index 0000000..fff2bdf --- /dev/null +++ b/project/src/main/java/com/movieratings/storage/DataStorageService.java @@ -0,0 +1,64 @@ +package com.movieratings.storage; + +import com.fasterxml.jackson.core.type.TypeReference; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.movieratings.exception.DataStorageException; +import com.movieratings.model.Movie; + +import java.io.File; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.List; + +public class DataStorageService { + private final ObjectMapper mapper = new ObjectMapper(); + + public void saveAsJson(List movies, String fileName) { + try { + mapper.writerWithDefaultPrettyPrinter().writeValue(new File(fileName), movies); + System.out.println("Saved JSON data to " + fileName); + } catch (IOException e) { + throw new DataStorageException("Failed to save JSON file: " + fileName, e); + } + } + + public List loadFromJson(String fileName) { + try { + return mapper.readValue(new File(fileName), new TypeReference<>() {}); + } catch (IOException e) { + throw new DataStorageException("Failed to load JSON file: " + fileName, e); + } + } + + public void exportToCsv(List movies, String fileName) { + try (FileWriter writer = new FileWriter(fileName, StandardCharsets.UTF_8)) { + writer.write("rank,title,year,rating,director,country,reviewCount,boxOffice,type,posterUrl,sourceSite\n"); + for (Movie movie : movies) { + writer.write(String.format("%d,%s,%d,%.1f,%s,%s,%d,%.2f,%s,%s,%s%n", + movie.getRank(), + csv(movie.getTitle()), + movie.getReleaseYear(), + movie.getRating(), + csv(movie.getDirector()), + csv(movie.getCountry()), + movie.getReviewCount(), + movie.getBoxOffice(), + csv(movie.getType()), + csv(movie.getPosterUrl()), + csv(movie.getSourceSite()))); + } + System.out.println("Saved CSV data to " + fileName); + } catch (IOException e) { + throw new DataStorageException("Failed to save CSV file: " + fileName, e); + } + } + + private String csv(String value) { + if (value == null) { + return ""; + } + String escaped = value.replace("\"", "\"\""); + return "\"" + escaped + "\""; + } +} diff --git a/project/src/test/java/com/movieratings/crawler/MovieCrawlerTest.java b/project/src/test/java/com/movieratings/crawler/MovieCrawlerTest.java new file mode 100644 index 0000000..7f187de --- /dev/null +++ b/project/src/test/java/com/movieratings/crawler/MovieCrawlerTest.java @@ -0,0 +1,54 @@ +package com.movieratings.crawler; + +import com.movieratings.crawler.strategy.CrawlerStrategy; +import com.movieratings.exception.CrawlerException; +import com.movieratings.model.Movie; +import org.junit.jupiter.api.Test; + +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; + +class MovieCrawlerTest { + @Test + void aggregatesMultipleStrategiesAndDeduplicatesMovies() { + MovieCrawler crawler = new MovieCrawler(List.of( + new FakeStrategy("site-a", List.of(movie("Movie A", 2001), movie("Movie B", 2002))), + new FakeStrategy("site-b", List.of(movie("Movie B", 2002), movie("Movie C", 2003))), + new FakeStrategy("site-c", List.of(movie("Movie D", 2004))) + )); + + List movies = crawler.crawl(10); + + assertEquals(4, movies.size()); + assertEquals(List.of("site-a", "site-b", "site-c"), crawler.getSiteNames()); + } + + @Test + void rejectsInvalidLimit() { + MovieCrawler crawler = new MovieCrawler(List.of(new FakeStrategy("site", List.of()))); + + assertThrows(CrawlerException.class, () -> crawler.crawl(0)); + } + + private static Movie movie(String title, int year) { + Movie movie = new Movie(); + movie.setTitle(title); + movie.setReleaseYear(year); + movie.setRating(8.0); + return movie; + } + + private record FakeStrategy(String siteName, List movies) implements CrawlerStrategy { + @Override + public String getSiteName() { + return siteName; + } + + @Override + public List crawl(int limit) { + return movies.stream().limit(limit).toList(); + } + } +} diff --git a/project/src/test/java/com/movieratings/storage/DataStorageServiceTest.java b/project/src/test/java/com/movieratings/storage/DataStorageServiceTest.java new file mode 100644 index 0000000..6e6d344 --- /dev/null +++ b/project/src/test/java/com/movieratings/storage/DataStorageServiceTest.java @@ -0,0 +1,57 @@ +package com.movieratings.storage; + +import com.movieratings.model.Movie; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; + +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.List; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +class DataStorageServiceTest { + @TempDir + Path tempDir; + + @Test + void savesAndLoadsJsonData() { + DataStorageService storage = new DataStorageService(); + Path json = tempDir.resolve("movies.json"); + List movies = List.of(movie("Movie A", 8.8)); + + storage.saveAsJson(movies, json.toString()); + List loaded = storage.loadFromJson(json.toString()); + + assertEquals(1, loaded.size()); + assertEquals("Movie A", loaded.get(0).getTitle()); + assertEquals(8.8, loaded.get(0).getRating(), 0.001); + } + + @Test + void exportsCsvData() throws Exception { + DataStorageService storage = new DataStorageService(); + Path csv = tempDir.resolve("movies.csv"); + + storage.exportToCsv(List.of(movie("Movie A", 8.8)), csv.toString()); + + String content = Files.readString(csv); + assertTrue(content.contains("rank,title,year,rating")); + assertTrue(content.contains("sourceSite")); + assertTrue(content.contains("\"Movie A\"")); + } + + private static Movie movie(String title, double rating) { + Movie movie = new Movie(); + movie.setRank(1); + movie.setTitle(title); + movie.setReleaseYear(2001); + movie.setRating(rating); + movie.setDirector("Director A"); + movie.setCountry("Country A"); + movie.setType("Movie"); + movie.setSourceSite("Test Site"); + return movie; + } +} diff --git a/project/target/classes/com/movieratings/Main.class b/project/target/classes/com/movieratings/Main.class index d3da6b6..e697d12 100644 Binary files a/project/target/classes/com/movieratings/Main.class and b/project/target/classes/com/movieratings/Main.class differ diff --git a/project/target/classes/com/movieratings/crawler/MovieCrawler.class b/project/target/classes/com/movieratings/crawler/MovieCrawler.class index d983939..172338f 100644 Binary files a/project/target/classes/com/movieratings/crawler/MovieCrawler.class and b/project/target/classes/com/movieratings/crawler/MovieCrawler.class differ diff --git a/project/target/classes/com/movieratings/model/Movie.class b/project/target/classes/com/movieratings/model/Movie.class index 4d1f38e..a4861a9 100644 Binary files a/project/target/classes/com/movieratings/model/Movie.class and b/project/target/classes/com/movieratings/model/Movie.class differ diff --git a/project/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst b/project/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst index 4ebf324..989a98f 100644 --- a/project/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst +++ b/project/target/maven-status/maven-compiler-plugin/compile/default-compile/createdFiles.lst @@ -1,13 +1,35 @@ +com\movieratings\exception\MovieRatingsException.class com\movieratings\controller\DirectorController.class +com\movieratings\crawler\strategy\ImdbTop250CrawlerStrategy.class +com\movieratings\crawler\strategy\WikipediaGrossingFilmsCrawlerStrategy.class +com\movieratings\cli\CliApplication.class +com\movieratings\exception\DataStorageException.class +com\movieratings\Main.class +com\movieratings\analysis\DataAnalyzer.class +com\movieratings\cli\command\HelpCommand.class +com\movieratings\storage\DataStorageService$1.class +com\movieratings\crawler\strategy\TheNumbersCrawlerStrategy.class +com\movieratings\crawler\strategy\LetterboxdTop250CrawlerStrategy.class +com\movieratings\cli\command\ExportCommand.class +com\movieratings\storage\DataStorageService.class +com\movieratings\cli\command\AllCommand.class +com\movieratings\cli\command\AnalyzeCommand.class +com\movieratings\exception\CliException.class +com\movieratings\crawler\strategy\AbstractCrawlerStrategy.class com\movieratings\MovieRatingsApplication.class com\movieratings\model\Movie.class +com\movieratings\crawler\strategy\BoxOfficeMojoCrawlerStrategy.class com\movieratings\repository\MovieRepository.class +com\movieratings\crawler\strategy\CrawlerStrategy.class com\movieratings\crawler\MovieCrawler.class com\movieratings\service\MovieService.class com\movieratings\model\DirectorStats.class com\movieratings\analysis\DataAnalyzer$CorrelationResult.class com\movieratings\display\ResultDisplay.class -com\movieratings\Main.class -com\movieratings\analysis\DataAnalyzer.class +com\movieratings\cli\command\CrawlCommand.class com\movieratings\DataInitializer.class +com\movieratings\exception\CrawlerException.class +com\movieratings\cli\command\AbstractMovieCommand.class com\movieratings\analysis\DataAnalyzer$DirectorStats.class +com\movieratings\cli\command\Command.class +com\movieratings\crawler\strategy\DoubanTop250CrawlerStrategy.class diff --git a/project/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst b/project/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst index 5504aab..9904c9d 100644 --- a/project/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst +++ b/project/target/maven-status/maven-compiler-plugin/compile/default-compile/inputFiles.lst @@ -1,11 +1,32 @@ D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\display\ResultDisplay.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\DirectorStats.java -D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\MovieRatingsApplication.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\TheNumbersCrawlerStrategy.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\DataInitializer.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\CliException.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\MovieRatingsException.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\controller\DirectorController.java -D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\service\MovieService.java -D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\Main.java -D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\Movie.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\storage\DataStorageService.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\CliApplication.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\repository\MovieRepository.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\MovieCrawler.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\CrawlCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\CrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\MovieRatingsApplication.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\CrawlerException.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\Main.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AbstractMovieCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\Command.java D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\analysis\DataAnalyzer.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AnalyzeCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\ImdbTop250CrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\AbstractCrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\HelpCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\LetterboxdTop250CrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\AllCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\DoubanTop250CrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\service\MovieService.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\cli\command\ExportCommand.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\WikipediaGrossingFilmsCrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\model\Movie.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\crawler\strategy\BoxOfficeMojoCrawlerStrategy.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\main\java\com\movieratings\exception\DataStorageException.java diff --git a/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst b/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst index 20c22fc..22fe0a4 100644 --- a/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst +++ b/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/createdFiles.lst @@ -1 +1,4 @@ com\movieratings\analysis\DataAnalyzerTest.class +com\movieratings\crawler\MovieCrawlerTest.class +com\movieratings\crawler\MovieCrawlerTest$FakeStrategy.class +com\movieratings\storage\DataStorageServiceTest.class diff --git a/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst b/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst index b9a21e0..47f9d68 100644 --- a/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst +++ b/project/target/maven-status/maven-compiler-plugin/testCompile/default-testCompile/inputFiles.lst @@ -1 +1,3 @@ D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\analysis\DataAnalyzerTest.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\storage\DataStorageServiceTest.java +D:\VisualStudioProgram\VSCodePrograms\JavaLearningProject\java\project\src\test\java\com\movieratings\crawler\MovieCrawlerTest.java diff --git a/project/year_rating_scatter.png b/project/year_rating_scatter.png index 4638a74..7f445a9 100644 Binary files a/project/year_rating_scatter.png and b/project/year_rating_scatter.png differ diff --git a/project/学号-姓名-期末实验报告.docx b/project/学号-姓名-期末实验报告.docx new file mode 100644 index 0000000..7eca35d Binary files /dev/null and b/project/学号-姓名-期末实验报告.docx differ