// ===== project/src/AnalyzeCommand.java =====
package com.example.crawler.command;

import com.example.crawler.controller.MuseumController;
import com.example.crawler.model.Museum;
import java.util.List;
import java.util.Map;

/**
 * Command that passes a museum list to the controller for analysis and keeps
 * whatever the controller returns so it can be read back after execution.
 *
 * <p>NOTE(review): {@code List} and {@code Map} are raw here. The unused
 * {@code Museum} import suggests the type arguments were lost
 * (presumably {@code List<Museum>}); restore them once the signature of
 * {@code MuseumController.analyzeMuseums} has been confirmed.
 */
public class AnalyzeCommand implements Command {
    private final MuseumController controller;
    private final List data;
    /** Result of the last {@link #execute()} call; {@code null} until then. */
    private Map ratingDistribution;

    /**
     * @param controller controller that performs the analysis
     * @param data       museum records handed to the controller on execution
     */
    public AnalyzeCommand(MuseumController controller, List data) {
        this.controller = controller;
        this.data = data;
    }

    /** Runs the analysis and stores the returned distribution. */
    @Override
    public void execute() {
        this.ratingDistribution = controller.analyzeMuseums(data);
    }

    @Override
    public String getCommandName() {
        return "analyze";
    }

    @Override
    public String getDescription() {
        return "分析博物馆数据";
    }

    /**
     * @return the distribution produced by the last {@link #execute()} call,
     *         or {@code null} if the command has not been executed yet
     */
    public Map getRatingDistribution() {
        return ratingDistribution;
    }
}

// ===== project/src/Command.java =====
package com.example.crawler.command;

/** A unit of work that can be queued in and run by a {@link CommandInvoker}. */
public interface Command {
    /** Performs the command's work. */
    void execute();

    /** @return short name printed by the invoker before and after execution */
    String getCommandName();

    /** @return human-readable description printed by the invoker before execution */
    String getDescription();
}

// ===== project/src/CommandInvoker.java =====
package com.example.crawler.command;

import java.util.ArrayList;
import java.util.List;
import java.util.Objects;

/**
 * Holds an ordered queue of commands and runs them one after another,
 * printing the name, description and elapsed time of each.
 */
public class CommandInvoker {
    private final List<Command> commands = new ArrayList<>();

    /**
     * Appends a command to the queue.
     *
     * @param command command to queue; must not be {@code null}
     * @throws NullPointerException if {@code command} is {@code null}
     */
    public void addCommand(Command command) {
        // Fail at the call site rather than later inside executeAll().
        commands.add(Objects.requireNonNull(command, "command"));
    }

    /**
     * Executes every queued command in insertion order. The queue is left
     * untouched; an exception thrown by a command propagates to the caller.
     */
    public void executeAll() {
        for (Command command : commands) {
            System.out.println("执行命令: " + command.getCommandName() + " - " + command.getDescription());
            // nanoTime is monotonic, so the duration is not affected by wall-clock adjustments.
            long startNanos = System.nanoTime();
            command.execute();
            long elapsedMs = (System.nanoTime() - startNanos) / 1_000_000L;
            System.out.println("命令 " + command.getCommandName() + " 执行完成,耗时: " + elapsedMs + "ms");
        }
    }

    /** Removes all queued commands. */
    public void clearCommands() {
        commands.clear();
    }

    /** @return number of commands currently queued */
    public int getCommandCount() {
        return commands.size();
    }
}

// ===== project/src/Main.java =====
package com.example.crawler;

import com.example.crawler.view.MuseumCLIView;

/** Application entry point: creates the CLI view and starts it. */
public class Main {
    public static void main(String[] args) {
        MuseumCLIView view = new MuseumCLIView();
        view.start();
    }
}

// ===== project/src/MuseumCrawler.java =====
package com.example.crawler;

import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.opencsv.CSVWriter;
import org.jfree.chart.ChartFactory;
import org.jfree.chart.ChartUtils;
import org.jfree.chart.JFreeChart;
import org.jfree.chart.annotations.XYTextAnnotation;
import org.jfree.chart.labels.StandardPieSectionLabelGenerator;
import org.jfree.chart.plot.CategoryPlot;
import org.jfree.chart.plot.PiePlot;
import org.jfree.chart.plot.PlotOrientation;
import org.jfree.chart.plot.XYPlot;
import org.jfree.chart.axis.CategoryAxis;
import org.jfree.chart.axis.CategoryLabelPositions;
import org.jfree.data.category.DefaultCategoryDataset;
import org.jfree.data.general.DefaultPieDataset;
import org.jfree.data.xy.XYSeries;
import org.jfree.data.xy.XYSeriesCollection;
import java.awt.Color;
import java.awt.Font;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * Collects museum records from a list of {@link WebsiteCrawler} sources,
 * de-duplicates them by name, saves them as JSON and CSV, prints summary
 * statistics and renders four PNG charts.
 */
public class MuseumCrawler {
    public static void main(String[] args) {
        MuseumCrawler crawler = new MuseumCrawler();
        crawler.crawl();
    }

    /** Maximum number of attempts per source. */
    private static final int MAX_RETRIES = 3;
    /** Pause between two attempts on the same source, in milliseconds. */
    private static final int RETRY_DELAY_MS = 2000;
    /** Number of sources that must yield data before the remaining ones are skipped. */
    private static final int MIN_SUCCESSFUL_CRAWLERS = 3;

    /**
     * Runs the whole pipeline: crawl, de-duplicate, store, analyze, chart.
     * Any exception is reported on stderr and not rethrown.
     */
    public void crawl() {
        try {
            List<WebsiteCrawler> crawlers = getWebsiteCrawlers();
            List<Museum> allMuseums = new ArrayList<>();
            Set<String> seenNames = new HashSet<>();
            int successfulCrawlers = 0;

            for (WebsiteCrawler crawler : crawlers) {
                if (successfulCrawlers >= MIN_SUCCESSFUL_CRAWLERS) {
                    System.out.println("已成功爬取 " + MIN_SUCCESSFUL_CRAWLERS + " 个网站,跳过剩余爬虫");
                    break;
                }
                System.out.println("正在使用 " + crawler.getWebsiteName() + " 爬虫抓取数据...");
                List<Museum> museums = crawlWithRetry(crawler);
                if (museums.isEmpty()) {
                    System.out.println("从 " + crawler.getWebsiteName() + " 未抓取到数据");
                    continue;
                }
                successfulCrawlers++;
                for (Museum museum : museums) {
                    String name = museum.getName();
                    // Set.add returns false for a name that was already recorded.
                    if (name != null && !name.isEmpty() && seenNames.add(name)) {
                        allMuseums.add(museum);
                    }
                }
                System.out.println("从 " + crawler.getWebsiteName() + " 成功抓取 " + museums.size() + " 条数据");
            }

            if (successfulCrawlers < MIN_SUCCESSFUL_CRAWLERS) {
                System.err.println("警告:仅成功爬取了 " + successfulCrawlers + " 个网站,未达到目标 " + MIN_SUCCESSFUL_CRAWLERS + " 个");
            }
            System.out.println("\n总共抓取了 " + allMuseums.size() + " 个真实博物馆数据");
            if (allMuseums.isEmpty()) {
                System.out.println("未抓取到任何数据,程序退出");
                return;
            }

            DataStorage storage = new JsonCsvStorage();
            storage.saveData(allMuseums, "museums");

            DataAnalyzer analyzer = new MuseumAnalyzer();
            analyzer.analyzeData(allMuseums);

            ChartGenerator generator = new MuseumChartGenerator();
            generator.generateCharts(allMuseums, analyzer.getRatingDistribution(allMuseums));

            System.out.println("\n爬虫执行完成!");
        } catch (Exception e) {
            System.err.println("爬虫执行出错:" + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Calls {@code crawler.crawl()} up to {@link #MAX_RETRIES} times, pausing
     * {@link #RETRY_DELAY_MS} ms between attempts, until it returns a
     * non-empty list.
     *
     * <p>If the thread is interrupted, either inside the crawler or during
     * the pause, the interrupt flag is restored and no further attempt is made.
     *
     * @return the first non-empty result, or an empty list if every attempt
     *         failed, returned nothing, or the thread was interrupted
     */
    private List<Museum> crawlWithRetry(WebsiteCrawler crawler) {
        int attempts = 0;
        Exception lastException = null;
        while (attempts < MAX_RETRIES) {
            attempts++;
            try {
                List<Museum> result = crawler.crawl();
                if (!result.isEmpty()) {
                    return result;
                }
                System.out.println("第 " + attempts + " 次尝试未获取到数据,继续重试...");
            } catch (InterruptedException ie) {
                // Must not be swallowed by the generic handler below: restore the flag and stop.
                Thread.currentThread().interrupt();
                break;
            } catch (Exception e) {
                lastException = e;
                System.out.println("第 " + attempts + " 次尝试失败: " + e.getMessage());
            }
            if (attempts < MAX_RETRIES) {
                try {
                    System.out.println("等待 " + RETRY_DELAY_MS + "ms 后重试...");
                    Thread.sleep(RETRY_DELAY_MS);
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        // Report the attempts actually made; this is lower than MAX_RETRIES after an interrupt.
        System.err.println("爬虫 " + crawler.getWebsiteName() + " 在 " + attempts + " 次尝试后仍然失败");
        if (lastException != null) {
            System.err.println("最后一次错误: " + lastException.getMessage());
        }
        return new ArrayList<>();
    }

    /** @return the sources in the order they are tried */
    private List<WebsiteCrawler> getWebsiteCrawlers() {
        List<WebsiteCrawler> crawlers = new ArrayList<>();
        crawlers.add(new RealMuseumDataProvider());
        crawlers.add(new ChinaMuseumCrawler());
        crawlers.add(new LocalMuseumCrawler());
        crawlers.add(new WorldMuseumCrawler());
        crawlers.add(new ArtMuseumCrawler());
        crawlers.add(new ScienceMuseumCrawler());
        return crawlers;
    }

    /**
     * Converts a table of string rows into {@link Museum} objects and prints
     * one line per loaded record.
     *
     * @param rows   rows of seven columns: name, rating, price, address,
     *               description, review count, URL
     * @param source value stored in each record's {@code source} field
     * @throws NumberFormatException if a rating or review count does not parse
     */
    private static List<Museum> museumsFromTable(String[][] rows, String source) {
        List<Museum> museums = new ArrayList<>(rows.length);
        for (String[] row : rows) {
            Museum m = new Museum();
            m.setName(row[0]);
            m.setRating(Double.parseDouble(row[1]));
            m.setPrice(row[2]);
            m.setAddress(row[3]);
            m.setDescription(row[4]);
            m.setReviewCount(Integer.parseInt(row[5]));
            m.setUrl(row[6]);
            m.setSource(source);
            museums.add(m);
            System.out.println("加载: " + m.getName());
        }
        return museums;
    }

    /** Mutable bean holding the fields of one museum record. */
    public static abstract class AbstractMuseumModel {
        protected String name;
        protected double rating;
        protected String price;
        protected String address;
        protected String description;
        protected int reviewCount;
        protected String url;
        protected String source;

        public AbstractMuseumModel() {}

        public String getName() { return name; }
        public void setName(String name) { this.name = name; }
        public abstract double getRating();
        public abstract void setRating(double rating);
        public String getPrice() { return price; }
        public void setPrice(String price) { this.price = price; }
        public String getAddress() { return address; }
        public void setAddress(String address) { this.address = address; }
        public String getDescription() { return description; }
        public void setDescription(String description) { this.description = description; }
        public int getReviewCount() { return reviewCount; }
        public void setReviewCount(int reviewCount) { this.reviewCount = reviewCount; }
        public String getUrl() { return url; }
        public void setUrl(String url) { this.url = url; }
        public String getSource() { return source; }
        public void setSource(String source) { this.source = source; }
    }

    /** Concrete record whose rating accessors read and write the inherited field directly. */
    public static class Museum extends AbstractMuseumModel {
        public Museum() { super(); }

        @Override
        public double getRating() { return rating; }

        @Override
        public void setRating(double rating) { this.rating = rating; }
    }

    /** A named source of museum records. */
    public interface WebsiteCrawler {
        /** @return display name of the source; also stored in each record's {@code source} */
        String getWebsiteName();

        /** @return the records obtained from this source; may be empty */
        List<Museum> crawl() throws IOException, InterruptedException;
    }

    /** Source backed by a built-in table of museums in China; performs no network access. */
    public static class RealMuseumDataProvider implements WebsiteCrawler {
        private static final String[][] ROWS = {
            {"故宫博物院","4.9","旺季60元/淡季40元","北京市东城区景山前街4号","世界上现存规模最大、保存最为完整的木质结构古建筑群,收藏有大量珍贵文物。","1258000","https://www.dpm.org.cn"},
            {"中国国家博物馆","4.8","免费","北京市东城区东长安街16号","中华人民共和国的国家博物馆,建筑面积世界最大,馆藏文物丰富。","895000","https://www.chnmuseum.cn"},
            {"上海博物馆","4.8","免费","上海市黄浦区人民大道201号","大型中国古代艺术博物馆,馆藏文物近百万件。","678000","https://www.shanghaimuseum.net"},
            {"秦始皇兵马俑博物馆","4.9","120元","陕西省西安市临潼区秦陵北路","建立在兵马俑坑原址上的遗址性博物馆,世界第八大奇迹。","986000","https://bmy.wmcp.com.cn"},
            {"莫高窟","4.9","200元","甘肃省酒泉市敦煌市东南25公里","世界上现存规模最大、内容最丰富的佛教艺术地。","756000","https://www.mgk.org.cn"},
            {"南京博物院","4.8","免费","江苏省南京市玄武区中山东路321号","中国三大博物馆之一,大型综合性省级历史艺术类博物馆。","543000","https://www.njmuseum.com"},
            {"苏州博物馆","4.7","免费","江苏省苏州市姑苏区东北街204号","集现代化馆舍、古建筑与山水园林三位一体的博物馆。","421000","https://www.szmuseum.com"},
            {"陕西历史博物馆","4.8","免费","陕西省西安市雁塔区小寨东路91号","馆藏文物37万余件,被誉为古都明珠、华夏宝库。","789000","https://www.sxhm.com"},
            {"湖南省博物馆","4.8","免费","湖南省长沙市开福区东风路50号","大型综合性历史艺术类博物馆,马王堆汉墓文物为特色。","623000","https://www.hnmuseum.com"},
            {"河南博物院","4.7","免费","河南省郑州市金水区农业路8号","馆藏文物14万件,展现中原地区历史文化。","556000","https://www.chnmus.net"},
            {"浙江省博物馆","4.7","免费","浙江省杭州市西湖区孤山路25号","浙江省最大的综合性博物馆,馆藏文物丰富。","489000","https://www.zjmuseum.com"},
            {"辽宁省博物馆","4.7","免费","辽宁省沈阳市浑南区智慧三街157号","大型综合性博物馆,以辽代文物为特色。","412000","https://www.lnmuseum.com"},
            {"重庆中国三峡博物馆","4.6","免费","重庆市渝中区人民路236号","集巴渝文化、三峡文化、移民文化为一体的博物馆。","378000","https://www.threegorgesmuseum.com"},
            {"广东省博物馆","4.6","免费","广东省广州市天河区珠江东路2号","广东省最大的综合性博物馆,馆藏文物丰富。","467000","https://www.gdmuseum.com"},
            {"四川省博物院","4.6","免费","四川省成都市青羊区浣花南路251号","西南地区最大的综合性博物馆。","389000","https://www.scmuseum.cn"},
            {"天津博物馆","4.6","免费","天津市河西区平江道62号","大型历史艺术类综合性博物馆,馆藏文物20万余件。","367000","https://www.tjbwg.com"},
            {"武汉博物馆","4.5","免费","湖北省武汉市江汉区青年路373号","综合性博物馆,展示武汉地区历史文化。","245000","https://www.whmuseum.com"},
            {"云南省博物馆","4.6","免费","云南省昆明市官渡区广福路6393号","综合性博物馆,展现云南多民族文化。","312000","https://www.ynmuseum.org"},
            {"山东省博物馆","4.6","免费","山东省济南市历下区经十东路11899号","大型综合性博物馆,展现山东历史文化。","398000","https://www.sdmuseum.com"},
            {"山西省博物院","4.7","免费","山西省太原市万柏林区滨河西路北段13号","大型综合性博物馆,展现山西历史文化。","445000","https://www.sxbwy.com"}
        };

        @Override
        public String getWebsiteName() { return "中国博物馆名录"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            return museumsFromTable(ROWS, getWebsiteName());
        }
    }

    /**
     * Source that fetches two portal pages and creates one record per page,
     * named after the page title. Every field other than name, URL and source
     * is a fixed placeholder, not data read from the page.
     */
    public static class ChinaMuseumCrawler implements WebsiteCrawler {
        @Override
        public String getWebsiteName() { return "中国数字博物馆"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            List<Museum> museums = new ArrayList<>();
            String[] urls = {"https://www.chinamuseum.cn", "https://www.nmch.gov.cn"};
            for (String url : urls) {
                try {
                    Document doc = Jsoup.connect(url).userAgent("Mozilla/5.0").timeout(10000).get();
                    String title = doc.title();
                    if (title == null || title.isEmpty()) {
                        continue;
                    }
                    // Keep the text before the first dash. indexOf avoids the
                    // ArrayIndexOutOfBoundsException that split("-")[0] throws on an all-dash title.
                    int dash = title.indexOf('-');
                    String name = (dash >= 0 ? title.substring(0, dash) : title).trim();
                    if (name.isEmpty()) {
                        // A nameless record would be dropped later yet still count this source as successful.
                        continue;
                    }
                    Museum m = new Museum();
                    m.setName(name);
                    m.setAddress("北京市");
                    m.setDescription("国家级博物馆平台,展示中国丰富的历史文化遗产。");
                    m.setPrice("免费");
                    m.setRating(4.7);
                    m.setReviewCount(500000);
                    m.setUrl(url);
                    m.setSource(getWebsiteName());
                    museums.add(m);
                    System.out.println("抓取成功: " + m.getName());
                } catch (Exception e) {
                    // Best effort: a failure on one URL must not prevent trying the next.
                    System.err.println("抓取失败: " + url + " - " + e.getMessage());
                }
            }
            return museums;
        }
    }

    /** Source backed by a built-in table of site-specific museums; performs no network access. */
    public static class LocalMuseumCrawler implements WebsiteCrawler {
        private static final String[][] ROWS = {
            {"三星堆博物馆","4.8","72元","四川省德阳市广汉市西安路133号","以三星堆古蜀文明为主题的专题博物馆。","345000","https://www.sxd.cn"},
            {"金沙遗址博物馆","4.7","70元","四川省成都市青羊区金沙遗址路2号","展示古蜀文明金沙遗址的专题博物馆。","289000","https://www.jinsha-site.com"},
            {"良渚博物院","4.7","免费","浙江省杭州市余杭区美丽洲路1号","展示良渚文化的专题博物馆。","234000","https://www.liangzhubwy.com"},
            {"殷墟博物馆","4.6","70元","河南省安阳市殷都区殷墟路1号","展示商代晚期都城遗址的博物馆。","198000","https://www.inyang.org"},
            {"三星堆古蜀文明博物馆","4.8","免费","四川省广汉市","三星堆遗址配套博物馆。","156000","https://www.sxdmuseum.cn"}
        };

        @Override
        public String getWebsiteName() { return "地方特色博物馆"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            return museumsFromTable(ROWS, getWebsiteName());
        }
    }

    /** Source backed by a built-in table of museums outside China; performs no network access. */
    public static class WorldMuseumCrawler implements WebsiteCrawler {
        private static final String[][] ROWS = {
            {"卢浮宫","4.9","17欧","法国巴黎市中心塞纳河北岸","世界上最古老、最大、最著名的博物馆之一,收藏蒙娜丽莎等名作。","3800000","https://www.louvre.fr"},
            {"大英博物馆","4.8","免费","英国伦敦新牛津大街北面大罗素广场","世界上规模最大、最著名的博物馆之一,收藏世界各地文物。","6700000","https://www.britishmuseum.org"},
            {"大都会艺术博物馆","4.8","建议25美元","美国纽约第五大道82街","美国最大的艺术博物馆,收藏超过两百万件艺术品。","7300000","https://www.metmuseum.org"},
            {"梵蒂冈博物馆","4.8","17欧","梵蒂冈城国","世界上最著名的博物馆之一,西斯廷教堂所在地。","6000000","https://www.museivaticani.va"},
            {"艾尔米塔什博物馆","4.7","700卢布","俄罗斯圣彼得堡涅瓦河畔","世界四大博物馆之一,收藏三百万件艺术珍品。","2900000","https://www.hermitagemuseum.org"},
            {"普拉多博物馆","4.7","15欧","西班牙马德里","世界上最伟大的艺术博物馆之一,以西班牙绘画著称。","3200000","https://www.museodelprado.es"},
            {"乌菲兹美术馆","4.8","20欧","意大利佛罗伦萨","世界上最著名的绘画艺术博物馆之一。","2300000","https://www.uffizi.it"},
            {"东京国立博物馆","4.6","1000日元","日本东京台东区上野公园","日本最大的博物馆,收藏日本及亚洲文物。","1400000","https://www.tnm.jp"},
            {"埃及博物馆","4.7","200埃镑","埃及开罗解放广场","世界上最大的古代埃及文物博物馆。","1500000","https://www.egyptianmuseum.gov.eg"},
            {"纽约现代艺术博物馆","4.7","建议25美元","美国纽约曼哈顿中城","世界上最有影响力的现代艺术博物馆。","3200000","https://www.moma.org"}
        };

        @Override
        public String getWebsiteName() { return "世界著名博物馆"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            return museumsFromTable(ROWS, getWebsiteName());
        }
    }

    /** Source backed by a built-in table of art museums; performs no network access. */
    public static class ArtMuseumCrawler implements WebsiteCrawler {
        private static final String[][] ROWS = {
            {"中国美术馆","4.6","免费","北京市东城区五四大街1号","中国国家美术馆,收藏近现代美术作品。","186000","https://www.namoc.org"},
            {"上海当代艺术博物馆","4.5","免费","上海市黄浦区花园港路200号","中国第一家当代艺术博物馆。","234000","https://www.powerstationofart.com"},
            {"北京画院美术馆","4.5","免费","北京市朝阳区朝阳公园南路12号","以中国画收藏和研究为特色的美术馆。","89000","https://www.bjam.org"},
            {"广州艺术博物院","4.5","免费","广东省广州市越秀区麓湖路13号","集收藏、研究、展览于一体的艺术博物馆。","156000","https://www.gzam.org"},
            {"何香凝美术馆","4.4","免费","广东省深圳市南山区深南大道9013号","中国第一个以个人名字命名的国家级美术馆。","112000","https://www.hxnartmuseum.com"},
            {"湖北美术馆","4.5","免费","湖北省武汉市武昌区东湖路三官殿1号","湖北省规模最大的美术馆。","145000","https://www.hubeiartmuseum.com"},
            {"江苏省美术馆","4.5","免费","江苏省南京市玄武区长江路333号","江苏省省级美术馆,收藏大量近现代书画。","167000","https://www.jsam.org"},
            {"四川美术馆","4.4","免费","四川省成都市青羊区人民西路6号","西南地区重要的美术馆。","123000","https://www.scam.org"},
            {"浙江美术馆","4.5","免费","浙江省杭州市西湖区南山路138号","浙江省最大的美术馆。","178000","https://www.zjam.org"},
            {"鲁迅美术学院美术馆","4.4","免费","辽宁省沈阳市和平区三好街19号","以当代艺术展览为特色。","98000","https://www.lumei.edu.cn"}
        };

        @Override
        public String getWebsiteName() { return "艺术博物馆"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            return museumsFromTable(ROWS, getWebsiteName());
        }
    }

    /** Source backed by a built-in table of science museums; performs no network access. */
    public static class ScienceMuseumCrawler implements WebsiteCrawler {
        private static final String[][] ROWS = {
            {"中国科学技术馆","4.8","30元","北京市朝阳区北辰东路5号","中国唯一的国家级综合性科技馆。","567000","https://www.cstm.net"},
            {"上海科技馆","4.7","45元","上海市浦东新区世纪大道2000号","中国最大的科技馆之一。","456000","https://www.sstm.org.cn"},
            {"广东科学中心","4.6","60元","广东省广州市番禺区科普路168号","亚洲最大的科技馆之一。","321000","https://www.gdsc.cn"},
            {"四川科技馆","4.6","免费","四川省成都市青羊区人民中路一段16号","西南地区规模最大的科技馆。","289000","https://www.sckjg.cn"},
            {"天津科学技术馆","4.5","免费","天津市河西区隆昌路94号","综合性科技馆。","178000","https://www.tjstm.org"},
            {"武汉科学技术馆","4.5","免费","湖北省武汉市江岸区沿江大道91号","武汉地区重要的科普教育基地。","234000","https://www.wmst.cn"},
            {"浙江省科技馆","4.5","免费","浙江省杭州市下城区中山北路581号","浙江省综合性科技馆。","189000","https://www.zjstm.org"},
            {"重庆科技馆","4.5","免费","重庆市江北区江北城文星门街7号","大型现代化科技馆。","212000","https://www.cqkjg.cn"},
            {"南京科技馆","4.4","30元","江苏省南京市雨花台区紫荆花路9号","综合性科技馆。","167000","https://www.njstm.org"},
            {"山东省科技馆","4.4","免费","山东省济南市历下区南门大街1号","山东省最大的科技馆。","145000","https://www.sdstm.cn"},
            {"陕西科学技术馆","4.4","免费","陕西省西安市新城区新城广场南侧","西北地区重要的科技馆。","123000","https://www.sxstm.org"},
            {"湖南省科学技术馆","4.5","免费","湖南省长沙市天心区杉木冲西路9号","湖南省综合性科技馆。","178000","https://www.hnstm.cn"},
            {"安徽省科技馆","4.3","免费","安徽省合肥市蜀山区黄山路460号","安徽省最大的科技馆。","112000","https://www.ahstm.cn"},
            {"福建省科技馆","4.4","免费","福建省福州市鼓楼区古田路89号","福建省综合性科技馆。","134000","https://www.fjstm.org"},
            {"云南省科学技术馆","4.3","免费","云南省昆明市盘龙区北京路514号","云南省最大的科技馆。","98000","https://www.ynstm.cn"}
        };

        @Override
        public String getWebsiteName() { return "科学技术博物馆"; }

        @Override
        public List<Museum> crawl() throws IOException, InterruptedException {
            return museumsFromTable(ROWS, getWebsiteName());
        }
    }

    /** Persists a list of museum records under a base file name. */
    public interface DataStorage {
        /**
         * @param data     records to write
         * @param fileName base name without extension
         * @throws IOException if a file cannot be written
         */
        void saveData(List<? extends AbstractMuseumModel> data, String fileName) throws IOException;
    }

    /** Writes the records to {@code <fileName>.json} and {@code <fileName>.csv}. */
    public static class JsonCsvStorage implements DataStorage {
        private final ObjectMapper objectMapper = new ObjectMapper();

        @Override
        public void saveData(List<? extends AbstractMuseumModel> data, String fileName) throws IOException {
            objectMapper.writeValue(new File(fileName + ".json"), data);
            System.out.println("数据已保存到 JSON 文件:" + fileName + ".json");

            // Write UTF-8 explicitly: FileWriter uses the platform default charset,
            // which can garble the Chinese text on systems whose default is not UTF-8.
            try (Writer out = Files.newBufferedWriter(Paths.get(fileName + ".csv"), StandardCharsets.UTF_8);
                 CSVWriter writer = new CSVWriter(out)) {
                writer.writeNext(new String[]{"名称", "评分", "票价", "地址", "描述", "评论数", "URL", "来源"});
                for (AbstractMuseumModel m : data) {
                    String source = m.getSource() != null ? m.getSource() : "未知";
                    writer.writeNext(new String[]{
                        m.getName(),
                        String.valueOf(m.getRating()),
                        m.getPrice(),
                        m.getAddress(),
                        m.getDescription(),
                        String.valueOf(m.getReviewCount()),
                        m.getUrl(),
                        source
                    });
                }
            }
            System.out.println("数据已保存到 CSV 文件:" + fileName + ".csv");
        }
    }

    /** Computes and reports statistics over museum records. */
    public interface DataAnalyzer {
        /** Prints summary statistics for {@code data} to stdout. */
        void analyzeData(List<? extends AbstractMuseumModel> data);

        /** @return number of records per rating bucket, keyed by bucket label */
        Map<String, Integer> getRatingDistribution(List<? extends AbstractMuseumModel> data);
    }

    /** Default analyzer: console report plus a five-bucket rating histogram. */
    public static class MuseumAnalyzer implements DataAnalyzer {
        /** Bucket labels, highest ratings first; this is also the order of the returned map. */
        private static final String[] BUCKETS = {"4.5-5.0", "4.0-4.5", "3.5-4.0", "3.0-3.5", "3.0 以下"};

        @Override
        public void analyzeData(List<? extends AbstractMuseumModel> data) {
            if (data == null || data.isEmpty()) {
                System.out.println("没有数据可分析");
                return;
            }
            double avgRating = data.stream().mapToDouble(AbstractMuseumModel::getRating).average().orElse(0.0);
            long highRatingCount = data.stream().filter(m -> m.getRating() >= 4.0).count();
            long highReviewCount = data.stream().filter(m -> m.getReviewCount() >= 1000).count();

            System.out.println("\n=== 博物馆数据统计 ===");
            System.out.println("总博物馆数:" + data.size());
            System.out.printf("平均评分:%.2f\n", avgRating);
            System.out.println("评分 4.0 及以上:" + highRatingCount);
            System.out.println("评论数 1000 及以上:" + highReviewCount);

            Map<String, Long> sourceDistribution = data.stream()
                .collect(Collectors.groupingBy(
                    m -> m.getSource() != null ? m.getSource() : "未知",
                    Collectors.counting()));
            System.out.println("\n各来源数据分布:");
            sourceDistribution.forEach((source, count) -> System.out.println(source + ": " + count + " 条"));

            System.out.println("\n评分最高的 10 个博物馆:");
            data.stream()
                .sorted(Comparator.comparingDouble(AbstractMuseumModel::getRating).reversed())
                .limit(10)
                .forEach(m -> System.out.printf("%s - 评分:%.1f - 评论数:%d\n", m.getName(), m.getRating(), m.getReviewCount()));

            System.out.println("\n评论数最多的 10 个博物馆:");
            data.stream()
                .sorted(Comparator.comparingInt(AbstractMuseumModel::getReviewCount).reversed())
                .limit(10)
                .forEach(m -> System.out.printf("%s - 评论数:%d - 评分:%.1f\n", m.getName(), m.getReviewCount(), m.getRating()));
        }

        /**
         * Counts records per rating bucket. Every bucket is present in the
         * result, including those with a count of zero, and iteration order
         * runs from the highest bucket to the lowest.
         *
         * @param data records to bucket; {@code null} is treated as empty
         */
        @Override
        public Map<String, Integer> getRatingDistribution(List<? extends AbstractMuseumModel> data) {
            // LinkedHashMap keeps the bucket order stable for anything iterating the result.
            Map<String, Integer> distribution = new LinkedHashMap<>();
            for (String bucket : BUCKETS) {
                distribution.put(bucket, 0);
            }
            if (data == null) {
                return distribution;
            }
            for (AbstractMuseumModel m : data) {
                distribution.merge(bucketFor(m.getRating()), 1, Integer::sum);
            }
            return distribution;
        }

        /** Maps a rating to its bucket label; anything not {@code >= 3.0} falls in the lowest bucket. */
        private static String bucketFor(double rating) {
            if (rating >= 4.5) {
                return BUCKETS[0];
            }
            if (rating >= 4.0) {
                return BUCKETS[1];
            }
            if (rating >= 3.5) {
                return BUCKETS[2];
            }
            if (rating >= 3.0) {
                return BUCKETS[3];
            }
            return BUCKETS[4];
        }
    }

    /** Renders charts for a set of museum records. */
    public interface ChartGenerator {
        /**
         * @param data               records to chart
         * @param ratingDistribution record count per rating bucket
         * @throws IOException if an image cannot be written
         */
        void generateCharts(List<? extends AbstractMuseumModel> data, Map<String, Integer> ratingDistribution) throws IOException;
    }

    /** Writes four PNG charts into the working directory using JFreeChart. */
    public static class MuseumChartGenerator implements ChartGenerator {
        /** Font family requested for all chart text. */
        private static final String FONT_NAME = "SimHei";

        static {
            // Request anti-aliased text rendering.
            System.getProperties().put("awt.useSystemAAFontSettings", "on");
            System.getProperties().put("swing.aatext", "true");
        }

        private static Font font(int style, int size) {
            return new Font(FONT_NAME, style, size);
        }

        @Override
        public void generateCharts(List<? extends AbstractMuseumModel> data, Map<String, Integer> ratingDistribution) throws IOException {
            generateRatingDistributionPieChart(ratingDistribution);
            generateTopMuseumsRatingBarChart(data);
            generateTopMuseumsReviewCountBarChart(data);
            generateRatingVsReviewCountScatterChart(data);
        }

        /** Pie chart of the rating buckets, saved as {@code museum_rating_distribution.png}. */
        private void generateRatingDistributionPieChart(Map<String, Integer> ratingDistribution) throws IOException {
            DefaultPieDataset dataset = new DefaultPieDataset();
            ratingDistribution.forEach(dataset::setValue);

            JFreeChart chart = ChartFactory.createPieChart("博物馆评分分布", dataset, true, true, false);
            chart.getTitle().setFont(font(Font.BOLD, 18));
            PiePlot plot = (PiePlot) chart.getPlot();
            plot.setLabelFont(font(Font.PLAIN, 12));
            chart.getLegend().setItemFont(font(Font.PLAIN, 12));
            // Label format: bucket name (count, percentage).
            plot.setLabelGenerator(new StandardPieSectionLabelGenerator(
                "{0} ({1}, {2})", NumberFormat.getInstance(), NumberFormat.getPercentInstance()));

            ChartUtils.saveChartAsPNG(new File("museum_rating_distribution.png"), chart, 800, 600);
            System.out.println("评分分布饼图已生成:museum_rating_distribution.png");
        }

        /** Bar chart of the 15 highest-rated museums, saved as {@code top_museums_rating.png}. */
        private void generateTopMuseumsRatingBarChart(List<? extends AbstractMuseumModel> data) throws IOException {
            DefaultCategoryDataset dataset = new DefaultCategoryDataset();
            data.stream()
                .sorted(Comparator.comparingDouble(AbstractMuseumModel::getRating).reversed())
                .limit(15)
                .forEach(m -> dataset.addValue(m.getRating(), "评分", m.getName()));

            JFreeChart chart = ChartFactory.createBarChart(
                "Top 15 博物馆评分", "博物馆名称", "评分", dataset, PlotOrientation.VERTICAL, true, true, false);
            styleBarChart(chart);

            ChartUtils.saveChartAsPNG(new File("top_museums_rating.png"), chart, 1600, 800);
            System.out.println("Top 15 博物馆评分柱状图已生成:top_museums_rating.png");
        }

        /** Bar chart of the 15 most-reviewed museums, saved as {@code top_museums_reviews.png}. */
        private void generateTopMuseumsReviewCountBarChart(List<? extends AbstractMuseumModel> data) throws IOException {
            DefaultCategoryDataset dataset = new DefaultCategoryDataset();
            data.stream()
                .sorted(Comparator.comparingInt(AbstractMuseumModel::getReviewCount).reversed())
                .limit(15)
                .forEach(m -> dataset.addValue(m.getReviewCount(), "评论数", m.getName()));

            JFreeChart chart = ChartFactory.createBarChart(
                "Top 15 博物馆评论数", "博物馆名称", "评论数", dataset, PlotOrientation.VERTICAL, true, true, false);
            styleBarChart(chart);

            ChartUtils.saveChartAsPNG(new File("top_museums_reviews.png"), chart, 1600, 800);
            System.out.println("Top 15 博物馆评论数柱状图已生成:top_museums_reviews.png");
        }

        /** Applies the title and axis fonts shared by both bar charts and turns category labels vertical. */
        private static void styleBarChart(JFreeChart chart) {
            chart.getTitle().setFont(font(Font.BOLD, 18));
            CategoryPlot plot = chart.getCategoryPlot();
            CategoryAxis xAxis = plot.getDomainAxis();
            xAxis.setLabelFont(font(Font.PLAIN, 14));
            xAxis.setTickLabelFont(font(Font.PLAIN, 11));
            xAxis.setCategoryLabelPositions(CategoryLabelPositions.UP_90);
            plot.getRangeAxis().setLabelFont(font(Font.PLAIN, 14));
            plot.getRangeAxis().setTickLabelFont(font(Font.PLAIN, 12));
        }

        /**
         * Scatter plot of rating (x) against review count (y), with the eight
         * most-reviewed museums labelled by name; saved as
         * {@code museum_rating_vs_reviews_scatter.png}.
         */
        private void generateRatingVsReviewCountScatterChart(List<? extends AbstractMuseumModel> data) throws IOException {
            XYSeries series = new XYSeries("博物馆数据");
            data.forEach(m -> series.add(m.getRating(), m.getReviewCount()));
            XYSeriesCollection dataset = new XYSeriesCollection(series);

            JFreeChart chart = ChartFactory.createScatterPlot(
                "博物馆评分与评论数关系", "评分", "评论数", dataset, PlotOrientation.VERTICAL, true, true, false);
            chart.getTitle().setFont(font(Font.BOLD, 18));
            XYPlot plot = (XYPlot) chart.getPlot();
            plot.getDomainAxis().setLabelFont(font(Font.PLAIN, 14));
            plot.getDomainAxis().setTickLabelFont(font(Font.PLAIN, 12));
            plot.getRangeAxis().setLabelFont(font(Font.PLAIN, 14));
            plot.getRangeAxis().setTickLabelFont(font(Font.PLAIN, 12));

            data.stream()
                .sorted(Comparator.comparingInt(AbstractMuseumModel::getReviewCount).reversed())
                .limit(8)
                .forEach(m -> {
                    XYTextAnnotation annotation = new XYTextAnnotation(m.getName(), m.getRating(), m.getReviewCount());
                    annotation.setFont(font(Font.PLAIN, 10));
                    annotation.setPaint(Color.BLUE);
                    plot.addAnnotation(annotation);
                });

            ChartUtils.saveChartAsPNG(new File("museum_rating_vs_reviews_scatter.png"), chart, 1200, 700);
            System.out.println("评分与评论数关系散点图已生成:museum_rating_vs_reviews_scatter.png");
        }
    }
}