package com.project;

import com.project.analyzer.PostAnalyzer;
import com.project.chart.SimpleChartGenerator;
import com.project.model.PostInfo;
import com.project.reader.ExcelReader;
import com.project.report.HTMLReportGenerator;
import com.project.storage.DataStorage;
import com.project.util.DataCleaner;

import java.util.List;
import java.util.Scanner;

/**
 * Command-line entry point that runs the post-analysis pipeline end to end:
 * read rows from an Excel file, clean them, save them as CSV and JSON,
 * analyze them, then generate charts and an HTML report.
 *
 * <p>Optional positional arguments override the built-in defaults:
 * <ol>
 *   <li>{@code args[0]} - path of the Excel data file</li>
 *   <li>{@code args[1]} - directory the CSV/JSON files are saved to</li>
 *   <li>{@code args[2]} - row limit passed to the reader (positive integer)</li>
 * </ol>
 * Running with no arguments behaves exactly as the defaults below describe.
 */
public class Main {

    /** Default Excel input file, used when {@code args[0]} is absent. */
    private static final String DEFAULT_DATA_FILE_PATH =
            "D:\\计量经济学\\计量实验资料及作业要求\\计量实验资料及作业要求\\图文帖子原始信息计量实验使用.xlsx";

    /** Default directory for the CSV/JSON output, used when {@code args[1]} is absent. */
    private static final String DEFAULT_OUTPUT_DIR = "d:\\java\\project\\data";

    /** Default row limit handed to the Excel reader, used when {@code args[2]} is absent. */
    private static final int DEFAULT_MAX_ROWS = 300;

    /**
     * Runs the pipeline. Any exception thrown by a pipeline step is reported on
     * standard error together with its stack trace; the method then returns.
     *
     * @param args optional overrides: data file path, output directory, row limit
     */
    public static void main(String[] args) {
        System.out.println("========================================");
        System.out.println(" Java网络爬虫与数据分析系统");
        System.out.println("========================================\n");

        String dataFilePath = args.length > 0 ? args[0] : DEFAULT_DATA_FILE_PATH;
        String outputDir = args.length > 1 ? args[1] : DEFAULT_OUTPUT_DIR;

        int maxRows = DEFAULT_MAX_ROWS;
        if (args.length > 2) {
            try {
                maxRows = Integer.parseInt(args[2].trim());
            } catch (NumberFormatException e) {
                System.err.println("无效的读取行数参数: " + args[2]);
                return;
            }
            if (maxRows <= 0) {
                System.err.println("无效的读取行数参数: " + args[2]);
                return;
            }
        }

        try {
            System.out.println("开始读取本地数据文件...");
            System.out.println("数据文件: " + dataFilePath);
            System.out.println("读取前 " + maxRows + " 条数据");
            List<PostInfo> rawPosts = ExcelReader.readExcelData(dataFilePath, maxRows);

            // Treat a null result like an empty one so a reader that signals
            // "nothing read" with null ends in the friendly message, not an NPE.
            if (rawPosts == null || rawPosts.isEmpty()) {
                System.out.println("未获取到任何数据,程序退出");
                return;
            }

            System.out.println("\n开始数据清洗...");
            List<PostInfo> cleanedPosts = DataCleaner.cleanPosts(rawPosts);

            System.out.println("\n保存数据到文件...");
            DataStorage.saveToCSV(cleanedPosts, outputDir);
            DataStorage.saveToJSON(cleanedPosts, outputDir);

            System.out.println("\n开始数据分析...");
            PostAnalyzer analyzer = new PostAnalyzer(cleanedPosts);
            analyzer.analyzeAll();

            System.out.println("\n生成图表...");
            SimpleChartGenerator.generateAllCharts(analyzer);

            System.out.println("\n生成HTML报告...");
            HTMLReportGenerator.generateReport(analyzer);

            System.out.println("\n========================================");
            System.out.println(" 程序执行完成!");
            System.out.println("========================================");
            System.out.println("\n输出文件位置:");
            System.out.println("- 数据文件: " + outputDir);
            // NOTE(review): the chart and report generators are not given a
            // directory here, so these two locations are fixed text - confirm
            // they match where the generators actually write.
            System.out.println("- 图表文件: d:\\java\\project\\charts");
            System.out.println("- 报告文件: d:\\java\\project\\reports");
        } catch (Exception e) {
            // Top-level boundary: report the failure instead of letting it propagate.
            System.err.println("程序执行出错: " + e.getMessage());
            e.printStackTrace();
        }
    }
}