You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
67 lines
2.8 KiB
67 lines
2.8 KiB
package com.project;
|
|
|
|
import com.project.analyzer.PostAnalyzer;
|
|
import com.project.chart.SimpleChartGenerator;
|
|
import com.project.model.PostInfo;
|
|
import com.project.reader.ExcelReader;
|
|
import com.project.report.HTMLReportGenerator;
|
|
import com.project.storage.DataStorage;
|
|
import com.project.util.DataCleaner;
|
|
|
|
import java.util.List;
|
|
import java.util.Scanner;
|
|
|
|
public class Main {
|
|
|
|
public static void main(String[] args) {
|
|
System.out.println("========================================");
|
|
System.out.println(" Java网络爬虫与数据分析系统");
|
|
System.out.println("========================================\n");
|
|
|
|
String dataFilePath = "D:\\计量经济学\\计量实验资料及作业要求\\计量实验资料及作业要求\\图文帖子原始信息计量实验使用.xlsx";
|
|
String outputDir = "d:\\java\\project\\data";
|
|
int maxRows = 300;
|
|
|
|
try {
|
|
System.out.println("开始读取本地数据文件...");
|
|
System.out.println("数据文件: " + dataFilePath);
|
|
System.out.println("读取前 " + maxRows + " 条数据");
|
|
|
|
List<PostInfo> rawPosts = ExcelReader.readExcelData(dataFilePath, maxRows);
|
|
|
|
if (rawPosts.isEmpty()) {
|
|
System.out.println("未获取到任何数据,程序退出");
|
|
return;
|
|
}
|
|
|
|
System.out.println("\n开始数据清洗...");
|
|
List<PostInfo> cleanedPosts = DataCleaner.cleanPosts(rawPosts);
|
|
|
|
System.out.println("\n保存数据到文件...");
|
|
DataStorage.saveToCSV(cleanedPosts, outputDir);
|
|
DataStorage.saveToJSON(cleanedPosts, outputDir);
|
|
|
|
System.out.println("\n开始数据分析...");
|
|
PostAnalyzer analyzer = new PostAnalyzer(cleanedPosts);
|
|
analyzer.analyzeAll();
|
|
|
|
System.out.println("\n生成图表...");
|
|
SimpleChartGenerator.generateAllCharts(analyzer);
|
|
|
|
System.out.println("\n生成HTML报告...");
|
|
HTMLReportGenerator.generateReport(analyzer);
|
|
|
|
System.out.println("\n========================================");
|
|
System.out.println(" 程序执行完成!");
|
|
System.out.println("========================================");
|
|
System.out.println("\n输出文件位置:");
|
|
System.out.println("- 数据文件: " + outputDir);
|
|
System.out.println("- 图表文件: d:\\java\\project\\charts");
|
|
System.out.println("- 报告文件: d:\\java\\project\\reports");
|
|
|
|
} catch (Exception e) {
|
|
System.err.println("程序执行出错: " + e.getMessage());
|
|
e.printStackTrace();
|
|
}
|
|
}
|
|
}
|
|
|