diff --git a/project/DataCleaner.java b/project/DataCleaner.java new file mode 100644 index 0000000..dd9c5fa --- /dev/null +++ b/project/DataCleaner.java @@ -0,0 +1,103 @@ +package com.project.util; + +import com.project.model.PostInfo; + +import java.util.ArrayList; +import java.util.List; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +public class DataCleaner { + + public static List cleanPosts(List rawPosts) { + List cleanedPosts = new ArrayList<>(); + + for (PostInfo post : rawPosts) { + PostInfo cleaned = cleanPost(post); + if (isValidPost(cleaned)) { + cleanedPosts.add(cleaned); + } + } + + System.out.println("数据清洗完成,有效数据: " + cleanedPosts.size() + " 条"); + return cleanedPosts; + } + + private static PostInfo cleanPost(PostInfo post) { + PostInfo cleaned = new PostInfo(); + + cleaned.setTitle(cleanText(post.getTitle())); + cleaned.setContent(cleanContent(post.getContent())); + cleaned.setAuthor(cleanText(post.getAuthor())); + cleaned.setPostDate(post.getPostDate()); + cleaned.setLikeCount(post.getLikeCount()); + cleaned.setCommentCount(post.getCommentCount()); + cleaned.setViewCount(post.getViewCount()); + cleaned.setTags(cleanText(post.getTags())); + cleaned.setSentiment(normalizeSentiment(post.getSentiment())); + + return cleaned; + } + + private static String cleanText(String text) { + if (text == null) { + return ""; + } + return text.trim().replaceAll("\\s+", " "); + } + + private static String cleanContent(String content) { + if (content == null) { + return ""; + } + return content.trim() + .replaceAll("\\s+", " ") + .replaceAll("[\\r\\n]+", " ") + .replaceAll("<[^>]+>", "") + .replaceAll("\\[.*?\\]", "") + .replaceAll("\\(.*?\\)", ""); + } + + private static String normalizeSentiment(String sentiment) { + if (sentiment == null || sentiment.isEmpty()) { + return "中性"; + } + + String lower = sentiment.toLowerCase(); + if (lower.contains("积极") || lower.contains("正面") || lower.contains("positive")) { + return "积极"; + } else if 
(lower.contains("消极") || lower.contains("负面") || lower.contains("negative")) { + return "消极"; + } else { + return "中性"; + } + } + + private static boolean isValidPost(PostInfo post) { + return post.getTitle() != null && !post.getTitle().isEmpty() && + post.getContent() != null && !post.getContent().isEmpty(); + } + + public static String[] extractKeywords(String content) { + if (content == null || content.isEmpty()) { + return new String[0]; + } + + String[] commonKeywords = { + "数据", "分析", "学习", "技术", "互联网", "发展", "趋势", + "工具", "方法", "实践", "经验", "案例", "应用", "创新", + "挑战", "机遇", "未来", "智能", "算法", "模型", "平台" + }; + + List keywords = new ArrayList<>(); + String lowerContent = content.toLowerCase(); + + for (String keyword : commonKeywords) { + if (lowerContent.contains(keyword.toLowerCase())) { + keywords.add(keyword); + } + } + + return keywords.toArray(new String[0]); + } +} diff --git a/project/DataStorage.java b/project/DataStorage.java new file mode 100644 index 0000000..4089bee --- /dev/null +++ b/project/DataStorage.java @@ -0,0 +1,125 @@ +package com.project.storage; + +import com.project.model.PostInfo; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.List; + +public class DataStorage { + + public static void saveToCSV(List posts, String directory) { + if (posts == null || posts.isEmpty()) { + System.out.println("没有数据需要保存"); + return; + } + + try { + java.nio.file.Path dirPath = Paths.get(directory); + if (!Files.exists(dirPath)) { + Files.createDirectories(dirPath); + } + + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + String filename = "posts_" + timestamp + ".csv"; + java.nio.file.Path filePath = dirPath.resolve(filename); + + try (BufferedWriter writer = new 
BufferedWriter( + new FileWriter(filePath.toFile(), StandardCharsets.UTF_8))) { + + writer.write("\uFEFF"); + writer.write("标题,内容,作者,发布日期,点赞数,评论数,浏览量,标签,情感倾向\n"); + + for (PostInfo post : posts) { + writer.write(post.toCSV()); + writer.write("\n"); + } + } + + System.out.println("数据已保存到: " + filePath.toAbsolutePath()); + + } catch (IOException e) { + System.err.println("保存CSV文件时出错: " + e.getMessage()); + } + } + + public static void saveToJSON(List posts, String directory) { + if (posts == null || posts.isEmpty()) { + System.out.println("没有数据需要保存"); + return; + } + + try { + java.nio.file.Path dirPath = Paths.get(directory); + if (!Files.exists(dirPath)) { + Files.createDirectories(dirPath); + } + + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + String filename = "posts_" + timestamp + ".json"; + java.nio.file.Path filePath = dirPath.resolve(filename); + + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(filePath.toFile(), StandardCharsets.UTF_8))) { + + writer.write("[\n"); + for (int i = 0; i < posts.size(); i++) { + writer.write(postToJSON(posts.get(i))); + if (i < posts.size() - 1) { + writer.write(",\n"); + } else { + writer.write("\n"); + } + } + writer.write("]\n"); + } + + System.out.println("数据已保存到: " + filePath.toAbsolutePath()); + + } catch (IOException e) { + System.err.println("保存JSON文件时出错: " + e.getMessage()); + } + } + + private static String postToJSON(PostInfo post) { + return String.format( + " {\n" + + " \"title\": \"%s\",\n" + + " \"content\": \"%s\",\n" + + " \"author\": \"%s\",\n" + + " \"postDate\": \"%s\",\n" + + " \"likeCount\": %d,\n" + + " \"commentCount\": %d,\n" + + " \"viewCount\": %d,\n" + + " \"tags\": \"%s\",\n" + + " \"sentiment\": \"%s\"\n" + + " }", + escapeJSON(post.getTitle()), + escapeJSON(post.getContent()), + escapeJSON(post.getAuthor()), + post.getPostDate() != null ? 
post.getPostDate().toString() : "", + post.getLikeCount(), + post.getCommentCount(), + post.getViewCount(), + escapeJSON(post.getTags()), + escapeJSON(post.getSentiment()) + ); + } + + private static String escapeJSON(String text) { + if (text == null) { + return ""; + } + return text.replace("\\", "\\\\") + .replace("\"", "\\\"") + .replace("\n", "\\n") + .replace("\r", "\\r") + .replace("\t", "\\t"); + } +} diff --git a/project/ExcelReader.java b/project/ExcelReader.java new file mode 100644 index 0000000..66e23ad --- /dev/null +++ b/project/ExcelReader.java @@ -0,0 +1,106 @@ +package com.project.reader; + +import com.project.model.PostInfo; + +import java.io.*; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.util.ArrayList; +import java.util.List; +import java.util.Locale; + +public class ExcelReader { + + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ofPattern("yyyy-MM-dd", Locale.CHINA); + + public static List readExcelData(String filePath, int maxRows) { + List posts = new ArrayList<>(); + + try (BufferedReader reader = new BufferedReader(new FileReader(filePath, java.nio.charset.StandardCharsets.UTF_8))) { + + String line; + boolean isFirstLine = true; + int rowCount = 0; + + while ((line = reader.readLine()) != null && rowCount < maxRows) { + if (isFirstLine) { + isFirstLine = false; + continue; + } + + String[] parts = parseCSVLine(line); + if (parts.length >= 9) { + PostInfo post = parsePostInfo(parts); + if (post != null) { + posts.add(post); + rowCount++; + } + } + } + + System.out.println("成功读取 " + posts.size() + " 条数据"); + + } catch (IOException e) { + System.err.println("读取文件时出错: " + e.getMessage()); + } + + return posts; + } + + private static String[] parseCSVLine(String line) { + List fields = new ArrayList<>(); + StringBuilder currentField = new StringBuilder(); + boolean inQuotes = false; + + for (char c : line.toCharArray()) { + if (c == '"') { + inQuotes = !inQuotes; + } else if 
(c == ',' && !inQuotes) { + fields.add(currentField.toString().trim()); + currentField.setLength(0); + } else { + currentField.append(c); + } + } + + fields.add(currentField.toString().trim()); + return fields.toArray(new String[0]); + } + + private static PostInfo parsePostInfo(String[] parts) { + try { + PostInfo post = new PostInfo(); + + post.setTitle(parts[0]); + post.setContent(parts[1]); + post.setAuthor(parts[2]); + + if (!parts[3].isEmpty()) { + post.setPostDate(LocalDate.parse(parts[3], DATE_FORMATTER)); + } + + post.setLikeCount(parseInt(parts[4])); + post.setCommentCount(parseInt(parts[5])); + post.setViewCount(parseInt(parts[6])); + + post.setTags(parts[7]); + post.setSentiment(parts[8]); + + return post; + } catch (Exception e) { + System.err.println("解析数据时出错: " + e.getMessage()); + return null; + } + } + + private static int parseInt(String value) { + try { + if (value == null || value.isEmpty()) { + return 0; + } + return Integer.parseInt(value); + } catch (NumberFormatException e) { + return 0; + } + } +} diff --git a/project/HTMLReportGenerator.java b/project/HTMLReportGenerator.java new file mode 100644 index 0000000..7a6855e --- /dev/null +++ b/project/HTMLReportGenerator.java @@ -0,0 +1,214 @@ +package com.project.report; + +import com.project.analyzer.PostAnalyzer; +import com.project.model.PostInfo; + +import java.io.BufferedWriter; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Paths; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Map; + +public class HTMLReportGenerator { + + private static final String OUTPUT_DIR = "d:\\java\\project\\reports"; + + public static void generateReport(PostAnalyzer analyzer) { + try { + Files.createDirectories(Paths.get(OUTPUT_DIR)); + + String timestamp = LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyyMMdd_HHmmss")); + String filename = "report_" 
+ timestamp + ".html"; + String filepath = OUTPUT_DIR + "/" + filename; + + try (BufferedWriter writer = new BufferedWriter( + new FileWriter(filepath, StandardCharsets.UTF_8))) { + + writer.write(generateHTMLContent(analyzer)); + } + + System.out.println("HTML报告已生成: " + filepath); + + } catch (IOException e) { + System.err.println("生成HTML报告时出错: " + e.getMessage()); + } + } + + private static String generateHTMLContent(PostAnalyzer analyzer) { + StringBuilder html = new StringBuilder(); + + html.append("\n"); + html.append("\n"); + html.append("\n"); + html.append(" \n"); + html.append(" \n"); + html.append(" 图文帖子数据分析报告\n"); + html.append(" \n"); + html.append("\n"); + html.append("\n"); + html.append("
\n"); + html.append("

图文帖子数据分析报告

\n"); + html.append("

生成时间: ").append(LocalDateTime.now().format(DateTimeFormatter.ofPattern("yyyy-MM-dd HH:mm:ss"))).append("

\n"); + + html.append(generateSummarySection(analyzer)); + html.append(generateSentimentSection(analyzer)); + html.append(generateEngagementSection(analyzer)); + html.append(generateAuthorSection(analyzer)); + html.append(generateChartsSection()); + + html.append("
\n"); + html.append("\n"); + html.append(""); + + return html.toString(); + } + + private static String generateSummarySection(PostAnalyzer analyzer) { + StringBuilder section = new StringBuilder(); + + int totalPosts = analyzer.getPosts().size(); + double avgLikes = analyzer.getPosts().stream() + .mapToInt(PostInfo::getLikeCount) + .average() + .orElse(0); + + section.append("
\n"); + section.append("
\n"); + section.append("

").append(totalPosts).append("

\n"); + section.append("

帖子总数

\n"); + section.append("
\n"); + section.append("
\n"); + section.append("

").append(String.format("%.1f", avgLikes)).append("

\n"); + section.append("

平均点赞

\n"); + section.append("
\n"); + section.append("
\n"); + + section.append("
\n"); + section.append("

分析摘要

\n"); + section.append("
    \n"); + section.append("
  • 本次分析共收集 ").append(totalPosts).append(" 条图文帖子数据
  • \n"); + section.append("
  • 数据来源:D:\计量经济学\计量实验资料及作业要求\计量实验资料及作业要求\图文帖子原始信息计量实验使用
  • \n"); + section.append("
  • 分析内容包括情感倾向分布、互动指标、热门作者等多个维度
  • \n"); + section.append("
  • 通过数据可视化展示分析结果,便于直观理解
  • \n"); + section.append("
\n"); + section.append("
\n"); + + return section.toString(); + } + + private static String generateSentimentSection(PostAnalyzer analyzer) { + StringBuilder section = new StringBuilder(); + Map sentimentData = analyzer.getSentimentDistributionData(); + + section.append("
\n"); + section.append("

情感倾向分布分析

\n"); + section.append(" \n"); + section.append(" \n"); + + long total = sentimentData.values().stream().mapToLong(Long::longValue).sum(); + + for (Map.Entry entry : sentimentData.entrySet()) { + double percent = (entry.getValue() * 100.0) / total; + section.append(" \n"); + } + + section.append("
情感倾向帖子数量占比
").append(entry.getKey()) + .append("").append(entry.getValue()) + .append("").append(String.format("%.1f%%", percent)) + .append("
\n"); + section.append("
\n"); + + return section.toString(); + } + + private static String generateEngagementSection(PostAnalyzer analyzer) { + StringBuilder section = new StringBuilder(); + Map engagementData = analyzer.getEngagementData(); + + section.append("
\n"); + section.append("

互动指标分析

\n"); + section.append(" \n"); + section.append(" \n"); + + for (Map.Entry entry : engagementData.entrySet()) { + section.append(" \n"); + } + + section.append("
指标平均值
").append(entry.getKey()) + .append("").append(String.format("%.1f", entry.getValue())) + .append("
\n"); + section.append("
\n"); + + return section.toString(); + } + + private static String generateAuthorSection(PostAnalyzer analyzer) { + StringBuilder section = new StringBuilder(); + Map authorData = analyzer.getAuthorPostCount(); + + section.append("
\n"); + section.append("

热门作者排行TOP10

\n"); + section.append(" \n"); + section.append(" \n"); + + int rank = 1; + for (Map.Entry entry : authorData.entrySet()) { + section.append(" \n"); + } + + section.append("
排名作者帖子数量
").append(rank++) + .append("").append(entry.getKey()) + .append("").append(entry.getValue()) + .append("
\n"); + section.append("
\n"); + + return section.toString(); + } + + private static String generateChartsSection() { + StringBuilder section = new StringBuilder(); + + section.append("
\n"); + section.append("

数据可视化图表

\n"); + section.append("
\n"); + section.append("

情感倾向分布

\n"); + section.append(" \"情感倾向分布图\"\n"); + section.append("
\n"); + section.append("
\n"); + section.append("

互动指标分析

\n"); + section.append(" \"互动指标图\"\n"); + section.append("
\n"); + section.append("
\n"); + section.append("

热门作者排行

\n"); + section.append(" \"作者排行图\"\n"); + section.append("
\n"); + section.append("
\n"); + + return section.toString(); + } +} diff --git a/project/Main.java b/project/Main.java new file mode 100644 index 0000000..148520e --- /dev/null +++ b/project/Main.java @@ -0,0 +1,67 @@ +package com.project; + +import com.project.analyzer.PostAnalyzer; +import com.project.chart.SimpleChartGenerator; +import com.project.model.PostInfo; +import com.project.reader.ExcelReader; +import com.project.report.HTMLReportGenerator; +import com.project.storage.DataStorage; +import com.project.util.DataCleaner; + +import java.util.List; +import java.util.Scanner; + +public class Main { + + public static void main(String[] args) { + System.out.println("========================================"); + System.out.println(" Java网络爬虫与数据分析系统"); + System.out.println("========================================\n"); + + String dataFilePath = "D:\\计量经济学\\计量实验资料及作业要求\\计量实验资料及作业要求\\图文帖子原始信息计量实验使用.xlsx"; + String outputDir = "d:\\java\\project\\data"; + int maxRows = 300; + + try { + System.out.println("开始读取本地数据文件..."); + System.out.println("数据文件: " + dataFilePath); + System.out.println("读取前 " + maxRows + " 条数据"); + + List rawPosts = ExcelReader.readExcelData(dataFilePath, maxRows); + + if (rawPosts.isEmpty()) { + System.out.println("未获取到任何数据,程序退出"); + return; + } + + System.out.println("\n开始数据清洗..."); + List cleanedPosts = DataCleaner.cleanPosts(rawPosts); + + System.out.println("\n保存数据到文件..."); + DataStorage.saveToCSV(cleanedPosts, outputDir); + DataStorage.saveToJSON(cleanedPosts, outputDir); + + System.out.println("\n开始数据分析..."); + PostAnalyzer analyzer = new PostAnalyzer(cleanedPosts); + analyzer.analyzeAll(); + + System.out.println("\n生成图表..."); + SimpleChartGenerator.generateAllCharts(analyzer); + + System.out.println("\n生成HTML报告..."); + HTMLReportGenerator.generateReport(analyzer); + + System.out.println("\n========================================"); + System.out.println(" 程序执行完成!"); + System.out.println("========================================"); + 
System.out.println("\n输出文件位置:"); + System.out.println("- 数据文件: " + outputDir); + System.out.println("- 图表文件: d:\\java\\project\\charts"); + System.out.println("- 报告文件: d:\\java\\project\\reports"); + + } catch (Exception e) { + System.err.println("程序执行出错: " + e.getMessage()); + e.printStackTrace(); + } + } +} diff --git a/project/PostAnalyzer.java b/project/PostAnalyzer.java new file mode 100644 index 0000000..76a5216 --- /dev/null +++ b/project/PostAnalyzer.java @@ -0,0 +1,200 @@ +package com.project.analyzer; + +import com.project.model.PostInfo; + +import java.util.*; +import java.util.stream.Collectors; + +public class PostAnalyzer { + + private final List posts; + + public PostAnalyzer(List posts) { + this.posts = posts; + } + + public List getPosts() { + return posts; + } + + public void analyzeAll() { + System.out.println("\n========== 数据分析报告 ==========\n"); + + analyzeSentimentDistribution(); + analyzeEngagementMetrics(); + analyzePopularAuthors(); + analyzeContentLength(); + analyzeTemporalTrends(); + + System.out.println("\n========== 分析完成 ==========\n"); + } + + public void analyzeSentimentDistribution() { + System.out.println("【情感倾向分布分析】"); + System.out.println("----------------------------------------"); + + Map sentimentCounts = posts.stream() + .collect(Collectors.groupingBy( + PostInfo::getSentiment, + Collectors.counting() + )); + + System.out.printf("%-20s %s%n", "情感倾向", "帖子数量"); + System.out.println("----------------------------------------"); + + sentimentCounts.entrySet().stream() + .sorted(Map.Entry.comparingByValue().reversed()) + .forEach(entry -> System.out.printf("%-20s %d%n", entry.getKey(), entry.getValue())); + + System.out.println(); + } + + public void analyzeEngagementMetrics() { + System.out.println("【互动指标分析】"); + System.out.println("----------------------------------------"); + + double avgLikes = posts.stream() + .mapToInt(PostInfo::getLikeCount) + .average() + .orElse(0); + + double avgComments = posts.stream() + 
.mapToInt(PostInfo::getCommentCount) + .average() + .orElse(0); + + double avgViews = posts.stream() + .mapToInt(PostInfo::getViewCount) + .average() + .orElse(0); + + System.out.printf("平均点赞数: %.1f%n", avgLikes); + System.out.printf("平均评论数: %.1f%n", avgComments); + System.out.printf("平均浏览量: %.1f%n", avgViews); + + System.out.println(); + } + + public void analyzePopularAuthors() { + System.out.println("【热门作者排行】"); + System.out.println("----------------------------------------"); + System.out.printf("%-30s %10s %10s %10s%n", "作者", "帖子数", "总点赞", "总评论"); + System.out.println("----------------------------------------"); + + Map> authorPosts = posts.stream() + .collect(Collectors.groupingBy(PostInfo::getAuthor)); + + authorPosts.entrySet().stream() + .sorted(Map.Entry.>comparingByValue((a, b) -> b.size() - a.size())) + .limit(10) + .forEach(entry -> { + String author = entry.getKey(); + List authorPostList = entry.getValue(); + int postCount = authorPostList.size(); + int totalLikes = authorPostList.stream().mapToInt(PostInfo::getLikeCount).sum(); + int totalComments = authorPostList.stream().mapToInt(PostInfo::getCommentCount).sum(); + + System.out.printf("%-30s %10d %10d %10d%n", + author.length() > 28 ? 
author.substring(0, 28) : author, + postCount, totalLikes, totalComments); + }); + + System.out.println(); + } + + public void analyzeContentLength() { + System.out.println("【内容长度分析】"); + System.out.println("----------------------------------------"); + + double avgLength = posts.stream() + .mapToInt(post -> post.getContent().length()) + .average() + .orElse(0); + + int maxLength = posts.stream() + .mapToInt(post -> post.getContent().length()) + .max() + .orElse(0); + + int minLength = posts.stream() + .mapToInt(post -> post.getContent().length()) + .min() + .orElse(0); + + System.out.printf("平均内容长度: %.1f 字符%n", avgLength); + System.out.printf("最长内容: %d 字符%n", maxLength); + System.out.printf("最短内容: %d 字符%n", minLength); + + System.out.println(); + } + + public void analyzeTemporalTrends() { + System.out.println("【时间趋势分析】"); + System.out.println("----------------------------------------"); + + Map monthlyPosts = posts.stream() + .filter(post -> post.getPostDate() != null) + .collect(Collectors.groupingBy( + post -> post.getPostDate().format(java.time.format.DateTimeFormatter.ofPattern("yyyy-MM")), + Collectors.counting() + )); + + System.out.printf("%-10s %s%n", "月份", "帖子数量"); + System.out.println("----------------------------------------"); + + monthlyPosts.entrySet().stream() + .sorted(Map.Entry.comparingByKey()) + .forEach(entry -> System.out.printf("%-10s %d%n", entry.getKey(), entry.getValue())); + + System.out.println(); + } + + public Map getSentimentDistributionData() { + return posts.stream() + .collect(Collectors.groupingBy( + PostInfo::getSentiment, + Collectors.counting() + )); + } + + public Map getEngagementData() { + Map engagementData = new LinkedHashMap<>(); + + double avgLikes = posts.stream() + .mapToInt(PostInfo::getLikeCount) + .average() + .orElse(0); + + double avgComments = posts.stream() + .mapToInt(PostInfo::getCommentCount) + .average() + .orElse(0); + + double avgViews = posts.stream() + .mapToInt(PostInfo::getViewCount) + .average() + 
/**
 * Mutable data holder for one post: its text fields, publication date, engagement counters
 * and sentiment label.
 */
public class PostInfo {
    private String title;
    private String content;
    private String author;
    private LocalDate postDate;
    private int likeCount;
    private int commentCount;
    private int viewCount;
    private String tags;
    private String sentiment;

    /** Creates an empty post: text fields and date are {@code null}, counters are 0. */
    public PostInfo() {
    }

    /** Creates a post with every field set. */
    public PostInfo(String title, String content, String author, LocalDate postDate,
                    int likeCount, int commentCount, int viewCount, String tags, String sentiment) {
        this.title = title;
        this.content = content;
        this.author = author;
        this.postDate = postDate;
        this.likeCount = likeCount;
        this.commentCount = commentCount;
        this.viewCount = viewCount;
        this.tags = tags;
        this.sentiment = sentiment;
    }

    public String getTitle() {
        return title;
    }

    public void setTitle(String title) {
        this.title = title;
    }

    public String getContent() {
        return content;
    }

    public void setContent(String content) {
        this.content = content;
    }

    public String getAuthor() {
        return author;
    }

    public void setAuthor(String author) {
        this.author = author;
    }

    public LocalDate getPostDate() {
        return postDate;
    }

    public void setPostDate(LocalDate postDate) {
        this.postDate = postDate;
    }

    public int getLikeCount() {
        return likeCount;
    }

    public void setLikeCount(int likeCount) {
        this.likeCount = likeCount;
    }

    public int getCommentCount() {
        return commentCount;
    }

    public void setCommentCount(int commentCount) {
        this.commentCount = commentCount;
    }

    public int getViewCount() {
        return viewCount;
    }

    public void setViewCount(int viewCount) {
        this.viewCount = viewCount;
    }

    public String getTags() {
        return tags;
    }

    public void setTags(String tags) {
        this.tags = tags;
    }

    public String getSentiment() {
        return sentiment;
    }

    public void setSentiment(String sentiment) {
        this.sentiment = sentiment;
    }

    /** Returns a short debug representation; content and tags are left out. */
    @Override
    public String toString() {
        return "PostInfo{"
            + "title='" + title + '\''
            + ", author='" + author + '\''
            + ", postDate=" + postDate
            + ", likeCount=" + likeCount
            + ", commentCount=" + commentCount
            + ", viewCount=" + viewCount
            + ", sentiment='" + sentiment + '\''
            + '}';
    }

    /**
     * Renders the post as one CSV record without a trailing line break.
     *
     * <p>Column order: title, content, author, date, likes, comments, views, tags, sentiment.
     * Text columns are quoted; embedded quotes are doubled and line breaks become spaces so
     * that the record always occupies exactly one physical line. The counters are formatted
     * with {@code Locale.ROOT} so they are always written with ASCII digits.
     *
     * @return the CSV record; {@code null} text fields and a {@code null} date are written
     *     as empty quoted values
     */
    public String toCSV() {
        return String.format(java.util.Locale.ROOT,
            "\"%s\",\"%s\",\"%s\",\"%s\",%d,%d,%d,\"%s\",\"%s\"",
            csvField(title),
            csvField(content),
            csvField(author),
            postDate != null ? postDate.toString() : "",
            likeCount,
            commentCount,
            viewCount,
            csvField(tags),
            csvField(sentiment));
    }

    /**
     * Prepares one text value for a quoted CSV column: doubles embedded quotes and replaces
     * each line break (CRLF, CR or LF) with a single space.
     */
    private static String csvField(String value) {
        if (value == null) {
            return "";
        }
        return value.replace("\"", "\"\"")
            .replace("\r\n", " ")
            .replace('\r', ' ')
            .replace('\n', ' ');
    }
}
maxHeight); + + g2d.setColor(new Color(70, 130, 180)); + g2d.fillRect(startX + index * (barWidth + 50), startY - barHeight, barWidth, barHeight); + + g2d.setColor(Color.BLACK); + g2d.setFont(new Font("宋体", Font.PLAIN, 14)); + g2d.drawString(entry.getKey(), startX + index * (barWidth + 50) + 50, startY + 20); + g2d.drawString(String.valueOf(entry.getValue()), startX + index * (barWidth + 50) + 60, startY - barHeight - 5); + + index++; + } + + g2d.dispose(); + saveImage(image, "sentiment_distribution.png"); + } + + public static void generateEngagementChart(PostAnalyzer analyzer) { + Map data = analyzer.getEngagementData(); + + BufferedImage image = new BufferedImage(WIDTH, HEIGHT, BufferedImage.TYPE_INT_RGB); + Graphics2D g2d = image.createGraphics(); + + g2d.setColor(Color.WHITE); + g2d.fillRect(0, 0, WIDTH, HEIGHT); + + g2d.setColor(Color.BLACK); + g2d.setFont(new Font("宋体", Font.BOLD, 24)); + g2d.drawString("互动指标分析", 300, 40); + + int barWidth = 150; + int startX = 200; + int startY = 500; + int maxHeight = 400; + + double maxValue = data.values().stream().max(Double::compare).orElse(1.0); + + int index = 0; + for (Map.Entry entry : data.entrySet()) { + int barHeight = (int) ((entry.getValue() / maxValue) * maxHeight); + + g2d.setColor(new Color(60, 179, 113)); + g2d.fillRect(startX + index * (barWidth + 50), startY - barHeight, barWidth, barHeight); + + g2d.setColor(Color.BLACK); + g2d.setFont(new Font("宋体", Font.PLAIN, 14)); + g2d.drawString(entry.getKey(), startX + index * (barWidth + 50) + 60, startY + 20); + g2d.drawString(String.format("%.1f", entry.getValue()), startX + index * (barWidth + 50) + 50, startY - barHeight - 5); + + index++; + } + + g2d.dispose(); + saveImage(image, "engagement_metrics.png"); + } + + public static void generateAuthorChart(PostAnalyzer analyzer) { + Map data = analyzer.getAuthorPostCount(); + + BufferedImage image = new BufferedImage(WIDTH, HEIGHT, BufferedImage.TYPE_INT_RGB); + Graphics2D g2d = image.createGraphics(); + + 
g2d.setColor(Color.WHITE); + g2d.fillRect(0, 0, WIDTH, HEIGHT); + + g2d.setColor(Color.BLACK); + g2d.setFont(new Font("宋体", Font.BOLD, 24)); + g2d.drawString("热门作者排行TOP10", 280, 40); + + int barHeight = 35; + int startY = 80; + int startX = 200; + int maxWidth = 500; + + int maxValue = data.values().stream().max(Integer::compare).orElse(1); + + int index = 0; + for (Map.Entry entry : data.entrySet()) { + int barWidth = (int) ((entry.getValue() * 1.0 / maxValue) * maxWidth); + + g2d.setColor(new Color(255, 140, 0)); + g2d.fillRect(startX, startY + index * (barHeight + 10), barWidth, barHeight); + + g2d.setColor(Color.BLACK); + g2d.setFont(new Font("宋体", Font.PLAIN, 12)); + String author = entry.getKey(); + if (author.length() > 15) { + author = author.substring(0, 15) + "..."; + } + g2d.drawString(author, 50, startY + index * (barHeight + 10) + 23); + g2d.drawString(String.valueOf(entry.getValue()), startX + barWidth + 10, startY + index * (barHeight + 10) + 23); + + index++; + } + + g2d.dispose(); + saveImage(image, "author_ranking.png"); + } + + private static void saveImage(BufferedImage image, String filename) { + try { + File file = new File(OUTPUT_DIR, filename); + ImageIO.write(image, "PNG", file); + System.out.println("图表已保存: " + file.getAbsolutePath()); + } catch (IOException e) { + System.err.println("保存图表失败: " + e.getMessage()); + } + } +}