You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
100 lines
3.4 KiB
100 lines
3.4 KiB
import os
|
|
import openpyxl
|
|
import re
|
|
|
|
# Fill the UGC regression workbook from the raw post workbook.
#
# The script reads the header row of the input sheet to locate the
# "helpfull" column and the total-comment-count column, then writes, for each
# processed data row, Y into column 1, X1 into column 2 and zeros into
# columns 3-7 of the output sheet. The output workbook is updated in place.

# Hard-coded paths: the workbook that is read and the workbook that is
# loaded, modified and saved back.
input_file = r'D:\计量经济学\计量实验资料及作业要求\计量实验资料及作业要求\图文帖子原始信息计量实验使用.xlsx'
output_file = r'D:\计量经济学\计量实验资料及作业要求\计量实验资料及作业要求\UGC回归数据.xlsx'

print("========================================")
print(" 简单计算UGC回归数据")
print("========================================")
print(f"输入文件: {input_file}")
print(f"输出文件: {output_file}")
print()

# Both workbooks must already exist; the output workbook is opened and
# updated rather than created from scratch.
if not os.path.exists(input_file):
    print("错误: 输入文件不存在!")
    # SystemExit is used directly: the bare exit() helper comes from the
    # site module and is not guaranteed to be defined in every interpreter.
    raise SystemExit(1)

if not os.path.exists(output_file):
    print("错误: 输出文件不存在!")
    raise SystemExit(1)

print(f"输入文件大小: {os.path.getsize(input_file) / 1024:.2f} KB")

# Column indexes (1-based) discovered in the header row; None means the
# column was not found and zeros are written instead.
helpfull_col = None
comment_count_col = None

try:
    # Load the input workbook and report its dimensions.
    print("正在读取输入文件...")
    wb_input = openpyxl.load_workbook(input_file)
    ws_input = wb_input.active

    print(f"输入工作表名称: {ws_input.title}")
    print(f"输入文件最大行数: {ws_input.max_row}")
    print(f"输入文件最大列数: {ws_input.max_column}")

    # Load the output workbook.
    print("\n正在读取输出文件...")
    wb_output = openpyxl.load_workbook(output_file)
    ws_output = wb_output.active

    print(f"输出工作表名称: {ws_output.title}")

    # Identify the relevant columns from the header row (row 1).
    print("\n识别列...")
    headers = [
        ws_input.cell(row=1, column=col).value
        for col in range(1, ws_input.max_column + 1)
    ]
    for col, header in enumerate(headers, start=1):
        if not header:
            continue
        header_text = str(header)
        if 'helpfull' in header_text:
            helpfull_col = col
            print(f"找到 helpfull 列: {col}")
        elif '评论总数' in header_text:
            # This substring also matches headers containing '帖子评论总数'.
            comment_count_col = col
            print(f"找到评论总数列: {col}")
        elif '评论' in header_text:
            # Other comment-related columns are only reported, not used.
            print(f"找到评论列: {col} - {header}")

    # Compute and fill the data.
    print("\n计算并填充数据...")
    max_rows = min(ws_input.max_row, 10)  # only the first 10 rows, for testing
    print(f"处理前 {max_rows - 1} 行数据")

    for row in range(2, max_rows + 1):
        print(f"处理行 {row}")

        # Y (UGC usefulness): empty or falsy cells are written as 0.
        y_value = 0
        if helpfull_col is not None:
            y_value = ws_input.cell(row=row, column=helpfull_col).value or 0
        ws_output.cell(row=row, column=1, value=y_value)

        # X1 (number of comments): empty or falsy cells are written as 0.
        x1_value = 0
        if comment_count_col is not None:
            x1_value = ws_input.cell(row=row, column=comment_count_col).value or 0
        ws_output.cell(row=row, column=2, value=x1_value)

        # X2-X6 are set to 0 for now (output columns 3 through 7).
        for zero_col in range(3, 8):
            ws_output.cell(row=row, column=zero_col, value=0)

    # Save the output workbook back to the same path.
    print("\n保存文件...")
    wb_output.save(output_file)

    print(f"文件已成功保存: {output_file}")
    print(f"文件大小: {os.path.getsize(output_file) / 1024:.2f} KB")

    print()
    print("========================================")
    print(" 任务完成")
    print("========================================")

except Exception as e:
    # Top-level boundary: report the failure with a full traceback.
    print(f"处理文件时出错: {e}")
    import traceback
    traceback.print_exc()
|
|
|