170 lines
6.5 KiB
Python
170 lines
6.5 KiB
Python
"""
|
||
解析Excel文件,提取所有字段定义
|
||
"""
|
||
import openpyxl
|
||
from pathlib import Path
|
||
import json
|
||
from collections import defaultdict
|
||
|
||
# Excel文件路径
|
||
EXCEL_FILE = '技术文档/智慧监督项目模板数据结构设计表-20251125-一凡标注.xlsx'
|
||
|
||
def parse_excel():
|
||
"""解析Excel文件,提取字段信息"""
|
||
print("="*60)
|
||
print("开始解析Excel文件...")
|
||
print("="*60)
|
||
|
||
try:
|
||
# 检查文件是否存在
|
||
if not Path(EXCEL_FILE).exists():
|
||
print(f"错误: Excel文件不存在: {EXCEL_FILE}")
|
||
return None
|
||
|
||
# 打开Excel文件
|
||
workbook = openpyxl.load_workbook(EXCEL_FILE, data_only=True)
|
||
print(f"✓ 成功打开Excel文件: {EXCEL_FILE}")
|
||
|
||
# 获取工作表
|
||
sheet = workbook.active
|
||
|
||
# 读取所有数据
|
||
all_rows = []
|
||
for row in sheet.iter_rows(values_only=True):
|
||
all_rows.append(row)
|
||
|
||
print(f"总行数: {len(all_rows)}")
|
||
|
||
if len(all_rows) < 2:
|
||
print("错误: Excel文件数据不足")
|
||
return None
|
||
|
||
# 解析表头
|
||
headers = all_rows[0]
|
||
print(f"\n表头: {headers}")
|
||
|
||
# 字段映射关系
|
||
field_map = {
|
||
'一级分类': 0,
|
||
'二级分类': 1,
|
||
'三级分类': 2,
|
||
'输入数据字段': 3,
|
||
'输出数据字段': 4,
|
||
'输出示例数据': 5,
|
||
'备注说明': 6
|
||
}
|
||
|
||
# 存储字段信息
|
||
templates = defaultdict(lambda: {
|
||
'template_name': '',
|
||
'template_code': '',
|
||
'input_fields': [],
|
||
'output_fields': []
|
||
})
|
||
|
||
current_template = None
|
||
current_input_field = None
|
||
|
||
# 处理数据行(从第2行开始)
|
||
for i, row in enumerate(all_rows[1:], 2):
|
||
# 跳过空行
|
||
if not any(row):
|
||
continue
|
||
|
||
level1 = row[0] # 一级分类
|
||
level2 = row[1] # 二级分类
|
||
level3 = row[2] # 三级分类
|
||
input_field = row[3] # 输入数据字段
|
||
output_field = row[4] # 输出数据字段
|
||
example = row[5] # 输出示例数据
|
||
remark = row[6] # 备注说明
|
||
|
||
# 如果有一级分类,说明是新的模板组
|
||
if level1:
|
||
print(f"\n处理一级分类: {level1}")
|
||
|
||
# 如果有二级分类,说明是新的模板
|
||
if level2:
|
||
current_template = level2
|
||
# 生成模板编码(将中文转换为大写英文,去掉空格)
|
||
template_code = level2.upper().replace(' ', '_').replace('初步核实审批表', 'PRELIMINARY_VERIFICATION_APPROVAL').replace('请示报告卡', 'REPORT_CARD')
|
||
templates[current_template]['template_name'] = current_template
|
||
templates[current_template]['template_code'] = template_code
|
||
print(f" 处理二级分类(模板): {current_template} -> {template_code}")
|
||
|
||
# 处理输入字段
|
||
if input_field and input_field != current_input_field:
|
||
current_input_field = input_field
|
||
if current_template:
|
||
templates[current_template]['input_fields'].append({
|
||
'name': input_field,
|
||
'field_code': input_field.replace('线索信息', 'clue_info').replace('被核查人员工作基本情况线索', 'target_basic_info_clue')
|
||
})
|
||
print(f" 输入字段: {input_field}")
|
||
|
||
# 处理输出字段
|
||
if output_field:
|
||
if current_template:
|
||
# 生成字段编码(简化版,实际需要更精确的映射)
|
||
field_code = output_field.lower().replace(' ', '_').replace('被核查人姓名', 'target_name').replace('被核查人员单位及职务', 'target_organization_and_position')
|
||
|
||
templates[current_template]['output_fields'].append({
|
||
'name': output_field,
|
||
'field_code': field_code,
|
||
'example': example,
|
||
'remark': remark
|
||
})
|
||
print(f" 输出字段: {output_field} -> {field_code}")
|
||
|
||
workbook.close()
|
||
|
||
# 转换为字典格式
|
||
result = {}
|
||
for template_name, template_info in templates.items():
|
||
result[template_name] = template_info
|
||
|
||
return result
|
||
|
||
except Exception as e:
|
||
print(f"解析Excel文件时发生错误: {e}")
|
||
import traceback
|
||
traceback.print_exc()
|
||
return None
|
||
|
||
def generate_field_mapping():
|
||
"""生成字段映射关系(基于已知字段)"""
|
||
# 根据初步核实审批表的字段定义,创建字段名称到字段编码的映射
|
||
field_mapping = {
|
||
'被核查人姓名': 'target_name',
|
||
'被核查人员单位及职务': 'target_organization_and_position',
|
||
'被核查人员性别': 'target_gender',
|
||
'被核查人员出生年月': 'target_date_of_birth',
|
||
'被核查人员政治面貌': 'target_political_status',
|
||
'被核查人员职级': 'target_professional_rank',
|
||
'线索来源': 'clue_source',
|
||
'主要问题线索': 'target_issue_description',
|
||
'初步核实审批表承办部门意见': 'department_opinion',
|
||
'初步核实审批表填表人': 'filler_name',
|
||
'线索信息': 'clue_info',
|
||
'被核查人员工作基本情况线索': 'target_basic_info_clue',
|
||
}
|
||
return field_mapping
|
||
|
||
if __name__ == '__main__':
|
||
result = parse_excel()
|
||
if result:
|
||
print("\n" + "="*60)
|
||
print("解析结果汇总:")
|
||
print("="*60)
|
||
for template_name, template_info in result.items():
|
||
print(f"\n模板: {template_name}")
|
||
print(f" 模板编码: {template_info['template_code']}")
|
||
print(f" 输入字段数: {len(template_info['input_fields'])}")
|
||
print(f" 输出字段数: {len(template_info['output_fields'])}")
|
||
|
||
# 保存为JSON文件
|
||
output_file = 'parsed_fields.json'
|
||
with open(output_file, 'w', encoding='utf-8') as f:
|
||
json.dump(result, f, ensure_ascii=False, indent=2)
|
||
print(f"\n✓ 结果已保存到: {output_file}")
|