""" 更新两个模板文件的信息并上传到MinIO - 8.XXX初核情况报告.docx - 8-1请示报告卡(初核报告结论) .docx """ import os import re import json import sys import pymysql from minio import Minio from minio.error import S3Error from datetime import datetime from pathlib import Path from docx import Document from typing import Dict, List, Optional # 设置输出编码为UTF-8(Windows兼容) if sys.platform == 'win32': import io sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8', errors='replace') sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8', errors='replace') # MinIO连接配置 MINIO_CONFIG = { 'endpoint': 'minio.datacubeworld.com:9000', 'access_key': 'JOLXFXny3avFSzB0uRA5', 'secret_key': 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I', 'secure': True } # 数据库连接配置 DB_CONFIG = { 'host': '152.136.177.240', 'port': 5012, 'user': 'finyx', 'password': '6QsGK6MpePZDE57Z', 'database': 'finyx', 'charset': 'utf8mb4' } # 固定值 TENANT_ID = 615873064429507639 CREATED_BY = 655162080928945152 UPDATED_BY = 655162080928945152 BUCKET_NAME = 'finyx' # 要处理的模板文件 TEMPLATE_FILES = [ 'template_finish/2-初核模版/3.初核结论/8.XXX初核情况报告.docx', 'template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx' ] # 模板名称映射(用于查找数据库中的记录) TEMPLATE_NAME_MAP = { '8.XXX初核情况报告.docx': ['8.XXX初核情况报告', 'XXX初核情况报告'], '8-1请示报告卡(初核报告结论) .docx': ['8-1请示报告卡(初核报告结论) ', '请示报告卡(初核报告结论)'] } def generate_id(): """生成ID""" import time import random timestamp = int(time.time() * 1000) random_part = random.randint(100000, 999999) return timestamp * 1000 + random_part def extract_placeholders_from_docx(file_path: str) -> List[str]: """ 从docx文件中提取所有占位符 Args: file_path: docx文件路径 Returns: 占位符列表,格式: ['field_code1', 'field_code2', ...] """ placeholders = set() pattern = r'\{\{([^}]+)\}\}' # 匹配 {{field_code}} 格式 try: doc = Document(file_path) # 从段落中提取占位符 for paragraph in doc.paragraphs: text = paragraph.text matches = re.findall(pattern, text) for match in matches: cleaned = match.strip() # 过滤掉不完整的占位符(包含 { 或 } 的) if cleaned and '{' not in cleaned and '}' not in cleaned: placeholders.add(cleaned) # 从表格中提取占位符 for table in doc.tables: for row in table.rows: for cell in row.cells: for paragraph in cell.paragraphs: text = paragraph.text matches = re.findall(pattern, text) for match in matches: cleaned = match.strip() # 过滤掉不完整的占位符(包含 { 或 } 的) if cleaned and '{' not in cleaned and '}' not in cleaned: placeholders.add(cleaned) except Exception as e: print(f" 错误: 读取文件失败 - {str(e)}") return [] return sorted(list(placeholders)) def normalize_template_name(file_name: str) -> str: """ 标准化模板名称(去掉扩展名、括号内容、数字前缀等) Args: file_name: 文件名,如 "8.XXX初核情况报告.docx" Returns: 标准化后的名称,如 "XXX初核情况报告" """ # 去掉扩展名 name = Path(file_name).stem # 去掉括号内容 name = re.sub(r'[((].*?[))]', '', name) name = name.strip() # 去掉数字前缀和点号 name = re.sub(r'^\d+[\.\-]?\s*', '', name) name = name.strip() return name def upload_to_minio(client: Minio, file_path: str, template_name: str) -> str: """上传文件到MinIO""" try: now = datetime.now() object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{template_name}' client.fput_object( BUCKET_NAME, object_name, file_path, content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document' ) return f"/{object_name}" except Exception as e: raise Exception(f"上传到MinIO失败: {str(e)}") def find_template_by_names(conn, possible_names: List[str]) -> Optional[Dict]: """根据可能的模板名称查找数据库中的模板""" cursor = conn.cursor(pymysql.cursors.DictCursor) try: # 尝试每个可能的名称 for name in possible_names: sql = """ SELECT id, name, file_path, parent_id, input_data FROM f_polic_file_config WHERE tenant_id = %s AND name = %s """ cursor.execute(sql, (TENANT_ID, name)) result = cursor.fetchone() if result: return result return None finally: cursor.close() def get_template_code_from_input_data(input_data: Optional[str]) -> str: """从input_data中提取template_code,如果没有则生成一个""" if input_data: try: data = json.loads(input_data) return data.get('template_code', '') except: pass return '' def update_template(conn, template_file_path: str, template_info: Dict, minio_path: str): """ 更新模板配置 Args: conn: 数据库连接 template_file_path: 模板文件路径 template_info: 模板信息(包含占位符等) minio_path: MinIO中的文件路径 """ cursor = conn.cursor() try: file_name = Path(template_file_path).name possible_names = TEMPLATE_NAME_MAP.get(file_name, [normalize_template_name(file_name)]) # 查找现有记录 existing_template = find_template_by_names(conn, possible_names) if not existing_template: print(f" [WARN] 未找到数据库记录,将创建新记录") template_id = generate_id() template_name = possible_names[0] # 使用第一个名称 # 生成template_code template_code = get_template_code_from_input_data(None) if not template_code: # 根据文件名生成template_code if 'XXX初核情况报告' in file_name: template_code = 'INVESTIGATION_REPORT' elif '请示报告卡' in file_name and '初核报告结论' in file_name: template_code = 'REPORT_CARD_CONCLUSION' else: template_code = f'TEMPLATE_{template_id % 1000000}' # 准备input_data input_data = json.dumps({ 'template_code': template_code, 'business_type': 'INVESTIGATION', 'placeholders': template_info['placeholders'] }, ensure_ascii=False) # 创建新记录 insert_sql = """ INSERT INTO f_polic_file_config (id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state) VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, %s) """ cursor.execute(insert_sql, ( template_id, TENANT_ID, None, # parent_id 需要根据实际情况设置 template_name, input_data, minio_path, CREATED_BY, UPDATED_BY, 1 # state: 1表示启用 )) print(f" [OK] 创建模板配置: {template_name}, ID: {template_id}") conn.commit() return template_id else: # 更新现有记录 template_id = existing_template['id'] template_name = existing_template['name'] # 获取现有的template_code existing_input_data = existing_template.get('input_data') template_code = get_template_code_from_input_data(existing_input_data) if not template_code: # 根据文件名生成template_code if 'XXX初核情况报告' in file_name: template_code = 'INVESTIGATION_REPORT' elif '请示报告卡' in file_name and '初核报告结论' in file_name: template_code = 'REPORT_CARD_CONCLUSION' else: template_code = f'TEMPLATE_{template_id % 1000000}' # 准备input_data input_data = json.dumps({ 'template_code': template_code, 'business_type': 'INVESTIGATION', 'placeholders': template_info['placeholders'] }, ensure_ascii=False) update_sql = """ UPDATE f_polic_file_config SET file_path = %s, input_data = %s, updated_time = NOW(), updated_by = %s, state = 1 WHERE id = %s AND tenant_id = %s """ cursor.execute(update_sql, ( minio_path, input_data, UPDATED_BY, template_id, TENANT_ID )) print(f" [OK] 更新模板配置: {template_name}, ID: {template_id}") print(f" 占位符数量: {len(template_info['placeholders'])}") if template_info['placeholders']: print(f" 占位符: {', '.join(template_info['placeholders'][:10])}{'...' if len(template_info['placeholders']) > 10 else ''}") conn.commit() return template_id except Exception as e: conn.rollback() raise Exception(f"更新模板配置失败: {str(e)}") finally: cursor.close() def main(): """主函数""" print("=" * 80) print("更新模板文件信息并上传到MinIO") print("=" * 80) # 连接数据库 try: conn = pymysql.connect(**DB_CONFIG) print("✓ 数据库连接成功") except Exception as e: print(f"✗ 数据库连接失败: {str(e)}") return # 创建MinIO客户端 try: minio_client = Minio( MINIO_CONFIG['endpoint'], access_key=MINIO_CONFIG['access_key'], secret_key=MINIO_CONFIG['secret_key'], secure=MINIO_CONFIG['secure'] ) # 检查存储桶是否存在 found = minio_client.bucket_exists(BUCKET_NAME) if not found: print(f"✗ 存储桶 '{BUCKET_NAME}' 不存在") conn.close() return print("✓ MinIO连接成功") except Exception as e: print(f"✗ MinIO连接失败: {str(e)}") conn.close() return # 处理每个模板文件 success_count = 0 failed_files = [] for template_file in TEMPLATE_FILES: print(f"\n{'=' * 80}") print(f"处理模板: {template_file}") print(f"{'=' * 80}") # 检查文件是否存在 if not os.path.exists(template_file): print(f" [ERROR] 文件不存在: {template_file}") failed_files.append(template_file) continue try: # 提取占位符 print(f" 正在提取占位符...") placeholders = extract_placeholders_from_docx(template_file) print(f" ✓ 提取到 {len(placeholders)} 个占位符") if placeholders: print(f" 占位符: {', '.join(placeholders[:10])}{'...' if len(placeholders) > 10 else ''}") # 准备模板信息 file_name = Path(template_file).name template_info = { 'file_path': template_file, 'file_name': file_name, 'placeholders': placeholders } # 上传到MinIO print(f" 正在上传到MinIO...") minio_path = upload_to_minio(minio_client, template_file, file_name) print(f" ✓ 上传成功: {minio_path}") # 更新数据库 print(f" 正在更新数据库...") template_id = update_template(conn, template_file, template_info, minio_path) print(f" ✓ 更新成功,模板ID: {template_id}") success_count += 1 except Exception as e: print(f" [ERROR] 处理失败: {str(e)}") import traceback traceback.print_exc() failed_files.append(template_file) # 总结 print(f"\n{'=' * 80}") print("处理完成") print(f"{'=' * 80}") print(f"成功: {success_count}/{len(TEMPLATE_FILES)}") if failed_files: print(f"失败的文件:") for file in failed_files: print(f" - {file}") conn.close() if __name__ == '__main__': main()