""" 文档生成服务 - 处理Word模板填充和MinIO文件上传 """ import os import re import tempfile from typing import Dict, List, Optional from datetime import datetime from pathlib import Path from docx import Document from minio import Minio from minio.error import S3Error import pymysql class DocumentService: """文档生成服务类""" def __init__(self): # MinIO配置 self.minio_config = { 'endpoint': os.getenv('MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'), 'access_key': os.getenv('MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'), 'secret_key': os.getenv('MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'), 'secure': os.getenv('MINIO_SECURE', 'true').lower() == 'true' } self.bucket_name = os.getenv('MINIO_BUCKET', 'finyx') # 数据库配置 self.db_config = { 'host': os.getenv('DB_HOST', '152.136.177.240'), 'port': int(os.getenv('DB_PORT', 5012)), 'user': os.getenv('DB_USER', 'finyx'), 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), 'database': os.getenv('DB_NAME', 'finyx'), 'charset': 'utf8mb4' } self.tenant_id = 615873064429507639 def get_connection(self): """获取数据库连接""" return pymysql.connect(**self.db_config) def get_minio_client(self): """获取MinIO客户端""" return Minio( self.minio_config['endpoint'], access_key=self.minio_config['access_key'], secret_key=self.minio_config['secret_key'], secure=self.minio_config['secure'] ) def get_file_config_by_template_code(self, template_code: str) -> Optional[Dict]: """ 根据模板编码获取文件配置 Args: template_code: 模板编码,如 'PRELIMINARY_VERIFICATION_APPROVAL' Returns: 文件配置信息,包含: id, name, file_path, template_code """ import json conn = self.get_connection() cursor = conn.cursor(pymysql.cursors.DictCursor) try: # 查询文件配置(template_code存储在input_data的JSON字段中) sql = """ SELECT id, name, file_path, input_data FROM f_polic_file_config WHERE tenant_id = %s AND state = 1 """ cursor.execute(sql, (self.tenant_id,)) configs = cursor.fetchall() # 从input_data的JSON中查找匹配的template_code for config in configs: try: input_data = json.loads(config['input_data']) if config['input_data'] else {} if input_data.get('template_code') == template_code: return { 'id': config['id'], 'name': config['name'], 'file_path': config['file_path'], 'template_code': template_code } except (json.JSONDecodeError, TypeError): continue return None finally: cursor.close() conn.close() def download_template_from_minio(self, file_path: str) -> str: """ 从MinIO下载模板文件到临时目录 Args: file_path: MinIO中的相对路径,如 '/615873064429507639/TEMPLATE/2024/11/初步核实审批表模板.docx' Returns: 本地临时文件路径 """ client = self.get_minio_client() # 创建临时文件 temp_dir = tempfile.gettempdir() temp_file = os.path.join(temp_dir, f"template_{datetime.now().strftime('%Y%m%d%H%M%S')}.docx") try: # 从相对路径中提取对象名称(去掉开头的/) object_name = file_path.lstrip('/') # 下载文件 client.fget_object(self.bucket_name, object_name, temp_file) return temp_file except S3Error as e: raise Exception(f"从MinIO下载模板文件失败: {str(e)}") def fill_template(self, template_path: str, field_data: Dict[str, str]) -> str: """ 填充Word模板中的占位符 Args: template_path: 模板文件路径 field_data: 字段数据字典,格式: {'field_code': 'field_value'} Returns: 填充后的文档路径 """ try: # 打开模板文档 doc = Document(template_path) # 替换占位符 {{field_code}} 为实际值 for paragraph in doc.paragraphs: # 替换段落文本中的占位符 for field_code, field_value in field_data.items(): placeholder = f"{{{{{field_code}}}}}" if placeholder in paragraph.text: # 替换占位符 for run in paragraph.runs: if placeholder in run.text: run.text = run.text.replace(placeholder, field_value or '') # 替换表格中的占位符 for table in doc.tables: for row in table.rows: for cell in row.cells: for paragraph in cell.paragraphs: for field_code, field_value in field_data.items(): placeholder = f"{{{{{field_code}}}}}" if placeholder in paragraph.text: for run in paragraph.runs: if placeholder in run.text: run.text = run.text.replace(placeholder, field_value or '') # 保存到临时文件 temp_dir = tempfile.gettempdir() output_file = os.path.join(temp_dir, f"filled_{datetime.now().strftime('%Y%m%d%H%M%S')}.docx") doc.save(output_file) return output_file except Exception as e: raise Exception(f"填充模板失败: {str(e)}") def upload_to_minio(self, file_path: str, file_name: str) -> str: """ 上传文件到MinIO Args: file_path: 本地文件路径 file_name: 文件名称 Returns: MinIO中的相对路径 """ client = self.get_minio_client() try: # 生成MinIO对象路径(相对路径) now = datetime.now() # 使用日期路径组织文件 object_name = f"{self.tenant_id}/{now.strftime('%Y%m%d%H%M%S')}/{file_name}" # 上传文件 client.fput_object( self.bucket_name, object_name, file_path, content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document' ) # 返回相对路径(以/开头) return f"/{object_name}" except S3Error as e: raise Exception(f"上传文件到MinIO失败: {str(e)}") def generate_document(self, template_code: str, input_data: List[Dict], file_info: Dict) -> Dict: """ 生成文档 Args: template_code: 模板编码 input_data: 输入数据列表,格式: [{'fieldCode': 'xxx', 'fieldValue': 'xxx'}] file_info: 文件信息,格式: {'fileId': 1, 'fileName': 'xxx.doc', 'templateCode': 'xxx'} Returns: 生成结果,包含: filePath """ # 获取文件配置 file_config = self.get_file_config_by_template_code(template_code) if not file_config: raise Exception(f"模板编码 {template_code} 不存在") # 将input_data转换为字典格式 field_data = {} for item in input_data: field_code = item.get('fieldCode', '') field_value = item.get('fieldValue', '') if field_code: field_data[field_code] = field_value or '' # 下载模板 template_path = None filled_doc_path = None try: template_path = self.download_template_from_minio(file_config['file_path']) # 填充模板 filled_doc_path = self.fill_template(template_path, field_data) # 生成文档名称(.docx格式) original_file_name = file_info.get('fileName', 'generated.doc') generated_file_name = self.generate_document_name(original_file_name, field_data) # 上传到MinIO(使用生成的文档名) file_path = self.upload_to_minio(filled_doc_path, generated_file_name) return { 'filePath': file_path, 'fileName': generated_file_name # 返回生成的文档名 } finally: # 清理临时文件 if template_path and os.path.exists(template_path): try: os.remove(template_path) except: pass if filled_doc_path and os.path.exists(filled_doc_path): try: os.remove(filled_doc_path) except: pass def generate_document_id(self) -> str: """生成文档ID""" now = datetime.now() return f"DOC{now.strftime('%Y%m%d%H%M%S')}{str(now.microsecond)[:3]}" def generate_document_name(self, original_file_name: str, field_data: Dict[str, str]) -> str: """ 生成文档名称 Args: original_file_name: 原始文件名称 field_data: 字段数据 Returns: 生成的文档名称,如 "初步核实审批表_张三.docx" """ # 提取文件基础名称(不含扩展名) base_name = Path(original_file_name).stem # 尝试从字段数据中提取被核查人姓名作为后缀 suffix = '' if 'target_name' in field_data and field_data['target_name']: suffix = f"_{field_data['target_name']}" # 生成新文件名 return f"{base_name}{suffix}.docx"