293 lines
11 KiB
Python
293 lines
11 KiB
Python
"""
|
||
文档生成服务 - 处理Word模板填充和MinIO文件上传
|
||
"""
|
||
import os
|
||
import re
|
||
import tempfile
|
||
from typing import Dict, List, Optional
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from docx import Document
|
||
from minio import Minio
|
||
from minio.error import S3Error
|
||
import pymysql
|
||
|
||
|
||
class DocumentService:
    """Generate Word documents from templates stored in MinIO.

    The workflow implemented by :meth:`generate_document` is:

    1. look up the template's file configuration in MySQL,
    2. download the ``.docx`` template from MinIO to a temporary file,
    3. replace ``{{field_code}}`` placeholders with the supplied values,
    4. upload the filled document back to MinIO and return its path.
    """

    # Characters that must not end up in a generated file name / object key
    # (path separators, Windows-reserved characters and control characters).
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    def __init__(self):
        """Read MinIO and database settings from environment variables.

        NOTE(security): the fallback defaults below embed real-looking
        credentials in source control. They are kept unchanged so existing
        deployments keep working, but they should be rotated and supplied
        through the environment only.
        """
        # MinIO configuration
        self.minio_config = {
            'endpoint': os.getenv('MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
            'access_key': os.getenv('MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
            'secret_key': os.getenv('MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'),
            'secure': os.getenv('MINIO_SECURE', 'true').lower() == 'true'
        }
        self.bucket_name = os.getenv('MINIO_BUCKET', 'finyx')

        # Database configuration
        self.db_config = {
            'host': os.getenv('DB_HOST', '152.136.177.240'),
            'port': int(os.getenv('DB_PORT', 5012)),
            'user': os.getenv('DB_USER', 'finyx'),
            'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
            'database': os.getenv('DB_NAME', 'finyx'),
            'charset': 'utf8mb4'
        }
        # Tenant whose file configurations and object paths are used.
        self.tenant_id = 615873064429507639

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_text(value) -> str:
        """Return *value* as replacement text; ``None`` becomes ``''``."""
        return '' if value is None else str(value)

    @staticmethod
    def _unique_temp_path(prefix: str) -> str:
        """Return a collision-free ``.docx`` path in the system temp dir.

        A timestamp alone has one-second resolution, so concurrent requests
        would share (and then delete) each other's files; a random UUID
        component makes every path unique.
        """
        import uuid

        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        return os.path.join(
            tempfile.gettempdir(),
            f"{prefix}_{stamp}_{uuid.uuid4().hex}.docx",
        )

    @classmethod
    def _iter_paragraphs(cls, container):
        """Yield every paragraph of *container*, descending into tables.

        *container* is anything exposing ``paragraphs`` and ``tables`` (the
        document body or a table cell), so nested tables are covered too.
        """
        yield from container.paragraphs
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cls._iter_paragraphs(cell)

    @staticmethod
    def _replace_across_runs(runs, placeholder: str, value: str) -> None:
        """Replace *placeholder* with *value* in the text formed by *runs*.

        Word frequently splits one placeholder over several runs. The
        replacement text is written into the first run the placeholder
        touches and the placeholder's remainder is cut from the following
        runs; text outside the placeholder stays in its original run.
        """
        if not placeholder:
            return
        search_from = 0
        while True:
            texts = [run.text for run in runs]
            start = ''.join(texts).find(placeholder, search_from)
            if start < 0:
                return
            end = start + len(placeholder)

            offset = 0
            inserted = False
            for run, text in zip(runs, texts):
                run_start = offset
                offset += len(text)
                if offset <= start or run_start >= end:
                    continue  # run lies entirely outside the placeholder
                lo = max(start - run_start, 0)
                hi = min(end - run_start, len(text))
                if inserted:
                    run.text = text[:lo] + text[hi:]
                else:
                    run.text = text[:lo] + value + text[hi:]
                    inserted = True

            # Continue after the inserted value so a value that itself
            # contains the placeholder cannot cause an endless loop.
            search_from = start + len(value)

    @classmethod
    def _replace_in_paragraph(cls, paragraph, replacements: Dict[str, str]) -> None:
        """Apply every ``placeholder -> value`` pair to one paragraph."""
        if '{{' not in paragraph.text:
            return  # cheap guard: nothing that looks like a placeholder
        runs = paragraph.runs
        for placeholder, value in replacements.items():
            cls._replace_across_runs(runs, placeholder, value)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def get_connection(self):
        """Open and return a new database connection built from ``db_config``."""
        return pymysql.connect(**self.db_config)

    def get_minio_client(self):
        """Create and return a MinIO client built from ``minio_config``."""
        return Minio(
            self.minio_config['endpoint'],
            access_key=self.minio_config['access_key'],
            secret_key=self.minio_config['secret_key'],
            secure=self.minio_config['secure']
        )

    def get_file_config_by_template_code(self, template_code: str) -> Optional[Dict]:
        """Look up the file configuration for a template code.

        Args:
            template_code: Template code, e.g.
                ``'PRELIMINARY_VERIFICATION_APPROVAL'``.

        Returns:
            A dict with ``id``, ``name``, ``file_path`` and ``template_code``,
            or ``None`` when no active configuration matches.
        """
        import json

        # template_code is stored inside the JSON text of the input_data
        # column, so all active rows of the tenant are fetched and filtered
        # in Python.
        sql = """
            SELECT id, name, file_path, input_data
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """
        conn = self.get_connection()
        try:
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute(sql, (self.tenant_id,))
                configs = cursor.fetchall()
        finally:
            # Always release the connection, even if creating the cursor or
            # running the query fails.
            conn.close()

        for config in configs:
            raw = config['input_data']
            if not raw:
                input_data = {}
            elif isinstance(raw, dict):
                input_data = raw
            else:
                try:
                    input_data = json.loads(raw)
                except (ValueError, TypeError):
                    continue  # malformed JSON: ignore this row
            if not isinstance(input_data, dict):
                continue  # valid JSON but not an object: cannot hold the code
            if input_data.get('template_code') == template_code:
                return {
                    'id': config['id'],
                    'name': config['name'],
                    'file_path': config['file_path'],
                    'template_code': template_code
                }

        return None

    def download_template_from_minio(self, file_path: str) -> str:
        """Download a template from MinIO into a unique temporary file.

        Args:
            file_path: Relative path inside the bucket, e.g.
                ``'/615873064429507639/TEMPLATE/2024/11/初步核实审批表模板.docx'``.

        Returns:
            Path of the local temporary file.

        Raises:
            Exception: If MinIO reports an error (the ``S3Error`` is chained).
        """
        client = self.get_minio_client()
        temp_file = self._unique_temp_path('template')

        # Object names have no leading slash.
        object_name = file_path.lstrip('/')
        try:
            client.fget_object(self.bucket_name, object_name, temp_file)
        except S3Error as e:
            raise Exception(f"从MinIO下载模板文件失败: {str(e)}") from e
        return temp_file

    def fill_template(self, template_path: str, field_data: Dict[str, str]) -> str:
        """Fill ``{{field_code}}`` placeholders in a Word template.

        Placeholders are replaced in body paragraphs and in tables (including
        nested tables), also when Word has split a placeholder across
        several runs.

        Args:
            template_path: Path of the template file.
            field_data: Mapping ``{'field_code': 'field_value'}``. ``None``
                values are rendered as an empty string; other values are
                converted with ``str``.

        Returns:
            Path of the filled document (a unique temporary file).

        Raises:
            Exception: If the template cannot be opened, filled or saved.
        """
        try:
            doc = Document(template_path)

            replacements = {
                f"{{{{{field_code}}}}}": self._to_text(field_value)
                for field_code, field_value in (field_data or {}).items()
            }
            for paragraph in self._iter_paragraphs(doc):
                self._replace_in_paragraph(paragraph, replacements)

            output_file = self._unique_temp_path('filled')
            doc.save(output_file)
            return output_file

        except Exception as e:
            raise Exception(f"填充模板失败: {str(e)}") from e

    def upload_to_minio(self, file_path: str, file_name: str) -> str:
        """Upload a local ``.docx`` file to MinIO.

        Args:
            file_path: Local file path.
            file_name: File name to use as the last segment of the object key.

        Returns:
            Relative path in MinIO, starting with ``/``.

        Raises:
            Exception: If MinIO reports an error (the ``S3Error`` is chained).
        """
        client = self.get_minio_client()

        # Objects are grouped under "<tenant_id>/<timestamp>/".
        now = datetime.now()
        object_name = f"{self.tenant_id}/{now.strftime('%Y%m%d%H%M%S')}/{file_name}"

        try:
            client.fput_object(
                self.bucket_name,
                object_name,
                file_path,
                content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            )
        except S3Error as e:
            raise Exception(f"上传文件到MinIO失败: {str(e)}") from e

        return f"/{object_name}"

    def generate_document(self, template_code: str, input_data: List[Dict], file_info: Dict) -> Dict:
        """Generate a document from a template and upload it.

        Args:
            template_code: Template code.
            input_data: List like ``[{'fieldCode': 'xxx', 'fieldValue': 'xxx'}]``.
                ``None`` is treated as an empty list; non-dict items and items
                without a ``fieldCode`` are skipped.
            file_info: Dict like
                ``{'fileId': 1, 'fileName': 'xxx.doc', 'templateCode': 'xxx'}``.

        Returns:
            ``{'filePath': <MinIO relative path>, 'fileName': <generated name>}``.

        Raises:
            Exception: If the template code is unknown, or if downloading,
                filling or uploading fails.
        """
        file_config = self.get_file_config_by_template_code(template_code)
        if not file_config:
            raise Exception(f"模板编码 {template_code} 不存在")

        # Convert the list of field entries into {field_code: text}.
        field_data = {}
        for item in input_data or []:
            if not isinstance(item, dict):
                continue
            field_code = item.get('fieldCode', '')
            if field_code:
                field_data[field_code] = self._to_text(item.get('fieldValue', ''))

        template_path = None
        filled_doc_path = None
        try:
            template_path = self.download_template_from_minio(file_config['file_path'])
            filled_doc_path = self.fill_template(template_path, field_data)

            # The generated name always carries a .docx extension.
            original_file_name = (file_info or {}).get('fileName') or 'generated.doc'
            generated_file_name = self.generate_document_name(original_file_name, field_data)

            file_path = self.upload_to_minio(filled_doc_path, generated_file_name)

            return {
                'filePath': file_path,
                'fileName': generated_file_name
            }

        finally:
            # Best-effort cleanup of temporary files; a failure to delete
            # must not mask the real result or error.
            for temp_path in (template_path, filled_doc_path):
                if temp_path and os.path.exists(temp_path):
                    try:
                        os.remove(temp_path)
                    except OSError:
                        pass

    def generate_document_id(self) -> str:
        """Return a document ID of the form ``DOC<YYYYmmddHHMMSS><mmm>``.

        ``mmm`` is the zero-padded millisecond part of the current time, so
        the ID always has the same length.
        """
        now = datetime.now()
        return f"DOC{now.strftime('%Y%m%d%H%M%S')}{now.microsecond // 1000:03d}"

    def generate_document_name(self, original_file_name: str, field_data: Dict[str, str]) -> str:
        """Build the generated document's file name.

        Args:
            original_file_name: Original file name; only its stem is used.
            field_data: Field data; a non-empty ``target_name`` is appended
                as a suffix.

        Returns:
            Name such as ``"初步核实审批表_张三.docx"``. Characters in the
            suffix that are unsafe in file names or object keys are replaced
            with ``_``.
        """
        base_name = Path(original_file_name).stem

        suffix = ''
        target_name = (field_data or {}).get('target_name')
        if target_name:
            # target_name is caller-supplied; keep it from introducing path
            # separators into the MinIO object key.
            safe_name = self._UNSAFE_NAME_CHARS.sub('_', str(target_name))
            suffix = f"_{safe_name}"

        return f"{base_name}{suffix}.docx"
|
||
|