ai-business-write/services/document_service.py

303 lines
11 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
文档生成服务 - 处理Word模板填充和MinIO文件上传
"""
import json
import os
import re
import tempfile
import uuid
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional

import pymysql
from docx import Document
from minio import Minio
from minio.error import S3Error
class DocumentService:
    """Fill Word templates stored in MinIO and upload the generated documents.

    The service looks up a template by its code in the ``f_polic_file_config``
    table, downloads the ``.docx`` template from MinIO, substitutes
    ``{{field_code}}`` placeholders and uploads the result back to MinIO.
    """

    def __init__(self):
        # SECURITY NOTE(review): the fallback values below embed real-looking
        # credentials and hosts in source control. They are kept unchanged so
        # existing deployments keep working, but they should be rotated and
        # supplied through the environment only.

        # MinIO configuration
        self.minio_config = {
            'endpoint': os.getenv('MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
            'access_key': os.getenv('MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
            'secret_key': os.getenv('MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'),
            'secure': os.getenv('MINIO_SECURE', 'true').lower() == 'true'
        }
        self.bucket_name = os.getenv('MINIO_BUCKET', 'finyx')
        # Database configuration
        self.db_config = {
            'host': os.getenv('DB_HOST', '152.136.177.240'),
            'port': int(os.getenv('DB_PORT', 5012)),
            'user': os.getenv('DB_USER', 'finyx'),
            'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
            'database': os.getenv('DB_NAME', 'finyx'),
            'charset': 'utf8mb4'
        }
        self.tenant_id = 615873064429507639

    def get_connection(self):
        """Open and return a new database connection built from ``db_config``."""
        return pymysql.connect(**self.db_config)

    def get_minio_client(self):
        """Create and return a MinIO client built from ``minio_config``."""
        return Minio(
            self.minio_config['endpoint'],
            access_key=self.minio_config['access_key'],
            secret_key=self.minio_config['secret_key'],
            secure=self.minio_config['secure']
        )

    def get_file_config_by_template_code(self, template_code: str) -> Optional[Dict]:
        """Look up the file configuration for a template code.

        Rows are scanned in the order returned by the query; a row matches
        when either its ``template_code`` column or the ``template_code`` key
        of its ``input_data`` JSON equals ``template_code``.

        Args:
            template_code: Template code, e.g.
                'PRELIMINARY_VERIFICATION_APPROVAL'.

        Returns:
            A dict with ``id``, ``name``, ``file_path`` and ``template_code``,
            or ``None`` when no active configuration matches (or the code is
            empty).
        """
        # An empty code must not match rows whose template_code is NULL/empty.
        if not template_code:
            return None

        sql = """
            SELECT id, name, file_path, input_data, template_code
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """
        conn = self.get_connection()
        try:
            # Creating the cursor inside the try guarantees the connection is
            # closed even if cursor creation itself fails.
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            try:
                cursor.execute(sql, (self.tenant_id,))
                configs = cursor.fetchall()
            finally:
                cursor.close()
        finally:
            conn.close()

        for config in configs:
            if self._config_matches(config, template_code):
                return {
                    'id': config['id'],
                    'name': config['name'],
                    'file_path': config['file_path'],
                    'template_code': template_code
                }
        return None

    @staticmethod
    def _config_matches(config: Dict, template_code: str) -> bool:
        """Return True when a config row refers to ``template_code``."""
        # Method 1: the dedicated template_code column.
        if config.get('template_code') == template_code:
            return True
        # Method 2: a template_code key inside the input_data JSON.
        raw_input_data = config.get('input_data')
        if not raw_input_data:
            return False
        try:
            parsed = json.loads(raw_input_data)
        except (json.JSONDecodeError, TypeError):
            return False
        # The JSON may legally be a list/str/number; only objects can match.
        return isinstance(parsed, dict) and parsed.get('template_code') == template_code

    @staticmethod
    def _make_temp_path(prefix: str) -> str:
        """Return a unique ``.docx`` path inside the system temp directory.

        A timestamp alone has one-second resolution, so concurrent requests
        would share (and later delete) each other's files; the UUID suffix
        makes every path unique. The file itself is not created.
        """
        stamp = datetime.now().strftime('%Y%m%d%H%M%S')
        return os.path.join(
            tempfile.gettempdir(),
            f"{prefix}_{stamp}_{uuid.uuid4().hex}.docx"
        )

    @staticmethod
    def _to_text(value) -> str:
        """Convert a field value to text; ``None`` becomes an empty string."""
        return '' if value is None else str(value)

    @staticmethod
    def _remove_quietly(path: Optional[str]) -> None:
        """Best-effort removal of a temporary file; filesystem errors are ignored."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # Cleanup must never mask the real result or error of the caller.
            pass

    def download_template_from_minio(self, file_path: str) -> str:
        """Download a template from MinIO into the system temp directory.

        Args:
            file_path: Relative path inside the bucket, e.g.
                '/615873064429507639/TEMPLATE/2024/11/初步核实审批表模板.docx'.

        Returns:
            Path of the downloaded local temporary file.

        Raises:
            Exception: If MinIO reports an ``S3Error`` during the download.
        """
        client = self.get_minio_client()
        temp_file = self._make_temp_path('template')
        try:
            # Object names in MinIO have no leading slash.
            object_name = file_path.lstrip('/')
            client.fget_object(self.bucket_name, object_name, temp_file)
            return temp_file
        except S3Error as e:
            raise Exception(f"从MinIO下载模板文件失败: {str(e)}") from e

    @classmethod
    def _iter_paragraphs(cls, container):
        """Yield every paragraph of ``container``, descending into its tables.

        ``container`` is anything exposing ``paragraphs`` and ``tables``
        (the document body or a table cell), so nested tables are covered.
        """
        yield from container.paragraphs
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cls._iter_paragraphs(cell)

    @staticmethod
    def _replace_in_paragraph(paragraph, replacements: Dict[str, str]) -> None:
        """Replace placeholders in a paragraph, even when split across runs.

        Word frequently stores one visible ``{{code}}`` as several adjacent
        runs. The replacement text is written into the first run that
        overlaps the placeholder and the remaining placeholder characters are
        removed from the following runs, so runs that do not overlap a
        placeholder are never touched.
        """
        runs = paragraph.runs
        if not runs:
            return
        for placeholder, value in replacements.items():
            full_text = ''.join(run.text for run in runs)
            start = full_text.find(placeholder)
            while start != -1:
                end = start + len(placeholder)
                offset = 0
                value_written = False
                for run in runs:
                    text = run.text
                    run_start, run_end = offset, offset + len(text)
                    offset = run_end
                    if run_end <= start:
                        continue
                    if run_start >= end:
                        break
                    lo = max(start, run_start) - run_start
                    hi = min(end, run_end) - run_start
                    inserted = '' if value_written else value
                    run.text = text[:lo] + inserted + text[hi:]
                    value_written = True
                full_text = full_text[:start] + value + full_text[end:]
                # Resume after the inserted value so it is never re-scanned.
                start = full_text.find(placeholder, start + len(value))

    def fill_template(self, template_path: str, field_data: Dict[str, str]) -> str:
        """Fill the ``{{field_code}}`` placeholders of a Word template.

        Placeholders are replaced in body paragraphs and in table cells
        (including nested tables).

        Args:
            template_path: Path of the template file.
            field_data: Mapping of field code to field value; ``None`` values
                are rendered as an empty string, other values via ``str``.

        Returns:
            Path of the filled document, saved in the system temp directory.

        Raises:
            Exception: If the template cannot be opened, filled or saved.
        """
        try:
            doc = Document(template_path)
            replacements = {
                f"{{{{{field_code}}}}}": self._to_text(field_value)
                for field_code, field_value in field_data.items()
            }
            for paragraph in self._iter_paragraphs(doc):
                self._replace_in_paragraph(paragraph, replacements)

            output_file = self._make_temp_path('filled')
            doc.save(output_file)
            return output_file
        except Exception as e:
            raise Exception(f"填充模板失败: {str(e)}") from e

    def upload_to_minio(self, file_path: str, file_name: str) -> str:
        """Upload a local file to MinIO.

        The object is stored under ``<tenant_id>/<YYYYmmddHHMMSS>/<file_name>``.

        Args:
            file_path: Local file path.
            file_name: File name to use inside the bucket.

        Returns:
            The relative path of the object in MinIO, starting with '/'.

        Raises:
            Exception: If MinIO reports an ``S3Error`` during the upload.
        """
        client = self.get_minio_client()
        try:
            now = datetime.now()
            object_name = f"{self.tenant_id}/{now.strftime('%Y%m%d%H%M%S')}/{file_name}"
            client.fput_object(
                self.bucket_name,
                object_name,
                file_path,
                content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
            )
            return f"/{object_name}"
        except S3Error as e:
            raise Exception(f"上传文件到MinIO失败: {str(e)}") from e

    def generate_document(self, template_code: str, input_data: List[Dict], file_info: Dict) -> Dict:
        """Generate a document from a template and upload it.

        Args:
            template_code: Template code.
            input_data: List of fields, each shaped like
                ``{'fieldCode': 'xxx', 'fieldValue': 'xxx'}``. Entries without
                a field code are ignored.
            file_info: File information such as
                ``{'fileId': 1, 'fileName': 'xxx.doc', 'templateCode': 'xxx'}``.

        Returns:
            A dict with ``filePath`` (relative MinIO path) and ``fileName``
            (generated document name).

        Raises:
            Exception: If the template code is unknown, or download, filling
                or upload fails.
        """
        file_config = self.get_file_config_by_template_code(template_code)
        if not file_config:
            raise Exception(f"模板编码 {template_code} 不存在")

        # Convert the field list into a {code: text} mapping.
        field_data = {}
        for item in input_data or []:
            field_code = item.get('fieldCode', '')
            if field_code:
                field_data[field_code] = self._to_text(item.get('fieldValue'))

        template_path = None
        filled_doc_path = None
        try:
            template_path = self.download_template_from_minio(file_config['file_path'])
            filled_doc_path = self.fill_template(template_path, field_data)

            # Fall back to the default name when fileName is missing or empty.
            original_file_name = (file_info or {}).get('fileName') or 'generated.doc'
            generated_file_name = self.generate_document_name(original_file_name, field_data)

            file_path = self.upload_to_minio(filled_doc_path, generated_file_name)
            return {
                'filePath': file_path,
                'fileName': generated_file_name
            }
        finally:
            # Always remove the temporary files, on success and on failure.
            self._remove_quietly(template_path)
            self._remove_quietly(filled_doc_path)

    @staticmethod
    def _format_document_id(moment: datetime) -> str:
        """Format a document ID as ``DOC`` + ``YYYYmmddHHMMSS`` + milliseconds."""
        # Zero-padded milliseconds keep the ID fixed-width and monotonic
        # within a second (5000 microseconds -> "005", not "500").
        return f"DOC{moment.strftime('%Y%m%d%H%M%S')}{moment.microsecond // 1000:03d}"

    def generate_document_id(self) -> str:
        """Generate a document ID from the current local time."""
        return self._format_document_id(datetime.now())

    def generate_document_name(self, original_file_name: str, field_data: Dict[str, str]) -> str:
        """Build the generated document's file name.

        Args:
            original_file_name: Original file name; only its stem is used.
            field_data: Field data; a non-empty ``target_name`` is appended
                as a suffix.

        Returns:
            The generated name with a ``.docx`` extension, e.g.
            "初步核实审批表_张三.docx".
        """
        base_name = Path(original_file_name).stem

        suffix = ''
        target_name = (field_data or {}).get('target_name')
        if target_name:
            suffix = f"_{self._to_text(target_name)}"

        # The name becomes part of the MinIO object key, so path separators
        # and control characters coming from user input are neutralised.
        safe_name = re.sub(r'[\\/\x00-\x1f]', '_', f"{base_name}{suffix}")
        return f"{safe_name}.docx"