# ai-business-write/services/document_service.py
#
# 332 lines
# 12 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
#
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Document generation service - fills Word templates and uploads the results to MinIO.
"""
import logging
import os
import re
import tempfile
from datetime import datetime, timedelta
from pathlib import Path
from typing import Dict, List, Optional

import pymysql
from docx import Document
from minio import Minio
from minio.error import S3Error


class DocumentService:
    """Generate Word documents from templates stored in MinIO.

    The service looks up a template configuration row in MySQL, downloads
    the referenced ``.docx`` template from MinIO, replaces ``{{field_code}}``
    placeholders with supplied values, uploads the filled document back to
    MinIO and returns its path together with a presigned download URL.
    """

    # Tenant used when the caller does not pass one to the constructor.
    DEFAULT_TENANT_ID = 615873064429507639

    # MIME type sent with every uploaded document.
    _DOCX_CONTENT_TYPE = (
        'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    )

    # Characters that must not appear in a generated file name: path
    # separators, characters reserved on common file systems, control chars.
    _UNSAFE_NAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

    _logger = logging.getLogger(__name__)

    def __init__(self, tenant_id: Optional[int] = None):
        """Read MinIO and database settings from the environment.

        Args:
            tenant_id: Tenant whose templates are used and under whose prefix
                generated files are stored. Defaults to ``DEFAULT_TENANT_ID``.
        """
        # SECURITY NOTE(review): the fallback values below embed real-looking
        # credentials in source control. They are kept only so existing
        # deployments that rely on them keep working; they should be rotated
        # and supplied exclusively through the environment.
        self.minio_config = {
            'endpoint': os.getenv('MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
            'access_key': os.getenv('MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
            'secret_key': os.getenv('MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'),
            'secure': os.getenv('MINIO_SECURE', 'true').lower() == 'true',
        }
        self.bucket_name = os.getenv('MINIO_BUCKET', 'finyx')
        self.db_config = {
            'host': os.getenv('DB_HOST', '152.136.177.240'),
            'port': int(os.getenv('DB_PORT', 5012)),
            'user': os.getenv('DB_USER', 'finyx'),
            'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
            'database': os.getenv('DB_NAME', 'finyx'),
            'charset': 'utf8mb4',
        }
        self.tenant_id = self.DEFAULT_TENANT_ID if tenant_id is None else tenant_id

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _to_text(value) -> str:
        """Convert a field value to text; ``None`` becomes an empty string."""
        return '' if value is None else str(value)

    @staticmethod
    def _remove_quietly(path: Optional[str]) -> None:
        """Best-effort removal of a temporary file; a failed delete is ignored."""
        if not path:
            return
        try:
            os.remove(path)
        except OSError:
            # Already gone or not removable; cleanup must never mask the
            # outcome of the operation that created the file.
            pass

    @classmethod
    def _sanitize_name_part(cls, text: str) -> str:
        """Replace characters that are unsafe in a file/object name with ``_``."""
        return cls._UNSAFE_NAME_CHARS.sub('_', text)

    @classmethod
    def _iter_paragraphs(cls, container):
        """Yield the paragraphs of *container*, then those of its tables.

        *container* is anything exposing ``paragraphs`` and ``tables`` (the
        document body or a table cell), so tables nested inside cells are
        visited as well.
        """
        yield from container.paragraphs
        for table in container.tables:
            for row in table.rows:
                for cell in row.cells:
                    yield from cls._iter_paragraphs(cell)

    @staticmethod
    def _replace_placeholders(paragraph, replacements: Dict[str, str]) -> None:
        """Replace placeholders in the runs of *paragraph*, in place.

        Word frequently splits a visually contiguous ``{{code}}`` across
        several runs, so matching is done on the concatenated run text. The
        replacement value is written into the run where the placeholder
        starts and the remaining pieces are trimmed from the following runs;
        runs that are not touched keep their content and formatting.

        Args:
            paragraph: Object exposing ``runs``, each run having a read/write
                ``text`` attribute.
            replacements: Mapping of literal placeholder text to its value.
        """
        runs = paragraph.runs
        # Cheap pre-check: every placeholder starts with "{{".
        if '{{' not in ''.join(run.text for run in runs):
            return

        for placeholder, value in replacements.items():
            search_from = 0
            while True:
                texts = [run.text for run in runs]
                start = ''.join(texts).find(placeholder, search_from)
                if start < 0:
                    break
                end = start + len(placeholder)

                offset = 0
                value_written = False
                for run, text in zip(runs, texts):
                    run_start, offset = offset, offset + len(text)
                    if offset <= start:
                        continue
                    if run_start >= end:
                        break
                    cut_from = max(start - run_start, 0)
                    cut_to = min(end, offset) - run_start
                    inserted = '' if value_written else value
                    value_written = True
                    new_text = text[:cut_from] + inserted + text[cut_to:]
                    # Only assign on change: assigning run text rewrites the
                    # run's content.
                    if new_text != text:
                        run.text = new_text

                # Resume after the inserted value so a value that itself
                # contains the placeholder cannot loop forever.
                search_from = start + len(value)

    # ------------------------------------------------------------------
    # Infrastructure clients
    # ------------------------------------------------------------------
    def get_connection(self):
        """Open and return a new database connection built from ``db_config``."""
        return pymysql.connect(**self.db_config)

    def get_minio_client(self):
        """Create and return a MinIO client built from ``minio_config``."""
        return Minio(
            self.minio_config['endpoint'],
            access_key=self.minio_config['access_key'],
            secret_key=self.minio_config['secret_key'],
            secure=self.minio_config['secure'],
        )

    # ------------------------------------------------------------------
    # Template lookup / download
    # ------------------------------------------------------------------
    def get_file_config_by_id(self, file_id: int) -> Optional[Dict]:
        """Fetch the enabled template configuration for *file_id*.

        Args:
            file_id: Primary key of the file configuration row.

        Returns:
            A dict with ``id``, ``name`` and ``file_path``, or ``None`` when
            no row with ``state = 1`` exists for this tenant.
        """
        sql = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE id = %s
            AND tenant_id = %s
            AND state = 1
        """
        conn = self.get_connection()
        try:
            # The cursor is closed by the context manager; the connection is
            # closed even if creating the cursor fails.
            with conn.cursor(pymysql.cursors.DictCursor) as cursor:
                cursor.execute(sql, (file_id, self.tenant_id))
                config = cursor.fetchone()
        finally:
            conn.close()

        if not config:
            return None
        return {
            'id': config['id'],
            'name': config['name'],
            'file_path': config['file_path'],
        }

    def download_template_from_minio(self, file_path: str) -> str:
        """Download a template from MinIO into a unique temporary file.

        Args:
            file_path: Path of the object inside the bucket, e.g.
                '/615873064429507639/TEMPLATE/2024/11/初步核实审批表模板.docx'.
                Leading slashes are stripped to form the object name.

        Returns:
            Path of the local temporary file. The caller must delete it.

        Raises:
            Exception: If *file_path* is empty or the download fails.
        """
        if not file_path:
            raise Exception("模板文件路径不能为空请检查数据库中模板配置的file_path字段")

        client = self.get_minio_client()
        # mkstemp gives a collision-free name; a timestamp-based name is
        # shared by every request arriving within the same second.
        fd, temp_file = tempfile.mkstemp(prefix='template_', suffix='.docx')
        os.close(fd)
        try:
            object_name = file_path.lstrip('/')
            client.fget_object(self.bucket_name, object_name, temp_file)
        except S3Error as e:
            self._remove_quietly(temp_file)
            raise Exception(f"从MinIO下载模板文件失败: {str(e)}") from e
        except Exception:
            self._remove_quietly(temp_file)
            raise
        return temp_file

    # ------------------------------------------------------------------
    # Template filling
    # ------------------------------------------------------------------
    def fill_template(self, template_path: str, field_data: Dict[str, str]) -> str:
        """Fill the ``{{field_code}}`` placeholders of a Word template.

        Body paragraphs and table cells (including nested tables) are
        processed. Placeholders split across several runs are handled.

        Args:
            template_path: Path of the local ``.docx`` template.
            field_data: Mapping ``{'field_code': 'field_value'}``. ``None``
                values are written as empty text; other values are converted
                with ``str``.

        Returns:
            Path of a new temporary file holding the filled document. The
            caller must delete it.

        Raises:
            Exception: If the template cannot be opened, filled or saved.
        """
        output_file = None
        try:
            doc = Document(template_path)
            replacements = {
                f"{{{{{field_code}}}}}": self._to_text(field_value)
                for field_code, field_value in field_data.items()
            }
            for paragraph in self._iter_paragraphs(doc):
                self._replace_placeholders(paragraph, replacements)

            fd, output_file = tempfile.mkstemp(prefix='filled_', suffix='.docx')
            os.close(fd)
            doc.save(output_file)
            return output_file
        except Exception as e:
            # Do not leave a half-written output file behind.
            self._remove_quietly(output_file)
            raise Exception(f"填充模板失败: {str(e)}") from e

    # ------------------------------------------------------------------
    # Upload / URL generation
    # ------------------------------------------------------------------
    def upload_to_minio(self, file_path: str, file_name: str) -> str:
        """Upload a local file to MinIO under a timestamped tenant prefix.

        Args:
            file_path: Path of the local file to upload.
            file_name: Name the object gets inside the timestamp folder.

        Returns:
            The object path with a leading ``/``, in the form
            ``/<tenant_id>/<YYYYmmddHHMMSS>/<file_name>``.

        Raises:
            Exception: If MinIO rejects the upload.
        """
        client = self.get_minio_client()
        try:
            now = datetime.now()
            # NOTE(review): the folder has one-second resolution, so two
            # uploads of the same file name within one second target the
            # same object. The format is kept for compatibility.
            object_name = f"{self.tenant_id}/{now.strftime('%Y%m%d%H%M%S')}/{file_name}"
            client.fput_object(
                self.bucket_name,
                object_name,
                file_path,
                content_type=self._DOCX_CONTENT_TYPE,
            )
            return f"/{object_name}"
        except S3Error as e:
            raise Exception(f"上传文件到MinIO失败: {str(e)}") from e

    def generate_document(self, file_id: int, input_data: List[Dict], file_info: Dict) -> Dict:
        """Generate a document from a template and publish it to MinIO.

        Args:
            file_id: ID of the file configuration (template) to use.
            input_data: List of ``{'fieldCode': ..., 'fieldValue': ...}``
                items; items without a field code are ignored.
            file_info: Dict such as ``{'fileId': 1, 'fileName': 'xxx.doc'}``;
                only ``fileName`` is read here.

        Returns:
            Dict with ``filePath`` (object path in MinIO), ``fileName``
            (generated document name) and ``downloadUrl`` (presigned URL, or
            ``None`` if it could not be created).

        Raises:
            Exception: If the template is missing/disabled, has no file path,
                or any download/fill/upload step fails.
        """
        file_config = self.get_file_config_by_id(file_id)
        if not file_config:
            raise Exception(f"文件ID {file_id} 对应的模板不存在或未启用")

        template_object_path = file_config.get('file_path')
        if not template_object_path:
            raise Exception(f"文件ID {file_id} ({file_config.get('name', '')}) 的文件路径(file_path)为空,请检查数据库配置")

        # Flatten the list of field items into {code: text}.
        field_data = {}
        for item in input_data or []:
            field_code = item.get('fieldCode', '')
            if field_code:
                field_data[field_code] = self._to_text(item.get('fieldValue'))

        template_path = None
        filled_doc_path = None
        try:
            template_path = self.download_template_from_minio(template_object_path)
            filled_doc_path = self.fill_template(template_path, field_data)

            # The generated name always carries the .docx extension.
            original_file_name = (file_info or {}).get('fileName') or 'generated.doc'
            generated_file_name = self.generate_document_name(original_file_name, field_data)

            uploaded_path = self.upload_to_minio(filled_doc_path, generated_file_name)
            download_url = self.generate_presigned_download_url(uploaded_path)
            return {
                'filePath': uploaded_path,
                'fileName': generated_file_name,
                'downloadUrl': download_url,
            }
        finally:
            # Temporary files are removed whether or not generation succeeded.
            self._remove_quietly(template_path)
            self._remove_quietly(filled_doc_path)

    def generate_document_id(self) -> str:
        """Return a document ID of the form ``DOC<YYYYmmddHHMMSS><mmm>``.

        ``mmm`` is the current millisecond, zero-padded to three digits, so
        the ID always has the same length.
        """
        now = datetime.now()
        return f"DOC{now.strftime('%Y%m%d%H%M%S')}{now.microsecond // 1000:03d}"

    def generate_document_name(self, original_file_name: str, field_data: Dict[str, str]) -> str:
        """Build the name of the generated document.

        The name is the stem of *original_file_name*, optionally followed by
        ``_<target_name>`` when ``field_data['target_name']`` is non-empty,
        with a ``.docx`` extension. Path separators and other characters that
        are unsafe in file names are replaced with ``_``.

        Args:
            original_file_name: Original file name, e.g. ``'xxx.doc'``.
            field_data: Field values; only ``target_name`` is read.

        Returns:
            The generated name, e.g. "初步核实审批表_张三.docx".
        """
        base_name = self._sanitize_name_part(Path(original_file_name).stem) or 'generated'
        target_name = field_data.get('target_name')
        suffix = f"_{self._sanitize_name_part(str(target_name))}" if target_name else ''
        return f"{base_name}{suffix}.docx"

    def generate_presigned_download_url(self, file_path: str, expires_days: int = 7) -> Optional[str]:
        """Create a presigned MinIO download URL for an uploaded object.

        Failures are logged and reported as ``None`` so that a missing URL
        never aborts document generation.

        Args:
            file_path: Object path in MinIO, e.g.
                '/615873064429507639/20251205090700/初步核实审批表_张三.docx'.
            expires_days: Validity of the URL in days (default 7).

        Returns:
            The presigned URL, or ``None`` if *file_path* is empty or the URL
            could not be generated.
        """
        if not file_path:
            return None
        try:
            client = self.get_minio_client()
            object_name = file_path.lstrip('/')
            return client.presigned_get_object(
                self.bucket_name,
                object_name,
                expires=timedelta(days=expires_days),
            )
        except Exception as e:
            # Deliberately best-effort: record the problem and carry on.
            self._logger.warning("生成预签名URL失败: %s", e)
            return None