"""
Refresh the metadata of two template files and upload them to MinIO.

- 8.XXX初核情况报告.docx
- 8-1请示报告卡(初核报告结论) .docx
"""
|
||
import os
|
||
import re
|
||
import json
|
||
import sys
|
||
import pymysql
|
||
from minio import Minio
|
||
from minio.error import S3Error
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from docx import Document
|
||
from typing import Dict, List, Optional
|
||
|
||
# Force UTF-8 console output on Windows so the Chinese text and the check-mark
# symbols printed by this script do not fail to encode.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _stream = getattr(sys, _stream_name)
        if hasattr(_stream, 'reconfigure'):
            # Python 3.7+: change the encoding in place and keep the same stream object.
            _stream.reconfigure(encoding='utf-8', errors='replace')
        elif hasattr(_stream, 'buffer'):
            # Older interpreters: re-wrap the underlying binary buffer.
            setattr(
                sys,
                _stream_name,
                io.TextIOWrapper(_stream.buffer, encoding='utf-8', errors='replace'),
            )
        # A stream with neither attribute (replaced by another object, or None)
        # is left untouched instead of raising AttributeError at import time.
|
||
|
||
# MinIO connection settings.
# NOTE(review): real credentials are committed in this file. Every value can be
# overridden through a FINYX_* environment variable; the literals are kept only
# as backward-compatible defaults and should be rotated and removed from source.
MINIO_CONFIG = {
    'endpoint': os.environ.get('FINYX_MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
    'access_key': os.environ.get('FINYX_MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
    'secret_key': os.environ.get(
        'FINYX_MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'
    ),
    # Anything other than "0", "false" or "no" keeps TLS enabled (the default).
    'secure': os.environ.get('FINYX_MINIO_SECURE', 'true').strip().lower()
    not in ('0', 'false', 'no'),
}

# Database connection settings (same environment-override scheme as above).
DB_CONFIG = {
    'host': os.environ.get('FINYX_DB_HOST', '152.136.177.240'),
    'port': int(os.environ.get('FINYX_DB_PORT', '5012')),
    'user': os.environ.get('FINYX_DB_USER', 'finyx'),
    'password': os.environ.get('FINYX_DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.environ.get('FINYX_DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}

# Fixed values written into every record / object key.
TENANT_ID = 615873064429507639
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
BUCKET_NAME = 'finyx'

# Template files to process (paths relative to the working directory).
TEMPLATE_FILES = [
    'template_finish/2-初核模版/3.初核结论/8.XXX初核情况报告.docx',
    'template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx',
]

# File name -> candidate record names, tried in order when looking up the
# existing database row for that template.
TEMPLATE_NAME_MAP = {
    '8.XXX初核情况报告.docx': ['8.XXX初核情况报告', 'XXX初核情况报告'],
    '8-1请示报告卡(初核报告结论) .docx': ['8-1请示报告卡(初核报告结论) ', '请示报告卡(初核报告结论)'],
}
|
||
|
||
|
||
def generate_id():
    """Build a numeric ID from the current time plus a random component.

    Returns:
        int: the Unix time in milliseconds multiplied by 1000, plus a random
        integer in the range [100000, 999999].
    """
    import random
    import time

    millis = int(time.time() * 1000)
    return millis * 1000 + random.randint(100000, 999999)
|
||
|
||
|
||
# Matches {{field_code}}; the capture group can never contain '}'.
_PLACEHOLDER_RE = re.compile(r'\{\{([^}]+)\}\}')


def _placeholders_in_text(text):
    """Return the cleaned placeholder names found in one piece of paragraph text."""
    names = []
    for raw in _PLACEHOLDER_RE.findall(text):
        cleaned = raw.strip()
        # Skip empty names and malformed ones such as "{{ {field }}".
        if cleaned and '{' not in cleaned:
            names.append(cleaned)
    return names


def extract_placeholders_from_docx(file_path: str) -> List[str]:
    """
    Extract every ``{{field_code}}`` placeholder from a docx file.

    Body paragraphs and the paragraphs of every cell in the document's
    top-level tables are scanned.

    Args:
        file_path: path of the docx file

    Returns:
        Sorted list of unique placeholder names, e.g.
        ['field_code1', 'field_code2', ...]. If the file cannot be read, an
        error line is printed and an empty list is returned.
    """
    placeholders = set()

    try:
        doc = Document(file_path)

        # Placeholders in body paragraphs.
        for paragraph in doc.paragraphs:
            placeholders.update(_placeholders_in_text(paragraph.text))

        # Placeholders in table cells.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        placeholders.update(_placeholders_in_text(paragraph.text))

    except Exception as e:
        # Deliberate best effort: report the problem and let the caller continue.
        print(f" 错误: 读取文件失败 - {str(e)}")
        return []

    return sorted(placeholders)
|
||
|
||
|
||
def normalize_template_name(file_name: str) -> str:
    """
    Normalize a template file name into a bare template name.

    The extension, any parenthesised remark and a leading ordinal prefix are
    removed, in that order.

    Args:
        file_name: file name or path, e.g. "8.XXX初核情况报告.docx"

    Returns:
        The normalized name, e.g. "XXX初核情况报告".
    """
    # Drop the directory part and the extension.
    name = Path(file_name).stem

    # Drop parenthesised remarks, written with either ASCII or full-width
    # parentheses.
    name = re.sub(r'[((].*?[))]', '', name).strip()

    # Drop a leading ordinal such as "8.", "8-1" or "3.2-" together with the
    # whitespace that follows it. Multi-part ordinals are consumed as a whole,
    # so "8-1请示报告卡" becomes "请示报告卡" rather than "1请示报告卡".
    name = re.sub(r'^\d+(?:[.\-]\d+)*[.\-]?\s*', '', name).strip()

    return name
|
||
|
||
|
||
def upload_to_minio(client: Minio, file_path: str, template_name: str) -> str:
    """
    Upload a template file to MinIO.

    The object key is ``{TENANT_ID}/TEMPLATE/{year}/{month:02d}/{template_name}``,
    using the current local date.

    Args:
        client: MinIO client used for the upload
        file_path: local path of the file to upload
        template_name: file name used as the last segment of the object key

    Returns:
        The object key prefixed with "/".

    Raises:
        Exception: if the upload fails; the original error is attached as the cause.
    """
    now = datetime.now()
    object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{template_name}'

    try:
        client.fput_object(
            BUCKET_NAME,
            object_name,
            file_path,
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
    except Exception as e:
        # Chain the cause so the underlying MinIO error stays in the traceback.
        raise Exception(f"上传到MinIO失败: {str(e)}") from e

    return f"/{object_name}"
|
||
|
||
|
||
def find_template_by_names(conn, possible_names: List[str]) -> Optional[Dict]:
    """
    Look up a template record by trying each candidate name in order.

    Args:
        conn: open database connection
        possible_names: candidate values for the ``name`` column, most
            preferred first

    Returns:
        The first matching row as a dict (id, name, file_path, parent_id,
        input_data), or None when no candidate matches.
    """
    lookup_sql = """
        SELECT id, name, file_path, parent_id, input_data
        FROM f_polic_file_config
        WHERE tenant_id = %s AND name = %s
    """
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)

    try:
        # The first candidate that yields a row wins.
        for candidate in possible_names:
            dict_cursor.execute(lookup_sql, (TENANT_ID, candidate))
            row = dict_cursor.fetchone()
            if row:
                return row

        return None
    finally:
        dict_cursor.close()
|
||
|
||
|
||
def get_template_code_from_input_data(input_data: Optional[str]) -> str:
    """
    Read ``template_code`` out of a JSON-encoded ``input_data`` value.

    Args:
        input_data: JSON text of the record's input_data column, or None

    Returns:
        The stored template_code, or '' when input_data is empty, is not valid
        JSON, is not a JSON object, or has no (or an empty) template_code.
        This function never generates a code; callers do that on ''.
    """
    if not input_data:
        return ''

    try:
        data = json.loads(input_data)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-text input.
        return ''

    if not isinstance(data, dict):
        return ''

    # A JSON null or other empty value is reported the same way as a missing key.
    return data.get('template_code') or ''
|
||
|
||
|
||
def _resolve_template_code(file_name, template_id, existing_input_data=None):
    """Return the template_code stored in existing_input_data, else derive one from the file name."""
    template_code = get_template_code_from_input_data(existing_input_data)
    if template_code:
        return template_code

    if 'XXX初核情况报告' in file_name:
        return 'INVESTIGATION_REPORT'
    if '请示报告卡' in file_name and '初核报告结论' in file_name:
        return 'REPORT_CARD_CONCLUSION'
    # Unknown template: fall back to a code built from the last six ID digits.
    return f'TEMPLATE_{template_id % 1000000}'


def _build_input_data(template_code, placeholders):
    """Serialize the input_data JSON document stored with a template record."""
    return json.dumps({
        'template_code': template_code,
        'business_type': 'INVESTIGATION',
        'placeholders': placeholders
    }, ensure_ascii=False)


def update_template(conn, template_file_path: str, template_info: Dict, minio_path: str):
    """
    Create or update the database record of one template.

    The record is looked up by the candidate names registered in
    TEMPLATE_NAME_MAP (or the normalized file name). When none exists a new
    row is inserted; otherwise file_path and input_data of the existing row
    are overwritten and the row is re-enabled (state = 1).

    Args:
        conn: database connection
        template_file_path: path of the template file
        template_info: template information; ``template_info['placeholders']``
            is stored in input_data
        minio_path: path of the uploaded file in MinIO

    Returns:
        The ID of the inserted or updated record.

    Raises:
        Exception: on any failure, after rolling the transaction back; the
            original error is attached as the cause.
    """
    cursor = conn.cursor()

    try:
        file_name = Path(template_file_path).name
        possible_names = TEMPLATE_NAME_MAP.get(file_name, [normalize_template_name(file_name)])
        placeholders = template_info['placeholders']

        # Look for an existing record.
        existing_template = find_template_by_names(conn, possible_names)

        if not existing_template:
            print(" [WARN] 未找到数据库记录,将创建新记录")
            template_id = generate_id()
            template_name = possible_names[0]  # the first candidate is the preferred name

            input_data = _build_input_data(
                _resolve_template_code(file_name, template_id),
                placeholders,
            )

            insert_sql = """
                INSERT INTO f_polic_file_config
                (id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
                VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, %s)
            """
            cursor.execute(insert_sql, (
                template_id,
                TENANT_ID,
                None,  # parent_id: to be set according to the actual hierarchy
                template_name,
                input_data,
                minio_path,
                CREATED_BY,
                UPDATED_BY,
                1  # state: 1 means enabled
            ))
            print(f" [OK] 创建模板配置: {template_name}, ID: {template_id}")
        else:
            template_id = existing_template['id']
            template_name = existing_template['name']

            # Keep the template_code already stored on the record when there is one.
            input_data = _build_input_data(
                _resolve_template_code(file_name, template_id, existing_template.get('input_data')),
                placeholders,
            )

            update_sql = """
                UPDATE f_polic_file_config
                SET file_path = %s, input_data = %s, updated_time = NOW(), updated_by = %s, state = 1
                WHERE id = %s AND tenant_id = %s
            """
            cursor.execute(update_sql, (
                minio_path,
                input_data,
                UPDATED_BY,
                template_id,
                TENANT_ID
            ))
            print(f" [OK] 更新模板配置: {template_name}, ID: {template_id}")
            print(f" 占位符数量: {len(placeholders)}")
            if placeholders:
                print(f" 占位符: {', '.join(placeholders[:10])}{'...' if len(placeholders) > 10 else ''}")

        conn.commit()
        return template_id

    except Exception as e:
        conn.rollback()
        # Chain the cause so the underlying database error stays in the traceback.
        raise Exception(f"更新模板配置失败: {str(e)}") from e
    finally:
        cursor.close()
|
||
|
||
|
||
def _open_minio_client():
    """Create the MinIO client and check that the bucket exists; return None on failure."""
    try:
        client = Minio(
            MINIO_CONFIG['endpoint'],
            access_key=MINIO_CONFIG['access_key'],
            secret_key=MINIO_CONFIG['secret_key'],
            secure=MINIO_CONFIG['secure']
        )

        # Refuse to continue when the target bucket is missing.
        if not client.bucket_exists(BUCKET_NAME):
            print(f"✗ 存储桶 '{BUCKET_NAME}' 不存在")
            return None
        print("✓ MinIO连接成功")
        return client
    except Exception as e:
        print(f"✗ MinIO连接失败: {str(e)}")
        return None


def _process_template_file(conn, minio_client, template_file):
    """Extract placeholders from one template, upload it, and sync its database record."""
    # NOTE(review): extract_placeholders_from_docx returns [] when the file
    # cannot be read, so an unreadable file is still uploaded and its record is
    # written with an empty placeholder list - confirm this is intended.
    print(" 正在提取占位符...")
    placeholders = extract_placeholders_from_docx(template_file)
    print(f" ✓ 提取到 {len(placeholders)} 个占位符")
    if placeholders:
        print(f" 占位符: {', '.join(placeholders[:10])}{'...' if len(placeholders) > 10 else ''}")

    file_name = Path(template_file).name
    template_info = {
        'file_path': template_file,
        'file_name': file_name,
        'placeholders': placeholders
    }

    print(" 正在上传到MinIO...")
    minio_path = upload_to_minio(minio_client, template_file, file_name)
    print(f" ✓ 上传成功: {minio_path}")

    print(" 正在更新数据库...")
    template_id = update_template(conn, template_file, template_info, minio_path)
    print(f" ✓ 更新成功,模板ID: {template_id}")
    return template_id


def main():
    """
    Script entry point.

    Connects to the database and MinIO, processes every file listed in
    TEMPLATE_FILES (extract placeholders, upload, update the record) and
    prints a summary. A failure on one file is reported and does not stop the
    remaining files. The database connection is closed on every exit path.
    """
    import traceback

    print("=" * 80)
    print("更新模板文件信息并上传到MinIO")
    print("=" * 80)

    # Connect to the database.
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print("✓ 数据库连接成功")
    except Exception as e:
        print(f"✗ 数据库连接失败: {str(e)}")
        return

    try:
        minio_client = _open_minio_client()
        if minio_client is None:
            return

        success_count = 0
        failed_files = []

        for template_file in TEMPLATE_FILES:
            print(f"\n{'=' * 80}")
            print(f"处理模板: {template_file}")
            print(f"{'=' * 80}")

            if not os.path.exists(template_file):
                print(f" [ERROR] 文件不存在: {template_file}")
                failed_files.append(template_file)
                continue

            try:
                _process_template_file(conn, minio_client, template_file)
                success_count += 1
            except Exception as e:
                print(f" [ERROR] 处理失败: {str(e)}")
                traceback.print_exc()
                failed_files.append(template_file)

        # Summary.
        print(f"\n{'=' * 80}")
        print("处理完成")
        print(f"{'=' * 80}")
        print(f"成功: {success_count}/{len(TEMPLATE_FILES)}")
        if failed_files:
            print("失败的文件:")
            for failed in failed_files:
                print(f" - {failed}")
    finally:
        # Single close point, reached on early returns and unexpected errors too.
        conn.close()


if __name__ == '__main__':
    main()
|
||
|