ai-business-write/update_all_templates.py

468 lines
15 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
更新 template_finish 目录下所有模板文件
重新上传到 MinIO 并更新数据库信息,确保模板文件是最新版本
"""
import os
import sys
import json
import pymysql
from minio import Minio
from minio.error import S3Error
from datetime import datetime
from pathlib import Path
from typing import Dict, List, Optional
# Windows compatibility: switch the console streams to UTF-8 on a best-effort
# basis. Each stream is handled on its own so that a failure on stdout does
# not prevent stderr from being reconfigured.
if sys.platform == 'win32':
    for _stream in (sys.stdout, sys.stderr):
        try:
            _stream.reconfigure(encoding='utf-8')
        except (AttributeError, OSError, ValueError):
            # The stream may be None, may have been replaced by an object
            # without reconfigure(), or may refuse reconfiguration. Keep the
            # existing encoding instead of aborting the script.
            pass
# MinIO connection settings.
# Every value can be overridden through a FINYX_MINIO_* environment variable;
# the literals below are only fallbacks kept for backward compatibility.
# NOTE(review): credentials committed to source control should be rotated and
# then removed from this file.
MINIO_CONFIG = {
    'endpoint': os.environ.get('FINYX_MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
    'access_key': os.environ.get('FINYX_MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
    'secret_key': os.environ.get(
        'FINYX_MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'
    ),
    # HTTPS is used unless FINYX_MINIO_SECURE is set to a false-like value.
    'secure': os.environ.get('FINYX_MINIO_SECURE', 'true').strip().lower()
    not in ('0', 'false', 'no', 'off'),
}
# MySQL connection settings, passed as keyword arguments to pymysql.connect().
# Every value except the charset can be overridden through a FINYX_DB_*
# environment variable; the literals below are only fallbacks kept for
# backward compatibility.
# NOTE(review): credentials committed to source control should be rotated and
# then removed from this file.
DB_CONFIG = {
    'host': os.environ.get('FINYX_DB_HOST', '152.136.177.240'),
    # The port must be an integer, so the environment value is converted here.
    'port': int(os.environ.get('FINYX_DB_PORT', '5012')),
    'user': os.environ.get('FINYX_DB_USER', 'finyx'),
    'password': os.environ.get('FINYX_DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.environ.get('FINYX_DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}
# Fixed identifiers used in every database row and object key this script writes.
TENANT_ID = 615_873_064_429_507_639
CREATED_BY = 655_162_080_928_945_152
UPDATED_BY = 655_162_080_928_945_152
BUCKET_NAME = "finyx"

# Directory containing this script, and the template folder directly below it.
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT.joinpath("template_finish")
# Document type mapping, keyed by the complete file name (without extension).
# File names are kept exactly as they are on disk: each one is an independent
# template, and the key doubles as the template's display name.
# Each pair below is (file name, template code); note that one name ends with
# a trailing space, which is part of the key.
_TEMPLATE_CODES_BY_NAME = (
    ("1.请示报告卡XXX", "REPORT_CARD"),
    ("2.初步核实审批表XXX", "PRELIMINARY_VERIFICATION_APPROVAL"),
    ("3.附件初核方案(XXX)", "INVESTIGATION_PLAN"),
    ("谈话通知书第一联", "NOTIFICATION_LETTER_1"),
    ("谈话通知书第二联", "NOTIFICATION_LETTER_2"),
    ("谈话通知书第三联", "NOTIFICATION_LETTER_3"),
    ("1.请示报告卡(初核谈话)", "REPORT_CARD_INTERVIEW"),
    ("2谈话审批表", "INTERVIEW_APPROVAL_FORM"),
    ("3.谈话前安全风险评估表", "PRE_INTERVIEW_RISK_ASSESSMENT"),
    ("4.谈话方案", "INTERVIEW_PLAN"),
    ("5.谈话后安全风险评估表", "POST_INTERVIEW_RISK_ASSESSMENT"),
    ("1.谈话笔录", "INTERVIEW_RECORD"),
    ("2.谈话询问对象情况摸底调查30问", "INVESTIGATION_30_QUESTIONS"),
    ("3.被谈话人权利义务告知书", "RIGHTS_OBLIGATIONS_NOTICE"),
    ("4.点对点交接单", "HANDOVER_FORM"),
    ("4.点对点交接单2", "HANDOVER_FORM_2"),
    ("5.陪送交接单(新)", "ESCORT_HANDOVER_FORM"),
    ("6.1保密承诺书(谈话对象使用-非中共党员用)", "CONFIDENTIALITY_COMMITMENT_NON_PARTY"),
    ("6.2保密承诺书(谈话对象使用-中共党员用)", "CONFIDENTIALITY_COMMITMENT_PARTY"),
    ("7.办案人员-办案安全保密承诺书", "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT"),
    ("8-1请示报告卡初核报告结论 ", "REPORT_CARD_CONCLUSION"),
    ("8.XXX初核情况报告", "INVESTIGATION_REPORT"),
)

# Every template shares the same business type; a separate dict is built per
# entry so that entries never alias one another.
DOCUMENT_TYPE_MAPPING = {
    template_name: {
        "template_code": template_code,
        "name": template_name,
        "business_type": "INVESTIGATION",
    }
    for template_name, template_code in _TEMPLATE_CODES_BY_NAME
}
def identify_document_type(file_name: str) -> Optional[Dict]:
    """
    Look up the template configuration for a file by its exact name.

    The name is compared against the keys of DOCUMENT_TYPE_MAPPING as-is: no
    normalization and no fuzzy matching is performed.

    Args:
        file_name: File name with or without its extension. Any directory
            part is ignored.

    Returns:
        The matching DOCUMENT_TYPE_MAPPING entry, or None when the name is
        not a known template.
    """
    name = Path(file_name).name
    # Try the name exactly as given first. Several keys contain dots
    # (e.g. "1.谈话笔录"), so taking the stem of an extension-less name would
    # cut the key short and never match.
    config = DOCUMENT_TYPE_MAPPING.get(name)
    if config is None:
        # Otherwise drop the final extension (e.g. ".docx") and match again.
        config = DOCUMENT_TYPE_MAPPING.get(Path(name).stem)
    return config
def upload_to_minio(file_path: Path, minio_client: Minio) -> str:
    """
    Upload a template file to the BUCKET_NAME bucket in MinIO.

    The object key is built from TENANT_ID, the current year and month, and
    the file's own name. Because the key contains the upload month, a
    same-named object is only replaced when it was uploaded in the same month.

    Args:
        file_path: Local path of the file to upload.
        minio_client: MinIO client instance.

    Returns:
        The object key prefixed with "/".

    Raises:
        Exception: If the bucket does not exist or the upload fails. The
            underlying error is attached as the exception's cause.
    """
    try:
        # Fail early with a clear message when the bucket is missing.
        if not minio_client.bucket_exists(BUCKET_NAME):
            raise Exception(f"存储桶 '{BUCKET_NAME}' 不存在,请先创建")

        now = datetime.now()
        object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{file_path.name}'

        # The content type is always the Word .docx MIME type; the caller in
        # this file only passes .docx files.
        minio_client.fput_object(
            BUCKET_NAME,
            object_name,
            str(file_path),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )

        return f"/{object_name}"
    except S3Error as e:
        raise Exception(f"MinIO错误: {e}") from e
    except Exception as e:
        # This also re-wraps the "bucket does not exist" error raised above.
        raise Exception(f"上传文件时发生错误: {e}") from e
def update_file_config(conn, doc_config: Dict, file_path: str) -> int:
    """
    Update the f_polic_file_config row for a template, or insert it if missing.

    The row is looked up by TENANT_ID and the template code. An existing row
    gets its file path, input data, name and update metadata refreshed and its
    state set to 1; otherwise a new row is inserted with a generated id. The
    change is committed before returning.

    Args:
        conn: Open database connection (used for cursor/commit/rollback).
        doc_config: Template configuration with the keys 'template_code',
            'business_type' and 'name'.
        file_path: Object path of the uploaded file in MinIO.

    Returns:
        The id of the updated or newly inserted row.

    Raises:
        Exception: If any step fails. The transaction is rolled back first
            and the underlying error is attached as the exception's cause.
    """
    cursor = conn.cursor()
    current_time = datetime.now()
    try:
        # Look for an existing row by template code.
        select_sql = (
            "\n"
            "SELECT id, name, file_path FROM f_polic_file_config\n"
            "WHERE tenant_id = %s AND template_code = %s\n"
        )
        cursor.execute(select_sql, (TENANT_ID, doc_config['template_code']))
        existing = cursor.fetchone()

        # JSON stored in the input_data column.
        input_data = json.dumps({
            'template_code': doc_config['template_code'],
            'business_type': doc_config['business_type']
        }, ensure_ascii=False)

        if existing:
            # The stored name is selected but not needed here.
            file_config_id, _, old_path = existing
            update_sql = (
                "\n"
                "UPDATE f_polic_file_config\n"
                "SET file_path = %s,\n"
                "input_data = %s,\n"
                "name = %s,\n"
                "updated_time = %s,\n"
                "updated_by = %s,\n"
                "state = 1\n"
                "WHERE id = %s AND tenant_id = %s\n"
            )
            cursor.execute(update_sql, (
                file_path,
                input_data,
                doc_config['name'],
                current_time,
                UPDATED_BY,
                file_config_id,
                TENANT_ID
            ))
            conn.commit()
            print(f" [OK] 更新数据库记录 (ID: {file_config_id})")
            if old_path != file_path:
                print(f" 旧路径: {old_path}")
                print(f" 新路径: {file_path}")
            return file_config_id

        # No existing row: build an id from the millisecond timestamp
        # (multiplied by 1000) plus a random six-digit number.
        import time
        import random
        timestamp = int(time.time() * 1000)
        random_part = random.randint(100000, 999999)
        file_config_id = timestamp * 1000 + random_part

        insert_sql = (
            "\n"
            "INSERT INTO f_polic_file_config\n"
            "(id, tenant_id, parent_id, name, input_data, file_path, template_code,\n"
            "created_time, created_by, updated_time, updated_by, state)\n"
            "VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)\n"
        )
        # created_time and updated_time are deliberately the same value on
        # insert; update_all_templates compares them to tell a freshly
        # created row from an updated one.
        cursor.execute(insert_sql, (
            file_config_id,
            TENANT_ID,
            None,  # parent_id
            doc_config['name'],
            input_data,
            file_path,
            doc_config['template_code'],
            current_time,
            CREATED_BY,
            current_time,
            CREATED_BY,
            1  # state: 1 means enabled
        ))
        conn.commit()
        print(f" [OK] 创建新数据库记录 (ID: {file_config_id})")
        return file_config_id
    except Exception as e:
        conn.rollback()
        raise Exception(f"更新数据库失败: {str(e)}") from e
    finally:
        cursor.close()
def update_all_templates():
    """
    Re-upload every known template under TEMPLATES_DIR and refresh its DB row.

    Walks TEMPLATES_DIR recursively. Files that do not end in '.docx' or that
    start with '~$' are ignored; .docx files whose name is not a known
    template are counted as skipped. Each remaining file is uploaded to MinIO
    and its configuration row is updated or created. A failure on one file is
    reported and does not stop the run. A summary is printed at the end.

    Returns:
        None. Returns early (after printing an error) when the template
        directory is missing or a connection cannot be established.
    """
    print("="*80)
    print("开始更新所有模板文件")
    print("="*80)
    print(f"模板目录: {TEMPLATES_DIR}")
    print()

    if not TEMPLATES_DIR.exists():
        print(f"错误: 模板目录不存在: {TEMPLATES_DIR}")
        return

    # Connect to the database and to MinIO.
    conn = None
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print("[OK] 数据库连接成功")

        minio_client = Minio(
            MINIO_CONFIG['endpoint'],
            access_key=MINIO_CONFIG['access_key'],
            secret_key=MINIO_CONFIG['secret_key'],
            secure=MINIO_CONFIG['secure']
        )

        if not minio_client.bucket_exists(BUCKET_NAME):
            raise Exception(f"存储桶 '{BUCKET_NAME}' 不存在,请先创建")
        print("[OK] MinIO连接成功")
        print()
    except Exception as e:
        print(f"[ERROR] 连接失败: {e}")
        # The database connection may already be open when the MinIO step
        # fails; close it so that it is not leaked.
        if conn is not None:
            conn.close()
        return

    # Counters for the final summary.
    processed_count = 0
    updated_count = 0
    created_count = 0
    skipped_count = 0
    failed_count = 0
    failed_files = []

    print("="*80)
    print("开始处理模板文件...")
    print("="*80)
    print()

    try:
        for root, _dirs, files in os.walk(TEMPLATES_DIR):
            for file in files:
                # Only .docx files are templates; '~$' marks temporary files.
                if not file.endswith('.docx') or file.startswith('~$'):
                    continue

                file_path = Path(root) / file

                doc_config = identify_document_type(file)
                if not doc_config:
                    print(f"\n[{processed_count + skipped_count + failed_count + 1}] [WARN] 跳过: {file}")
                    print(f" 原因: 无法识别文档类型")
                    print(f" 路径: {file_path}")
                    skipped_count += 1
                    continue

                processed_count += 1
                print(f"\n[{processed_count}] 处理: {file}")
                print(f" 类型: {doc_config.get('template_code', 'UNKNOWN')}")
                print(f" 名称: {doc_config.get('name', 'UNKNOWN')}")
                print(f" 路径: {file_path}")

                try:
                    # The file may have disappeared since the directory walk.
                    if not file_path.exists():
                        raise FileNotFoundError(f"文件不存在: {file_path}")

                    file_stat = file_path.stat()
                    file_size = file_stat.st_size
                    file_mtime = datetime.fromtimestamp(file_stat.st_mtime)
                    print(f" 大小: {file_size:,} 字节")
                    print(f" 修改时间: {file_mtime.strftime('%Y-%m-%d %H:%M:%S')}")

                    print(f" 上传到MinIO...")
                    minio_path = upload_to_minio(file_path, minio_client)
                    print(f" [OK] MinIO路径: {minio_path}")

                    print(f" 更新数据库...")
                    file_config_id = update_file_config(conn, doc_config, minio_path)

                    # Decide whether the row was created or updated: a newly
                    # inserted row has identical created/updated timestamps.
                    check_sql = (
                        "\n"
                        "SELECT created_time, updated_time FROM f_polic_file_config\n"
                        "WHERE id = %s\n"
                    )
                    cursor = conn.cursor()
                    try:
                        cursor.execute(check_sql, (file_config_id,))
                        result = cursor.fetchone()
                    finally:
                        cursor.close()

                    if result:
                        created_time, updated_time = result
                        if created_time == updated_time:
                            created_count += 1
                        else:
                            updated_count += 1

                    print(f" [OK] 处理成功 (配置ID: {file_config_id})")
                except Exception as e:
                    # Record the failure and continue with the next file.
                    failed_count += 1
                    failed_files.append((str(file_path), str(e)))
                    print(f" [ERROR] 处理失败: {e}")
                    import traceback
                    traceback.print_exc()
    finally:
        # Always release the database connection, even if the walk aborts.
        conn.close()

    # Summary.
    print("\n" + "="*80)
    print("更新完成")
    print("="*80)
    print(f"总处理数: {processed_count}")
    print(f" 成功更新: {updated_count}")
    print(f" 成功创建: {created_count}")
    print(f" 跳过: {skipped_count}")
    print(f" 失败: {failed_count}")

    if failed_files:
        print("\n失败的文件:")
        for failed_path, error in failed_files:
            print(f" - {failed_path}")
            print(f" 错误: {error}")

    print("\n所有模板文件已更新到最新版本!")
# Script entry point: run the full template refresh only when this file is
# executed directly, not when it is imported.
if __name__ == "__main__":
    update_all_templates()