351 lines
12 KiB
Python
351 lines
12 KiB
Python
"""
|
||
批量将template_finish文件夹下的模板文件注册到数据库并上传到MinIO
|
||
"""
|
||
import os
|
||
import re
|
||
import json
|
||
import pymysql
|
||
from minio import Minio
|
||
from minio.error import S3Error
|
||
from datetime import datetime
|
||
from pathlib import Path
|
||
from docx import Document
|
||
from typing import Dict, List, Optional
|
||
|
||
|
||
# MinIO connection settings.
#
# SECURITY: live credentials are committed here in plain text. Every value can
# be overridden with the environment variable named next to it; the literals
# remain only as backward-compatible fallbacks and should be rotated and then
# removed from source control.
MINIO_CONFIG = {
    'endpoint': os.environ.get('MINIO_ENDPOINT', 'minio.datacubeworld.com:9000'),
    'access_key': os.environ.get('MINIO_ACCESS_KEY', 'JOLXFXny3avFSzB0uRA5'),
    'secret_key': os.environ.get(
        'MINIO_SECRET_KEY', 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I'
    ),
    # TLS stays enabled unless MINIO_SECURE is explicitly set to a "false" value.
    'secure': os.environ.get('MINIO_SECURE', 'true').strip().lower()
    not in ('0', 'false', 'no', 'off'),
}
|
||
|
||
# Database connection settings, passed as keyword arguments to pymysql.connect().
#
# SECURITY: the host and password are committed here in plain text. Every
# value except the charset can be overridden with the environment variable
# named next to it; the literals remain only as backward-compatible fallbacks
# and should be rotated and then removed from source control.
DB_CONFIG = {
    'host': os.environ.get('FINYX_DB_HOST', '152.136.177.240'),
    # The port must be an int, so the environment value is converted here.
    'port': int(os.environ.get('FINYX_DB_PORT', '5012')),
    'user': os.environ.get('FINYX_DB_USER', 'finyx'),
    'password': os.environ.get('FINYX_DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.environ.get('FINYX_DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}
|
||
|
||
# Fixed values used for every object key and database row this script writes.
TENANT_ID = 615_873_064_429_507_639
# The same account ID is recorded as both creator and last updater.
CREATED_BY = UPDATED_BY = 655_162_080_928_945_152
# Evaluated once when the module is loaded, so every row written in a single
# run carries the same timestamp.
CURRENT_TIME = datetime.now()
BUCKET_NAME = 'finyx'
|
||
|
||
|
||
def generate_id():
    """Build a numeric ID from the current time plus a random component.

    The result is the Unix time in milliseconds multiplied by 1000, with a
    random integer in the range 100000-999999 added on top. The random part
    is wider than the factor of 1000, so it overlaps the low digits of the
    timestamp; collisions are unlikely but not impossible.

    Returns:
        int: the generated ID.
    """
    import random
    import time

    millis = int(time.time() * 1000)
    return millis * 1000 + random.randint(100000, 999999)
|
||
|
||
|
||
def extract_placeholders_from_docx(file_path):
    """Collect the distinct ``{{name}}`` placeholders found in a .docx file.

    Body paragraphs and the paragraphs inside every cell of every top-level
    table are scanned. Each captured name is stripped of surrounding
    whitespace.

    Args:
        file_path: path of the document, handed to ``Document``.

    Returns:
        list: the placeholder names, de-duplicated and sorted. An empty list
        is returned (after printing the error) if anything goes wrong while
        opening or reading the document.
    """
    token_re = re.compile(r'\{\{([^}]+)\}\}')
    found = set()

    def collect(paragraphs):
        # Add every placeholder name that occurs in the given paragraphs.
        for para in paragraphs:
            found.update(hit.strip() for hit in token_re.findall(para.text))

    try:
        document = Document(file_path)

        # Placeholders in ordinary body paragraphs.
        collect(document.paragraphs)

        # Placeholders inside table cells.
        for table in document.tables:
            for row in table.rows:
                for cell in row.cells:
                    collect(cell.paragraphs)

    except Exception as e:
        print(f" 错误: 读取文件失败 - {str(e)}")
        return []

    ordered = list(found)
    ordered.sort()
    return ordered
|
||
|
||
|
||
# Ordered (name fragment, template code) pairs. The first fragment found in
# the file name wins, so a fragment that contains another fragment (for
# example the bracketed request-report-card variants, or the case-officer
# commitment) MUST be listed before the shorter one.
_TEMPLATE_CODE_RULES = (
    ('请示报告卡(初核谈话)', 'REQUEST_REPORT_CARD_INTERVIEW'),
    ('请示报告卡(初核报告结论)', 'REQUEST_REPORT_CARD_CONCLUSION'),
    ('请示报告卡', 'REQUEST_REPORT_CARD'),
    ('初步核实审批表', 'PRELIMINARY_VERIFICATION_APPROVAL'),
    ('附件初核方案', 'PRELIMINARY_VERIFICATION_PLAN'),
    ('谈话通知书', 'INTERVIEW_NOTICE'),
    ('谈话审批表', 'INTERVIEW_APPROVAL'),
    ('谈话前安全风险评估表', 'PRE_INTERVIEW_RISK_ASSESSMENT'),
    ('谈话方案', 'INTERVIEW_PLAN'),
    ('谈话后安全风险评估表', 'POST_INTERVIEW_RISK_ASSESSMENT'),
    ('谈话笔录', 'INTERVIEW_RECORD'),
    ('谈话询问对象情况摸底调查30问', 'INTERVIEW_OBJECT_INVESTIGATION'),
    ('被谈话人权利义务告知书', 'INTERVIEWEE_RIGHTS_OBLIGATIONS_NOTICE'),
    ('点对点交接单', 'POINT_TO_POINT_HANDOVER'),
    ('陪送交接单', 'ESCORT_HANDOVER'),
    ('办案人员-办案安全保密承诺书', 'CASE_OFFICER_SECURITY_COMMITMENT'),
    ('保密承诺书', 'CONFIDENTIALITY_COMMITMENT'),
    ('XXX初核情况报告', 'PRELIMINARY_VERIFICATION_REPORT'),
)


def generate_template_code(file_name: str, relative_path: str) -> str:
    """Derive a template code from a template file name.

    Examples:
        - "2.初步核实审批表(XXX).docx" -> "PRELIMINARY_VERIFICATION_APPROVAL"
        - "1.请示报告卡(XXX).docx" -> "REQUEST_REPORT_CARD"
        - "1.请示报告卡(初核谈话).docx" -> "REQUEST_REPORT_CARD_INTERVIEW"

    Full-width and ASCII brackets are treated alike. Fragments that carry a
    bracketed qualifier are matched against the whole file stem; all other
    fragments are matched against the stem with bracketed text and the
    leading number removed.

    Args:
        file_name: file name including its extension.
        relative_path: accepted for interface compatibility; not used.

    Returns:
        The mapped code, or, when no fragment matches, the upper-cased base
        name with runs of non-word characters collapsed to "_". If that is
        empty, a "TEMPLATE_<number>" code built from ``generate_id()``.
    """
    # Normalise full-width brackets so that one pattern and one spelling of
    # the mapping keys cover both bracket styles.
    stem = Path(file_name).stem.replace('（', '(').replace('）', ')')

    # Base name: bracketed text removed, dangling "(" trimmed, number prefix dropped.
    base_name = re.sub(r'\(.*?\)', '', stem)
    base_name = base_name.strip().rstrip('(')
    base_name = re.sub(r'^\d+[\.\-]?', '', base_name).strip()

    def mentions(token: str) -> bool:
        # Variant markers may sit inside brackets, so also look at the full stem.
        return token in base_name or token in stem

    for fragment, code in _TEMPLATE_CODE_RULES:
        haystack = stem if '(' in fragment else base_name
        if fragment not in haystack:
            continue

        # Interview notices are split by copy number.
        if '谈话通知书' in base_name:
            if mentions('第一联'):
                return 'INTERVIEW_NOTICE_FIRST'
            if mentions('第二联'):
                return 'INTERVIEW_NOTICE_SECOND'
            if mentions('第三联'):
                return 'INTERVIEW_NOTICE_THIRD'

        # Confidentiality commitments are split by party membership. The
        # non-member check comes first because its markers contain "党员".
        if '保密承诺书' in base_name:
            if mentions('非中共党员') or mentions('非党员'):
                return 'CONFIDENTIALITY_COMMITMENT_NON_PARTY'
            if mentions('中共党员') or mentions('党员'):
                return 'CONFIDENTIALITY_COMMITMENT_PARTY'

        return code

    # No mapping matched: fall back to a generic code. \w also matches CJK
    # characters, so Chinese text is kept as is (no pinyin conversion).
    code = base_name.upper()
    code = re.sub(r'[^\w]', '_', code)
    code = re.sub(r'_+', '_', code).strip('_')

    return code if code else f'TEMPLATE_{generate_id() % 1000000}'
|
||
|
||
|
||
def upload_to_minio(client: Minio, file_path: str, template_name: str) -> str:
    """Upload a local .docx file to the MinIO bucket ``BUCKET_NAME``.

    The object key is ``<TENANT_ID>/TEMPLATE/<year>/<month>/<template_name>``,
    using the current local year and zero-padded month. The key depends only
    on ``template_name``, so two files with the same name uploaded in the
    same month map to the same key (presumably the later upload replaces the
    earlier one; confirm against the bucket's versioning settings).

    Args:
        client: MinIO client used for the upload.
        file_path: path of the local file to upload.
        template_name: file name used as the last segment of the object key.

    Returns:
        The object key prefixed with "/".

    Raises:
        Exception: if the upload fails; the original error is chained as
            ``__cause__``.
    """
    now = datetime.now()
    object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{template_name}'

    # Only the network call is guarded, so unrelated errors are not mislabelled.
    try:
        client.fput_object(
            BUCKET_NAME,
            object_name,
            file_path,
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document',
        )
    except Exception as e:
        raise Exception(f"上传到MinIO失败: {str(e)}") from e

    return f"/{object_name}"
|
||
|
||
|
||
def register_template_to_db(conn, template_info: Dict) -> int:
    """Insert or update the ``f_polic_file_config`` row for one template.

    A row is looked up by ``TENANT_ID`` and ``template_info['name']``. If one
    exists, its file path, input data, update time and updater are refreshed
    and its state is set to 1; otherwise a new row is inserted with an ID
    from ``generate_id()``. The transaction is committed on success and
    rolled back on any error.

    Args:
        conn: open database connection (must provide ``cursor()``,
            ``commit()`` and ``rollback()``).
        template_info: dict with the keys ``name``, ``file_path`` and
            ``input_data``; ``parent_id`` is optional and is only used when
            inserting.

    Returns:
        The ID of the updated or newly created row.

    Raises:
        Exception: if any database step fails; the original error is chained
            as ``__cause__``.
    """
    cursor = conn.cursor()

    try:
        # Look for an existing row with the same tenant and name.
        check_sql = """
            SELECT id FROM f_polic_file_config
            WHERE tenant_id = %s AND name = %s
        """
        cursor.execute(check_sql, (TENANT_ID, template_info['name']))
        existing = cursor.fetchone()

        if existing:
            # Index access assumes the cursor returns rows as sequences.
            file_config_id = existing[0]
            update_sql = """
                UPDATE f_polic_file_config
                SET file_path = %s, input_data = %s, updated_time = %s, updated_by = %s, state = 1
                WHERE id = %s AND tenant_id = %s
            """
            cursor.execute(update_sql, (
                template_info['file_path'],
                template_info['input_data'],
                CURRENT_TIME,
                UPDATED_BY,
                file_config_id,
                TENANT_ID,
            ))
            print(f" ✓ 更新文件配置: {template_info['name']}, ID: {file_config_id}")
        else:
            file_config_id = generate_id()
            insert_sql = """
                INSERT INTO f_polic_file_config
                (id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
            """
            cursor.execute(insert_sql, (
                file_config_id,
                TENANT_ID,
                template_info.get('parent_id'),
                template_info['name'],
                template_info['input_data'],
                template_info['file_path'],
                CURRENT_TIME,
                CREATED_BY,
                CURRENT_TIME,
                CREATED_BY,
                1,  # state: 1 means enabled
            ))
            print(f" ✓ 创建文件配置: {template_info['name']}, ID: {file_config_id}")

        conn.commit()
        return file_config_id

    except Exception as e:
        conn.rollback()
        raise Exception(f"注册到数据库失败: {str(e)}") from e
    finally:
        # Release the cursor on every path; previously it was never closed.
        cursor.close()
|
||
|
||
|
||
def process_templates_in_directory(base_dir: str):
    """Upload and register every .docx template found under ``base_dir``.

    For each ``*.docx`` file found recursively (Office lock files starting
    with "~$" are skipped) the function extracts the placeholders, derives a
    template code, uploads the file to MinIO and creates or updates its
    database record. A failure on one file is recorded and does not stop the
    run. A summary is printed at the end.

    Note: the database record name and the MinIO object key are derived from
    the file name only, so same-named files in different sub-folders end up
    sharing one record and one object key.

    Args:
        base_dir: root directory to scan.

    Returns:
        None. Returns early (after printing an error) if the directory or the
        bucket does not exist.
    """
    base_path = Path(base_dir)
    if not base_path.exists():
        print(f"错误: 目录不存在 - {base_dir}")
        return

    # Connect to the database and MinIO.
    print("=" * 80)
    print("连接数据库和MinIO...")
    print("=" * 80)

    processed_count = 0
    success_count = 0
    failed_count = 0
    failed_files = []

    conn = pymysql.connect(**DB_CONFIG)
    try:
        minio_client = Minio(
            MINIO_CONFIG['endpoint'],
            access_key=MINIO_CONFIG['access_key'],
            secret_key=MINIO_CONFIG['secret_key'],
            secure=MINIO_CONFIG['secure'],
        )

        # The bucket must already exist; it is never created here.
        if not minio_client.bucket_exists(BUCKET_NAME):
            print(f"错误: 存储桶 '{BUCKET_NAME}' 不存在")
            return

        print(f"✓ 存储桶 '{BUCKET_NAME}' 已存在\n")

        print("=" * 80)
        print("开始处理模板文件...")
        print("=" * 80)
        print()

        for docx_file in sorted(base_path.rglob("*.docx")):
            # Skip the temporary lock files Word creates next to open documents.
            if docx_file.name.startswith("~$"):
                continue

            processed_count += 1
            relative_path = docx_file.relative_to(base_path)

            print(f"[{processed_count}] 处理: {relative_path}")

            try:
                # Extract placeholders.
                placeholders = extract_placeholders_from_docx(str(docx_file))
                print(f" 占位符数量: {len(placeholders)}")

                # Derive the template code and name.
                template_code = generate_template_code(docx_file.name, str(relative_path))
                template_name = docx_file.name

                # Upload to MinIO.
                print(" 正在上传到MinIO...")
                file_path = upload_to_minio(minio_client, str(docx_file), template_name)
                print(f" ✓ 上传成功: {file_path}")

                # Build the database record.
                input_data = json.dumps({
                    'template_code': template_code,
                    'business_type': 'INVESTIGATION',  # default business type
                    'placeholders': placeholders,  # stored for reference
                }, ensure_ascii=False)

                template_info = {
                    # Path.stem drops only the final extension, whatever its
                    # case; str.replace removed every ".docx" occurrence.
                    'name': docx_file.stem,
                    'template_code': template_code,
                    'file_path': file_path,
                    'input_data': input_data,
                    'parent_id': None,
                }

                # Register in the database.
                print(" 正在注册到数据库...")
                file_config_id = register_template_to_db(conn, template_info)
                print(f" ✓ 注册成功,配置ID: {file_config_id}")

                success_count += 1
                print()

            except Exception as e:
                # Best effort: record the failure and continue with the next file.
                failed_count += 1
                failed_files.append((str(relative_path), str(e)))
                print(f" ✗ 处理失败: {str(e)}\n")
    finally:
        # Close the connection on every exit path, including the early return
        # above and unexpected errors.
        conn.close()

    # Print the summary.
    print("=" * 80)
    print("处理汇总")
    print("=" * 80)
    print(f"总文件数: {processed_count}")
    print(f"成功: {success_count}")
    print(f"失败: {failed_count}")

    if failed_files:
        print("\n失败的文件:")
        for file_path, error in failed_files:
            print(f" - {file_path}: {error}")

    print("\n处理完成!")
|
||
|
||
|
||
def main():
    """Entry point: process the ``template_finish`` folder beside this script."""
    script_dir = os.path.dirname(__file__)
    template_dir = os.path.join(script_dir, 'template_finish')

    # Show the resolved folder, followed by one blank line.
    print(f"模板目录: {template_dir}", end="\n\n")

    process_templates_in_directory(template_dir)


if __name__ == '__main__':
    main()