ai-business-write/init_template_tree_from_directory.py

545 lines
19 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
从 template_finish 目录初始化模板树状结构
删除旧数据,根据目录结构完全重建
"""
import os
import json
import pymysql
from pathlib import Path
from typing import Dict, List, Optional, Tuple
from datetime import datetime
from minio import Minio
from minio.error import S3Error
# Database connection settings passed as keyword arguments to pymysql.connect().
# Each value except 'charset' is read from an environment variable, falling
# back to the literal default when the variable is unset.
# NOTE(review): the fallback host, user and password are real-looking
# credentials committed to source control — consider removing the defaults and
# rotating the password.
DB_CONFIG = {
'host': os.getenv('DB_HOST', '152.136.177.240'),
'port': int(os.getenv('DB_PORT', 5012)),
'user': os.getenv('DB_USER', 'finyx'),
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
'database': os.getenv('DB_NAME', 'finyx'),
'charset': 'utf8mb4'
}
# MinIO connection settings used to build the Minio client in upload_to_minio().
# NOTE(review): unlike DB_CONFIG these values cannot be overridden through the
# environment, and the access/secret keys are committed as literals — consider
# loading them from the environment and rotating the keys.
MINIO_CONFIG = {
'endpoint': 'minio.datacubeworld.com:9000',
'access_key': 'JOLXFXny3avFSzB0uRA5',
'secret_key': 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I',
'secure': True
}
# Tenant whose rows are deleted and recreated; also the first path segment of
# every uploaded MinIO object name.
TENANT_ID = 615873064429507639
# Values written to the created_by / updated_by columns of inserted rows.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
# MinIO bucket that receives the uploaded template files.
BUCKET_NAME = 'finyx'
# Project root directory (the directory containing this script).
PROJECT_ROOT = Path(__file__).parent
# Directory whose sub-tree is mirrored into f_polic_file_config.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# Document type mapping.
# Keys are template file names without the extension; identify_document_type()
# looks a file up here by that name. Each entry carries the template_code and
# business_type that end up in the row's template_code column and input_data
# JSON, plus a "name" that repeats the key.
DOCUMENT_TYPE_MAPPING = {
"1.请示报告卡XXX": {
"template_code": "REPORT_CARD",
"name": "1.请示报告卡XXX",
"business_type": "INVESTIGATION"
},
"2.初步核实审批表XXX": {
"template_code": "PRELIMINARY_VERIFICATION_APPROVAL",
"name": "2.初步核实审批表XXX",
"business_type": "INVESTIGATION"
},
"3.附件初核方案(XXX)": {
"template_code": "INVESTIGATION_PLAN",
"name": "3.附件初核方案(XXX)",
"business_type": "INVESTIGATION"
},
"谈话通知书第一联": {
"template_code": "NOTIFICATION_LETTER_1",
"name": "谈话通知书第一联",
"business_type": "INVESTIGATION"
},
"谈话通知书第二联": {
"template_code": "NOTIFICATION_LETTER_2",
"name": "谈话通知书第二联",
"business_type": "INVESTIGATION"
},
"谈话通知书第三联": {
"template_code": "NOTIFICATION_LETTER_3",
"name": "谈话通知书第三联",
"business_type": "INVESTIGATION"
},
"1.请示报告卡(初核谈话)": {
"template_code": "REPORT_CARD_INTERVIEW",
"name": "1.请示报告卡(初核谈话)",
"business_type": "INVESTIGATION"
},
"2谈话审批表": {
"template_code": "INTERVIEW_APPROVAL_FORM",
"name": "2谈话审批表",
"business_type": "INVESTIGATION"
},
"3.谈话前安全风险评估表": {
"template_code": "PRE_INTERVIEW_RISK_ASSESSMENT",
"name": "3.谈话前安全风险评估表",
"business_type": "INVESTIGATION"
},
"4.谈话方案": {
"template_code": "INTERVIEW_PLAN",
"name": "4.谈话方案",
"business_type": "INVESTIGATION"
},
"5.谈话后安全风险评估表": {
"template_code": "POST_INTERVIEW_RISK_ASSESSMENT",
"name": "5.谈话后安全风险评估表",
"business_type": "INVESTIGATION"
},
"1.谈话笔录": {
"template_code": "INTERVIEW_RECORD",
"name": "1.谈话笔录",
"business_type": "INVESTIGATION"
},
"2.谈话询问对象情况摸底调查30问": {
"template_code": "INVESTIGATION_30_QUESTIONS",
"name": "2.谈话询问对象情况摸底调查30问",
"business_type": "INVESTIGATION"
},
"3.被谈话人权利义务告知书": {
"template_code": "RIGHTS_OBLIGATIONS_NOTICE",
"name": "3.被谈话人权利义务告知书",
"business_type": "INVESTIGATION"
},
"4.点对点交接单": {
"template_code": "HANDOVER_FORM",
"name": "4.点对点交接单",
"business_type": "INVESTIGATION"
},
"5.陪送交接单(新)": {
"template_code": "ESCORT_HANDOVER_FORM",
"name": "5.陪送交接单(新)",
"business_type": "INVESTIGATION"
},
"6.1保密承诺书(谈话对象使用-非中共党员用)": {
"template_code": "CONFIDENTIALITY_COMMITMENT_NON_PARTY",
"name": "6.1保密承诺书(谈话对象使用-非中共党员用)",
"business_type": "INVESTIGATION"
},
"6.2保密承诺书(谈话对象使用-中共党员用)": {
"template_code": "CONFIDENTIALITY_COMMITMENT_PARTY",
"name": "6.2保密承诺书(谈话对象使用-中共党员用)",
"business_type": "INVESTIGATION"
},
"7.办案人员-办案安全保密承诺书": {
"template_code": "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT",
"name": "7.办案人员-办案安全保密承诺书",
"business_type": "INVESTIGATION"
},
# NOTE(review): this key and its "name" end with a trailing space — confirm the
# file on disk is named the same way, otherwise the lookup will never match.
"8-1请示报告卡初核报告结论 ": {
"template_code": "REPORT_CARD_CONCLUSION",
"name": "8-1请示报告卡初核报告结论 ",
"business_type": "INVESTIGATION"
},
"8.XXX初核情况报告": {
"template_code": "INVESTIGATION_REPORT",
"name": "8.XXX初核情况报告",
"business_type": "INVESTIGATION"
}
}
def generate_id():
    """Return an integer ID derived from the current time and a random offset.

    The result is the Unix time in whole milliseconds multiplied by 1000, plus
    a random integer in the inclusive range 100000..999999.
    """
    import random
    import time

    millis = int(time.time() * 1000)
    return millis * 1000 + random.randint(100000, 999999)
def identify_document_type(file_name: str) -> Optional[Dict]:
    """Look up the document-type configuration for a template file.

    Args:
        file_name: Either the full file name (``"1.请示报告卡XXX.docx"``) or the
            name with the extension already removed (``"1.请示报告卡XXX"``).

    Returns:
        The matching entry of ``DOCUMENT_TYPE_MAPPING``, or ``None`` when the
        name is not a known template.
    """
    # Try the name exactly as given first. Many mapping keys contain a dot
    # ("1.请示报告卡XXX"), so taking the stem of an already-stripped name would
    # truncate it to "1" and the lookup would never match.
    if file_name in DOCUMENT_TYPE_MAPPING:
        return DOCUMENT_TYPE_MAPPING[file_name]
    # Otherwise treat the argument as a full file name and drop its extension.
    return DOCUMENT_TYPE_MAPPING.get(Path(file_name).stem)
def upload_to_minio(file_path: Path) -> str:
    """Upload a template file to MinIO and return its object path.

    The object is stored in ``BUCKET_NAME`` under
    ``{TENANT_ID}/TEMPLATE/{year}/{month:02d}/{file name}`` using the current
    local date, with the .docx content type.

    NOTE(review): the object name contains only the file name, so two files
    with the same name in different directories uploaded in the same month
    map to the same object.

    Args:
        file_path: Local path of the file to upload.

    Returns:
        The object name prefixed with ``/``.

    Raises:
        Exception: If the bucket does not exist or the upload fails. The
            original error is attached as ``__cause__``.
    """
    try:
        client = Minio(
            MINIO_CONFIG['endpoint'],
            access_key=MINIO_CONFIG['access_key'],
            secret_key=MINIO_CONFIG['secret_key'],
            secure=MINIO_CONFIG['secure']
        )
        if not client.bucket_exists(BUCKET_NAME):
            raise Exception(f"存储桶 '{BUCKET_NAME}' 不存在,请先创建")
        now = datetime.now()
        object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{file_path.name}'
        client.fput_object(
            BUCKET_NAME,
            object_name,
            str(file_path),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        return f"/{object_name}"
    except S3Error as e:
        # Chain explicitly so the underlying MinIO error stays in the traceback.
        raise Exception(f"MinIO错误: {e}") from e
    except Exception as e:
        raise Exception(f"上传文件时发生错误: {e}") from e
def scan_directory_structure(base_dir: Path) -> List[Dict]:
    """Walk ``base_dir`` and return its directories and .docx files as nodes.

    Every directory and every ``.docx`` file below ``base_dir`` becomes one
    node; other files and ``__pycache__`` entries are skipped. ``base_dir``
    itself is not a node: its direct children are level 0 with no parent.

    Each node is a dict with the keys ``type`` (``'directory'`` or
    ``'file'``), ``name``, ``path``, ``parent_path``, ``level``,
    ``template_code``, ``doc_config`` and ``file_path``.

    Args:
        base_dir: Root directory to scan.

    Returns:
        Nodes sorted by ``(level, path)``, so a parent always precedes its
        children. Empty when ``base_dir`` is not an existing directory.
    """
    nodes: List[Dict] = []

    def process_path(path: Path, parent_path: Optional[str] = None, level: int = 0):
        """Record ``path`` as a node and recurse into it if it is a directory."""
        if path.is_file() and path.suffix == '.docx':
            # Pass the full file name: the lookup strips the extension itself,
            # and stripping twice would mangle names such as "1.请示报告卡XXX".
            doc_config = identify_document_type(path.name)
            nodes.append({
                'type': 'file',
                'name': path.stem,
                'path': str(path),
                'parent_path': parent_path,
                'level': level,
                'template_code': doc_config['template_code'] if doc_config else None,
                'doc_config': doc_config,
                'file_path': path
            })
        elif path.is_dir():
            nodes.append({
                'type': 'directory',
                'name': path.name,
                'path': str(path),
                'parent_path': parent_path,
                'level': level,
                'template_code': None,
                'doc_config': None,
                'file_path': None
            })
            for child in sorted(path.iterdir()):
                if child.name != '__pycache__':
                    process_path(child, str(path), level + 1)

    # Scan the directory the caller asked for rather than a module-level
    # default, and tolerate a missing or non-directory path.
    if base_dir.is_dir():
        for item in sorted(base_dir.iterdir()):
            if item.name != '__pycache__':
                process_path(item, None, 0)

    # Order by depth so parents are created before their children.
    return sorted(nodes, key=lambda x: (x['level'], x['path']))
def delete_old_data(conn, dry_run: bool = True):
    """Delete the tenant's template rows, or report how many would be deleted.

    Step 1 handles ``f_polic_file_field`` rows whose ``file_id`` belongs to one
    of the tenant's ``f_polic_file_config`` rows; step 2 handles the
    ``f_polic_file_config`` rows themselves.

    Args:
        conn: Open database connection (provides ``cursor``, ``commit`` and
            ``rollback``).
        dry_run: When true, only run COUNT queries and print the totals.
            When false, run the deletes and commit.

    Returns:
        ``True`` on success.

    Raises:
        Exception: Any error is printed and re-raised; outside dry-run mode
            the transaction is rolled back first.
    """
    rule = "=" * 80
    cur = conn.cursor()
    try:
        print("\n" + rule)
        print("删除旧数据")
        print(rule)

        # Step 1: association table f_polic_file_field.
        print("\n1. 删除 f_polic_file_field 关联记录...")
        if dry_run:
            # Simulation: count only.
            field_count_sql = (
                "\n"
                "SELECT COUNT(*) FROM f_polic_file_field\n"
                "WHERE tenant_id = %s AND file_id IN (\n"
                "SELECT id FROM f_polic_file_config WHERE tenant_id = %s\n"
                ")\n"
            )
            cur.execute(field_count_sql, (TENANT_ID, TENANT_ID))
            pending = cur.fetchone()[0]
            print(f" [模拟] 将删除 {pending} 条关联记录")
        else:
            # Collect the tenant's file ids first, then delete by id list.
            ids_sql = (
                "\n"
                "SELECT id FROM f_polic_file_config\n"
                "WHERE tenant_id = %s\n"
            )
            cur.execute(ids_sql, (TENANT_ID,))
            config_ids = [record[0] for record in cur.fetchall()]
            if not config_ids:
                print(" ✓ 没有需要删除的关联记录")
            else:
                # One %s placeholder per id keeps the query parameterized.
                placeholders = ','.join('%s' for _ in config_ids)
                field_delete_sql = (
                    "\n"
                    "DELETE FROM f_polic_file_field\n"
                    f"WHERE tenant_id = %s AND file_id IN ({placeholders})\n"
                )
                cur.execute(field_delete_sql, [TENANT_ID, *config_ids])
                removed = cur.rowcount
                print(f" ✓ 删除了 {removed} 条关联记录")

        # Step 2: f_polic_file_config itself.
        print("\n2. 删除 f_polic_file_config 记录...")
        if dry_run:
            config_count_sql = "SELECT COUNT(*) FROM f_polic_file_config WHERE tenant_id = %s"
            cur.execute(config_count_sql, (TENANT_ID,))
            pending = cur.fetchone()[0]
            print(f" [模拟] 将删除 {pending} 条配置记录")
        else:
            config_delete_sql = (
                "\n"
                "DELETE FROM f_polic_file_config\n"
                "WHERE tenant_id = %s\n"
            )
            cur.execute(config_delete_sql, (TENANT_ID,))
            removed = cur.rowcount
            print(f" ✓ 删除了 {removed} 条配置记录")
            conn.commit()
        return True
    except Exception as e:
        if not dry_run:
            conn.rollback()
        print(f" ✗ 删除失败: {e}")
        raise
    finally:
        cur.close()
def create_tree_structure(conn, nodes: List[Dict], upload_files: bool = True, dry_run: bool = True):
    """Create one ``f_polic_file_config`` row per node, parents before children.

    Directory nodes are inserted without ``template_code``, ``input_data`` or
    ``file_path``. File nodes are optionally uploaded to MinIO first; their
    row stores the returned object path, the template code (empty string when
    unknown) and, for recognised templates, a JSON ``input_data`` holding
    ``template_code`` and ``business_type``.

    Args:
        conn: Open database connection.
        nodes: Nodes as produced by ``scan_directory_structure``; a parent
            must appear before its children so its ID is already known.
        upload_files: Whether file nodes are uploaded to MinIO.
        dry_run: When true, nothing is uploaded or inserted; the planned
            actions are only printed.

    Returns:
        Dict mapping each node's ``path`` to the ID generated for it.

    Raises:
        Exception: Any error other than an upload failure is printed and
            re-raised; outside dry-run mode the transaction is rolled back.
    """
    directory_insert_sql = (
        "\n"
        "INSERT INTO f_polic_file_config\n"
        "(id, tenant_id, parent_id, name, input_data, file_path,\n"
        "created_time, created_by, updated_time, updated_by, state)\n"
        "VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, %s)\n"
    )
    file_insert_sql = (
        "\n"
        "INSERT INTO f_polic_file_config\n"
        "(id, tenant_id, parent_id, name, input_data, file_path, template_code,\n"
        "created_time, created_by, updated_time, updated_by, state)\n"
        "VALUES (%s, %s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, %s)\n"
    )
    mode_tag = '[模拟]' if dry_run else ''

    cursor = conn.cursor()
    try:
        if not dry_run:
            conn.autocommit(False)
        print("\n" + "="*80)
        print("创建树状结构")
        print("="*80)

        # Maps a node's filesystem path to the ID generated for it.
        path_to_id = {}
        created_count = 0

        # Nodes arrive sorted by level, so a parent's ID is always registered
        # before any of its children look it up.
        for node in nodes:
            node_path = node['path']
            node_name = node['name']
            parent_path = node['parent_path']
            indent = " " * node['level']

            parent_id = path_to_id.get(parent_path) if parent_path else None
            parent_info = f" [父: {parent_id}]" if parent_path else ""

            node_id = generate_id()
            path_to_id[node_path] = node_id

            if node['type'] == 'directory':
                if not dry_run:
                    # Directory rows carry no template_code column value.
                    cursor.execute(directory_insert_sql, (
                        node_id,
                        TENANT_ID,
                        parent_id,
                        node_name,
                        None,
                        None,
                        CREATED_BY,
                        UPDATED_BY,
                        1
                    ))
                print(f"{indent}{mode_tag}创建目录: {node_name} (ID: {node_id}){parent_info}")
            else:
                doc_config = node.get('doc_config')
                template_code = node.get('template_code')
                file_path_obj = node.get('file_path')

                # Upload to MinIO when requested and the file is still there.
                minio_path = None
                if upload_files and file_path_obj and file_path_obj.exists():
                    try:
                        if not dry_run:
                            minio_path = upload_to_minio(file_path_obj)
                        else:
                            # Preview the path with the year/month format the
                            # real upload uses, instead of a fixed date.
                            now = datetime.now()
                            minio_path = (
                                f"/{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/"
                                f"{file_path_obj.name}"
                            )
                        print(f" {mode_tag}上传文件: {file_path_obj.name} -> {minio_path}")
                    except Exception as e:
                        # Best effort: keep going and store no file path.
                        print(f" ⚠ 上传文件失败: {e}")

                input_data = None
                if doc_config:
                    input_data = json.dumps({
                        'template_code': doc_config['template_code'],
                        'business_type': doc_config['business_type']
                    }, ensure_ascii=False)

                if not dry_run:
                    # Unknown templates are stored with an empty template_code.
                    template_code_value = template_code if template_code else ''
                    cursor.execute(file_insert_sql, (
                        node_id,
                        TENANT_ID,
                        parent_id,
                        node_name,
                        input_data,
                        minio_path,
                        template_code_value,
                        CREATED_BY,
                        UPDATED_BY,
                        1
                    ))
                template_info = f" [code: {template_code}]" if template_code else ""
                print(f"{indent}{mode_tag}创建文件: {node_name} (ID: {node_id}){parent_info}{template_info}")

            created_count += 1

        if not dry_run:
            conn.commit()
            print(f"\n✓ 创建完成!共创建 {created_count} 个节点")
        else:
            print(f"\n[模拟模式] 将创建 {created_count} 个节点")
        return path_to_id
    except Exception as e:
        if not dry_run:
            conn.rollback()
        print(f"\n✗ 创建失败: {e}")
        import traceback
        traceback.print_exc()
        raise
    finally:
        cursor.close()
def main():
    """Interactively rebuild the tenant's template tree from ``TEMPLATES_DIR``.

    Flow: warn and ask for confirmation, connect to the database, scan the
    template directory, show a preview, ask whether to upload files, run a
    dry-run delete and create, ask for a final confirmation, then run the
    real delete and create. The connection is always closed at the end.

    The run is aborted before anything is deleted when the scan finds no
    nodes, because otherwise the existing data would be removed with nothing
    to replace it.
    """
    rule = "=" * 80
    print(rule)
    print("初始化模板树状结构(从目录结构完全重建)")
    print(rule)
    print("\n⚠️ 警告:此操作将删除当前租户的所有模板数据!")
    print(" 包括:")
    print(" - f_polic_file_config 表中的所有记录")
    print(" - f_polic_file_field 表中的相关关联记录")
    print(" 然后根据 template_finish 目录结构完全重建")

    # First confirmation; anything other than "yes" cancels.
    print("\n" + rule)
    confirm1 = input("\n确认继续?(yes/no默认no): ").strip().lower()
    if confirm1 != 'yes':
        print("已取消")
        return

    # Connect to the database.
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print("✓ 数据库连接成功")
    except Exception as e:
        print(f"✗ 数据库连接失败: {e}")
        return

    try:
        # Scan the directory structure.
        print("\n扫描目录结构...")
        nodes = scan_directory_structure(TEMPLATES_DIR)
        directory_total = sum(1 for n in nodes if n['type'] == 'directory')
        file_total = sum(1 for n in nodes if n['type'] == 'file')
        print(f" 找到 {len(nodes)} 个节点")
        print(f" 其中目录: {directory_total}")
        print(f" 其中文件: {file_total}")

        # Refuse to wipe the tenant's data when there is nothing to rebuild
        # from (missing or empty template directory).
        if not nodes:
            print(f"\n✗ 未在 {TEMPLATES_DIR} 中找到任何节点,已中止(未删除任何数据)")
            return

        # Preview only the first 10 nodes.
        print("\n目录结构预览:")
        for node in nodes[:10]:
            indent = " " * node['level']
            type_icon = "📁" if node['type'] == 'directory' else "📄"
            print(f"{indent}{type_icon} {node['name']}")
        if len(nodes) > 10:
            print(f" ... 还有 {len(nodes) - 10} 个节点")

        # Uploading is the default; only an explicit "no" disables it.
        print("\n" + rule)
        upload_files = input("\n是否上传文件到MinIO(yes/no默认yes): ").strip().lower()
        upload_files = upload_files != 'no'

        # Dry run first so the operator can see what will happen.
        print("\n执行模拟删除...")
        delete_old_data(conn, dry_run=True)
        print("\n执行模拟创建...")
        create_tree_structure(conn, nodes, upload_files=upload_files, dry_run=True)

        # Final confirmation before touching any data.
        print("\n" + rule)
        confirm2 = input("\n确认执行实际更新?(yes/no默认no): ").strip().lower()
        if confirm2 != 'yes':
            print("已取消")
            return

        # Real run: delete, then recreate.
        print("\n执行实际删除...")
        delete_old_data(conn, dry_run=False)
        print("\n执行实际创建...")
        create_tree_structure(conn, nodes, upload_files=upload_files, dry_run=False)

        print("\n" + rule)
        print("初始化完成!")
        print(rule)
    except Exception as e:
        print(f"\n✗ 初始化失败: {e}")
        import traceback
        traceback.print_exc()
    finally:
        conn.close()
        print("\n数据库连接已关闭")


if __name__ == '__main__':
    main()