修正生成文档错误测试

This commit is contained in:
python 2025-12-30 10:41:35 +08:00
parent 7bb69af45e
commit d27c18d0d2
12 changed files with 4728 additions and 6 deletions

38
app.py
View File

@ -686,6 +686,22 @@ def generate_document():
if not data: if not data:
return error_response(400, "请求参数不能为空") return error_response(400, "请求参数不能为空")
# 获取tenant_id从请求参数或请求体中获取
tenant_id = request.args.get('tenant_id') or data.get('tenant_id')
if tenant_id:
try:
tenant_id = int(tenant_id)
except (ValueError, TypeError):
return error_response(400, "tenant_id必须是整数")
else:
# 如果未提供tenant_id尝试从环境变量获取默认使用1
import os
tenant_id_str = os.getenv('TENANT_ID', '1')
try:
tenant_id = int(tenant_id_str)
except (ValueError, TypeError):
tenant_id = 1
input_data = data.get('inputData', []) input_data = data.get('inputData', [])
file_list = data.get('fpolicFieldParamFileList', []) file_list = data.get('fpolicFieldParamFileList', [])
@ -723,7 +739,8 @@ def generate_document():
result = document_service.generate_document( result = document_service.generate_document(
file_id=file_id, file_id=file_id,
input_data=input_data, input_data=input_data,
file_info=file_info file_info=file_info,
tenant_id=tenant_id
) )
# 使用生成的文档名称(.docx格式而不是原始文件名 # 使用生成的文档名称(.docx格式而不是原始文件名
@ -834,6 +851,22 @@ def get_document_by_task():
# 生成文档ID # 生成文档ID
document_id = document_service.generate_document_id() document_id = document_service.generate_document_id()
# 获取tenant_id从请求参数或请求体中获取
tenant_id = request.args.get('tenant_id') or data.get('tenant_id')
if tenant_id:
try:
tenant_id = int(tenant_id)
except (ValueError, TypeError):
return error_response(400, "tenant_id必须是整数")
else:
# 如果未提供tenant_id尝试从环境变量获取默认使用1
import os
tenant_id_str = os.getenv('TENANT_ID', '1')
try:
tenant_id = int(tenant_id_str)
except (ValueError, TypeError):
tenant_id = 1
# 处理每个文件 # 处理每个文件
result_file_list = [] result_file_list = []
first_document_name = None # 用于存储第一个生成的文档名 first_document_name = None # 用于存储第一个生成的文档名
@ -851,7 +884,8 @@ def get_document_by_task():
result = document_service.generate_document( result = document_service.generate_document(
file_id=file_id, file_id=file_id,
input_data=input_data, input_data=input_data,
file_info=file_info file_info=file_info,
tenant_id=tenant_id
) )
# 使用生成的文档名称(.docx格式而不是原始文件名 # 使用生成的文档名称(.docx格式而不是原始文件名

View File

@ -0,0 +1,539 @@
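"""
Check that the ID relations stored in the database are consistent.

What it does:
1. Inspects the rows of the f_polic_file_config table
2. Inspects the rows of the f_polic_field table
3. Inspects the associations stored in the f_polic_file_field table
4. Verifies that the IDs referenced between these tables match
5. Reports orphaned rows and broken associations

Usage (replace the placeholders with real connection settings; never commit real credentials):
python check_database_id_relations.py --host <host> --port 3306 --user <user> --password <password> --database <database> --tenant-id 1
"""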
import os
import sys
import pymysql
import argparse
from typing import Dict, List, Set, Optional
from collections import defaultdict
# On Windows, re-wrap stdout and stderr as UTF-8 text streams over their
# underlying binary buffers.
if sys.platform == 'win32':
    import io
    for _stream_name in ('stdout', 'stderr'):
        _binary = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name, io.TextIOWrapper(_binary, encoding='utf-8'))
def print_section(title):
    """Print a section banner: blank line, 70 '=' characters, the title, 70 '=' characters."""
    rule = "=" * 70
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success, message):
    """Print ``message`` prefixed with ``[OK]`` when ``success`` is truthy, else ``[FAIL]``."""
    print("[OK]" if success else "[FAIL]", message)
def get_db_config_from_args() -> Dict:
    """Parse the command line into the configuration dict used by this script.

    ``--password`` may be omitted; in that case the password is read with
    ``getpass.getpass`` so it does not have to appear on the command line.
    Passing ``--password`` explicitly keeps working as before.

    Returns:
        A dict with the keys ``host``, ``port``, ``user``, ``password``,
        ``database``, ``charset`` (always ``'utf8mb4'``), ``tenant_id`` and
        ``file_id`` (``None`` when ``--file-id`` is not given).
    """
    import getpass  # local import: only this function needs it

    parser = argparse.ArgumentParser(
        description='检查数据库中的ID关系是否正确',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The example deliberately uses placeholders instead of real
        # connection details.
        epilog=(
            "\n示例\n"
            "python check_database_id_relations.py --host <host> --port 3306 "
            "--user <user> --database <database> --tenant-id 1\n"
        ),
    )
    parser.add_argument('--host', type=str, required=True, help='MySQL服务器地址')
    parser.add_argument('--port', type=int, required=True, help='MySQL服务器端口')
    parser.add_argument('--user', type=str, required=True, help='MySQL用户名')
    parser.add_argument('--password', type=str, default=None,
                        help='MySQL密码(未提供时将提示输入)')
    parser.add_argument('--database', type=str, required=True, help='数据库名称')
    parser.add_argument('--tenant-id', type=int, required=True, help='租户ID')
    parser.add_argument('--file-id', type=int, help='检查特定的文件ID')
    args = parser.parse_args()

    password = args.password
    if password is None:
        password = getpass.getpass('MySQL密码: ')

    return {
        'host': args.host,
        'port': args.port,
        'user': args.user,
        'password': password,
        'database': args.database,
        'charset': 'utf8mb4',
        'tenant_id': args.tenant_id,
        'file_id': args.file_id
    }
def test_db_connection(config: Dict) -> Optional[pymysql.Connection]:
    """Open a pymysql connection from ``config``.

    Reads the ``host``, ``port``, ``user``, ``password``, ``database`` and
    ``charset`` keys. Returns the connection, or ``None`` after printing a
    ``[FAIL]`` line when anything in the attempt raises.
    """
    wanted = ('host', 'port', 'user', 'password', 'database', 'charset')
    try:
        connect_kwargs = {key: config[key] for key in wanted}
        return pymysql.connect(**connect_kwargs)
    except Exception as exc:
        print_result(False, f"数据库连接失败: {str(exc)}")
        return None
def check_file_config(conn, tenant_id: int, file_id: Optional[int] = None):
    """Report on the ``f_polic_file_config`` rows of one tenant.

    With a truthy ``file_id`` only that row is looked up and printed, together
    with whether its ``state`` equals 1. Otherwise tenant-wide counts are
    printed and rows whose ``parent_id`` matches no row of the same tenant are
    listed (first 10 only).

    NOTE(review): the source this block was recovered from had lost its
    indentation; the early ``return`` is assumed to end the whole ``file_id``
    branch rather than only the "not found" case - confirm.
    """
    def state_as_int(raw):
        # state may come back as bytes or as a number; NULL counts as 0.
        if isinstance(raw, bytes):
            return int.from_bytes(raw, byteorder='big')
        return int(raw) if raw is not None else 0

    print_section("检查 f_polic_file_config 表")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        if file_id:
            # Single-file mode: describe that one row, then stop.
            cursor.execute("""
                SELECT id, tenant_id, parent_id, name, file_path, state
                FROM f_polic_file_config
                WHERE id = %s AND tenant_id = %s
            """, (file_id, tenant_id))
            row = cursor.fetchone()
            if not row:
                print_result(False, f"文件ID {file_id} 不存在或不属于租户 {tenant_id}")
                return
            print(f"\n 文件ID {file_id} 的信息:")
            labelled_columns = (
                ("ID", 'id'),
                ("租户ID", 'tenant_id'),
                ("父级ID", 'parent_id'),
                ("名称", 'name'),
                ("文件路径", 'file_path'),
            )
            for label, column in labelled_columns:
                print(f" - {label}: {row[column]}")
            state_value = state_as_int(row['state'])
            print(f" - 状态: {state_value} ({'启用' if state_value == 1 else '禁用'})")
            if state_value == 1:
                print_result(True, f"文件ID {file_id} 存在且已启用")
            else:
                print_result(False, f"文件ID {file_id} 的状态为禁用state={state_value}")
            return

        # Tenant-wide counts.
        cursor.execute("""
            SELECT
            COUNT(*) as total,
            SUM(CASE WHEN state = 1 THEN 1 ELSE 0 END) as enabled,
            SUM(CASE WHEN state = 0 THEN 1 ELSE 0 END) as disabled,
            SUM(CASE WHEN file_path IS NOT NULL AND file_path != '' THEN 1 ELSE 0 END) as files,
            SUM(CASE WHEN file_path IS NULL OR file_path = '' THEN 1 ELSE 0 END) as directories
            FROM f_polic_file_config
            WHERE tenant_id = %s
        """, (tenant_id,))
        stats = cursor.fetchone()
        print(f"\n 统计信息:")
        print(f" - 总记录数: {stats['total']}")
        print(f" - 启用记录: {stats['enabled']}")
        print(f" - 禁用记录: {stats['disabled']}")
        print(f" - 文件节点: {stats['files']}")
        print(f" - 目录节点: {stats['directories']}")

        # Rows whose parent_id has no matching row in the same tenant.
        cursor.execute("""
            SELECT fc1.id, fc1.name, fc1.parent_id
            FROM f_polic_file_config fc1
            LEFT JOIN f_polic_file_config fc2 ON fc1.parent_id = fc2.id AND fc1.tenant_id = fc2.tenant_id
            WHERE fc1.tenant_id = %s
            AND fc1.parent_id IS NOT NULL
            AND fc2.id IS NULL
        """, (tenant_id,))
        broken_parents = cursor.fetchall()
        if not broken_parents:
            print_result(True, "所有parent_id引用正确")
        else:
            print(f"\n [警告] 发现 {len(broken_parents)} 个parent_id引用错误:")
            for item in broken_parents[:10]:
                print(f" - ID: {item['id']}, 名称: {item['name']}, parent_id: {item['parent_id']} (不存在)")
            extra = len(broken_parents) - 10
            if extra > 0:
                print(f" ... 还有 {extra}")
    finally:
        cursor.close()
def check_fields(conn, tenant_id: int):
    """Report on the ``f_polic_field`` rows of one tenant.

    Prints total/enabled/disabled counts per ``field_type`` and lists every
    (``filed_code``, ``field_type``) pair that occurs more than once among
    rows with ``state = 1``.
    """
    type_labels = {1: "输入字段", 2: "输出字段"}

    print_section("检查 f_polic_field 表")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # Counts grouped by field type.
        cursor.execute("""
            SELECT
            field_type,
            COUNT(*) as total,
            SUM(CASE WHEN state = 1 THEN 1 ELSE 0 END) as enabled,
            SUM(CASE WHEN state = 0 THEN 1 ELSE 0 END) as disabled
            FROM f_polic_field
            WHERE tenant_id = %s
            GROUP BY field_type
        """, (tenant_id,))
        per_type = cursor.fetchall()
        print(f"\n 统计信息:")
        for entry in per_type:
            label = type_labels.get(entry['field_type'], "未知")
            print(f" - {label} (field_type={entry['field_type']}):")
            print(f" 总记录数: {entry['total']}")
            print(f" 启用: {entry['enabled']}")
            print(f" 禁用: {entry['disabled']}")

        # Enabled rows sharing the same code and type.
        cursor.execute("""
            SELECT filed_code, field_type, COUNT(*) as count
            FROM f_polic_field
            WHERE tenant_id = %s
            AND state = 1
            GROUP BY filed_code, field_type
            HAVING count > 1
        """, (tenant_id,))
        duplicates = cursor.fetchall()
        if not duplicates:
            print_result(True, "没有重复的filed_code")
        else:
            print(f"\n [警告] 发现重复的filed_code:")
            for dup in duplicates:
                print(f" - filed_code: {dup['filed_code']}, field_type: {dup['field_type']}, 重复数: {dup['count']}")
    finally:
        cursor.close()
def check_file_field_relations(conn, tenant_id: int, file_id: Optional[int] = None):
    """Report on the ``f_polic_file_field`` association rows of one tenant.

    Prints the number of enabled associations, optionally every association
    of ``file_id`` (with the joined template and field), and then three
    tenant-wide checks over enabled associations: rows whose ``file_id`` has
    no template, rows whose ``filed_id`` has no field, and rows that point at
    a template or field whose ``state`` is not 1. Each listing shows at most
    10 rows.

    The state values shown for the last check are decoded the same way as
    everywhere else in this script (bytes -> int) instead of being printed
    raw; a missing (NULL) state is still shown as ``None``.
    """
    def state_as_int(raw):
        # state may come back as bytes or as a number; NULL counts as 0.
        if isinstance(raw, bytes):
            return int.from_bytes(raw, byteorder='big')
        return int(raw) if raw is not None else 0

    def state_for_display(raw):
        # Keep NULL visible as None so a missing row is not mistaken for state 0.
        return None if raw is None else state_as_int(raw)

    def print_remaining(rows):
        # Only the first 10 rows are listed; report how many were left out.
        if len(rows) > 10:
            print(f" ... 还有 {len(rows) - 10}")

    type_labels = {1: "输入字段", 2: "输出字段"}

    print_section("检查 f_polic_file_field 表（关联关系）")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # Number of enabled associations.
        cursor.execute("""
            SELECT COUNT(*) as total
            FROM f_polic_file_field
            WHERE tenant_id = %s AND state = 1
        """, (tenant_id,))
        total_relations = cursor.fetchone()['total']
        print(f"\n 总关联关系数: {total_relations}")

        if file_id:
            # Every association of the requested file, enabled or not.
            cursor.execute("""
                SELECT fff.id, fff.file_id, fff.filed_id, fff.state,
                fc.name as file_name, fc.file_path, fc.state as file_state,
                f.name as field_name, f.filed_code, f.field_type, f.state as field_state
                FROM f_polic_file_field fff
                LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
                LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
                WHERE fff.tenant_id = %s AND fff.file_id = %s
            """, (tenant_id, file_id))
            relations = cursor.fetchall()
            if relations:
                print(f"\n 文件ID {file_id} 的关联关系 ({len(relations)} 条):")
                for rel in relations:
                    print(f"\n 关联ID: {rel['id']}")
                    print(f" - file_id: {rel['file_id']}")
                    if rel['file_name']:
                        print(f" 模板: {rel['file_name']} (路径: {rel['file_path']})")
                        file_state = state_as_int(rel['file_state'])
                        print(f" 状态: {file_state} ({'启用' if file_state == 1 else '禁用'})")
                    else:
                        print(f" [错误] 模板不存在！")
                    print(f" - filed_id: {rel['filed_id']}")
                    if rel['field_name']:
                        field_type_name = type_labels.get(rel['field_type'], "未知")
                        field_state = state_as_int(rel['field_state'])
                        print(f" 字段: {rel['field_name']} ({rel['filed_code']}, {field_type_name})")
                        print(f" 状态: {field_state} ({'启用' if field_state == 1 else '禁用'})")
                    else:
                        print(f" [错误] 字段不存在！")
            else:
                print(f"\n 文件ID {file_id} 没有关联关系")

        # Enabled associations whose file_id matches no template.
        cursor.execute("""
            SELECT fff.id, fff.file_id, fff.filed_id
            FROM f_polic_file_field fff
            LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
            WHERE fff.tenant_id = %s
            AND fff.state = 1
            AND fc.id IS NULL
        """, (tenant_id,))
        orphaned_file_relations = cursor.fetchall()
        if orphaned_file_relations:
            print(f"\n [错误] 发现 {len(orphaned_file_relations)} 个孤立的关联关系file_id不存在:")
            for rel in orphaned_file_relations[:10]:
                print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}")
            print_remaining(orphaned_file_relations)
        else:
            print_result(True, "所有file_id引用正确")

        # Enabled associations whose filed_id matches no field.
        cursor.execute("""
            SELECT fff.id, fff.file_id, fff.filed_id
            FROM f_polic_file_field fff
            LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
            WHERE fff.tenant_id = %s
            AND fff.state = 1
            AND f.id IS NULL
        """, (tenant_id,))
        orphaned_field_relations = cursor.fetchall()
        if orphaned_field_relations:
            print(f"\n [错误] 发现 {len(orphaned_field_relations)} 个孤立的关联关系filed_id不存在:")
            for rel in orphaned_field_relations[:10]:
                print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}")
            print_remaining(orphaned_field_relations)
        else:
            print_result(True, "所有filed_id引用正确")

        # Enabled associations that point at a template or field with state != 1.
        cursor.execute("""
            SELECT fff.id, fff.file_id, fff.filed_id,
            fc.state as file_state, f.state as field_state
            FROM f_polic_file_field fff
            LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
            LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
            WHERE fff.tenant_id = %s
            AND fff.state = 1
            AND (fc.state != 1 OR f.state != 1)
        """, (tenant_id,))
        disabled_relations = cursor.fetchall()
        if disabled_relations:
            print(f"\n [警告] 发现 {len(disabled_relations)} 个关联到禁用模板或字段的关联关系:")
            for rel in disabled_relations[:10]:
                print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}")
                file_state = state_for_display(rel['file_state'])
                field_state = state_for_display(rel['field_state'])
                print(f" 模板状态: {file_state}, 字段状态: {field_state}")
            print_remaining(disabled_relations)
        else:
            print_result(True, "所有关联关系都关联到启用的模板和字段")
    finally:
        cursor.close()
def check_specific_file(conn, tenant_id: int, file_id: int):
    """Print everything known about one ``f_polic_file_config`` row.

    Shows the row itself, its parent row (when ``parent_id`` is truthy), the
    enabled associations of the file split into input (``field_type == 1``)
    and all other fields, and finally the enabled associations whose field
    row does not exist. Returns early when the file row is not found for the
    tenant.
    """
    def state_as_int(raw):
        # state may come back as bytes or as a number; NULL counts as 0.
        if isinstance(raw, bytes):
            return int.from_bytes(raw, byteorder='big')
        return int(raw) if raw is not None else 0

    type_labels = {1: "输入字段", 2: "输出字段"}

    print_section(f"详细检查文件ID {file_id}")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # 1. The file row itself.
        cursor.execute("""
            SELECT id, tenant_id, parent_id, name, file_path, state, created_time, updated_time
            FROM f_polic_file_config
            WHERE id = %s AND tenant_id = %s
        """, (file_id, tenant_id))
        file_config = cursor.fetchone()
        if not file_config:
            print_result(False, f"文件ID {file_id} 不存在或不属于租户 {tenant_id}")
            return

        print(f"\n 文件配置信息:")
        labelled_columns = (
            ("ID", 'id'),
            ("租户ID", 'tenant_id'),
            ("父级ID", 'parent_id'),
            ("名称", 'name'),
            ("文件路径", 'file_path'),
        )
        for label, column in labelled_columns:
            print(f" - {label}: {file_config[column]}")
        file_state = state_as_int(file_config['state'])
        print(f" - 状态: {file_state} ({'启用' if file_state == 1 else '禁用'})")
        print(f" - 创建时间: {file_config['created_time']}")
        print(f" - 更新时间: {file_config['updated_time']}")

        # 2. The parent row, if one is referenced.
        parent_ref = file_config['parent_id']
        if parent_ref:
            cursor.execute("""
                SELECT id, name, file_path, state
                FROM f_polic_file_config
                WHERE id = %s AND tenant_id = %s
            """, (parent_ref, tenant_id))
            parent = cursor.fetchone()
            if not parent:
                print(f"\n [错误] 父级ID {file_config['parent_id']} 不存在！")
            else:
                parent_state = state_as_int(parent['state'])
                print(f"\n 父级信息:")
                print(f" - ID: {parent['id']}")
                print(f" - 名称: {parent['name']}")
                print(f" - 状态: {parent_state} ({'启用' if parent_state == 1 else '禁用'})")

        # 3. Enabled associations with their joined field rows.
        cursor.execute("""
            SELECT fff.id as relation_id, fff.filed_id,
            f.name as field_name, f.filed_code, f.field_type, f.state as field_state
            FROM f_polic_file_field fff
            LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
            WHERE fff.tenant_id = %s AND fff.file_id = %s AND fff.state = 1
            ORDER BY f.field_type, f.filed_code
        """, (tenant_id, file_id))
        relations = cursor.fetchall()
        print(f"\n 关联的字段 ({len(relations)} 个):")
        input_fields = []
        output_fields = []
        for rel in relations:
            field_type_name = type_labels.get(rel['field_type'], "未知")
            field_state = state_as_int(rel['field_state'])
            line = f" - {rel['field_name']} ({rel['filed_code']}, {field_type_name})"
            if field_state != 1:
                line += f" [状态: 禁用]"
            if not rel['field_name']:
                line += f" [错误: 字段不存在！]"
            # Anything that is not an input field is listed with the outputs.
            bucket = input_fields if rel['field_type'] == 1 else output_fields
            bucket.append(line)
        if input_fields:
            print(f"\n 输入字段 ({len(input_fields)} 个):")
            for line in input_fields:
                print(line)
        if output_fields:
            print(f"\n 输出字段 ({len(output_fields)} 个):")
            for line in output_fields:
                print(line)

        # 4. Enabled associations whose field row is missing.
        cursor.execute("""
            SELECT fff.id, fff.filed_id
            FROM f_polic_file_field fff
            LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
            WHERE fff.tenant_id = %s AND fff.file_id = %s AND fff.state = 1 AND f.id IS NULL
        """, (tenant_id, file_id))
        orphaned = cursor.fetchall()
        if orphaned:
            print(f"\n [错误] 发现 {len(orphaned)} 个孤立的关联关系（字段不存在）:")
            for rel in orphaned:
                print(f" - 关联ID: {rel['id']}, filed_id: {rel['filed_id']}")
    finally:
        cursor.close()
def main():
    """Parse arguments, connect, run every check, and close the connection.

    Returns without running any check when the connection cannot be opened.

    NOTE(review): indentation was lost in the source; the final
    "connection closed" line is assumed to sit inside ``finally`` - confirm.
    """
    print_section("数据库ID关系检查工具")
    config = get_db_config_from_args()

    # Echo the effective settings (the password is not printed).
    print_section("配置信息")
    print(f" 数据库服务器: {config['host']}:{config['port']}")
    print(f" 数据库名称: {config['database']}")
    print(f" 用户名: {config['user']}")
    print(f" 租户ID: {config['tenant_id']}")
    if config.get('file_id'):
        print(f" 检查文件ID: {config['file_id']}")

    print_section("连接数据库")
    conn = test_db_connection(config)
    if not conn:
        return
    print_result(True, "数据库连接成功")

    try:
        tenant_id = config['tenant_id']
        file_id = config.get('file_id')

        check_file_config(conn, tenant_id, file_id)
        check_fields(conn, tenant_id)
        check_file_field_relations(conn, tenant_id, file_id)
        # A detailed per-file report is added only when a file id was given.
        if file_id:
            check_specific_file(conn, tenant_id, file_id)

        print_section("检查完成")
        print("请查看上述检查结果，找出问题所在")
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
# Script entry point: Ctrl-C exits with status 0, any other exception prints
# a traceback and exits with status 1.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n[中断] 用户取消操作")
        sys.exit(0)
    except Exception as exc:
        import traceback
        print(f"\n[错误] 发生异常: {str(exc)}")
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,874 @@
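"""
Clean up and re-synchronise template data into the target database.

What it does:
1. Cleans up stale data under the given tenant_id (including rows whose path points at MinIO)
2. Cleans up the related field associations
3. Rescans the template_finish/ directory
4. Recreates/updates the template rows
5. Rebuilds the field associations

Usage (replace the placeholders with real connection settings; never commit real credentials):
python clean_and_resync_templates.py --host <host> --port 3306 --user <user> --password <password> --database <database> --tenant-id 1
"""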
import os
import sys
import pymysql
import argparse
from pathlib import Path
from typing import Dict, List, Set, Optional
import re
from docx import Document
import getpass
# On Windows, re-wrap stdout and stderr as UTF-8 text streams over their
# underlying binary buffers.
if sys.platform == 'win32':
    import io
    for _stream_name in ('stdout', 'stderr'):
        _binary = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name, io.TextIOWrapper(_binary, encoding='utf-8'))
# Project root: the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent
# Directory scanned for template files.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# Values written to the created_by / updated_by columns by the INSERT and
# UPDATE statements in this script.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
def print_section(title):
    """Print a section banner: blank line, 70 '=' characters, the title, 70 '=' characters."""
    rule = "=" * 70
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success, message):
    """Print ``message`` prefixed with ``[OK]`` when ``success`` is truthy, else ``[FAIL]``."""
    print("[OK]" if success else "[FAIL]", message)
def generate_id():
    """Return a new integer ID based on the current time in microseconds.

    IDs returned by one process are strictly increasing: when the clock has
    not advanced past the previously returned value (calls in a tight loop,
    or a coarse system clock), the previous ID plus one is returned instead,
    so consecutive calls never yield the same ID.
    """
    import time
    candidate = int(time.time() * 1000000)
    previous = getattr(generate_id, "_last_id", 0)
    if candidate <= previous:
        candidate = previous + 1
    # Remember the last value on the function object; no module state needed.
    generate_id._last_id = candidate
    return candidate
def get_db_config_from_args() -> Optional[Dict]:
    """Parse the command line into the configuration dict used by this script.

    ``--password`` may be omitted; in that case the password is read with
    ``getpass.getpass`` so it does not have to appear on the command line.
    Passing ``--password`` explicitly keeps working as before.

    Returns:
        A dict with the keys ``host``, ``port``, ``user``, ``password``,
        ``database``, ``charset`` (always ``'utf8mb4'``), ``tenant_id``,
        ``dry_run`` and ``skip_clean``.
    """
    parser = argparse.ArgumentParser(
        description='清理并重新同步模板数据到指定数据库',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        # The example deliberately uses placeholders instead of real
        # connection details.
        epilog=(
            "\n示例\n"
            "python clean_and_resync_templates.py --host <host> --port 3306 "
            "--user <user> --database <database> --tenant-id 1\n"
        ),
    )
    parser.add_argument('--host', type=str, required=True, help='MySQL服务器地址')
    parser.add_argument('--port', type=int, required=True, help='MySQL服务器端口')
    parser.add_argument('--user', type=str, required=True, help='MySQL用户名')
    parser.add_argument('--password', type=str, default=None,
                        help='MySQL密码(未提供时将提示输入)')
    parser.add_argument('--database', type=str, required=True, help='数据库名称')
    parser.add_argument('--tenant-id', type=int, required=True, help='租户ID')
    parser.add_argument('--dry-run', action='store_true', help='预览模式（不实际更新数据库）')
    parser.add_argument('--skip-clean', action='store_true', help='跳过清理步骤（只同步）')
    args = parser.parse_args()

    password = args.password
    if password is None:
        password = getpass.getpass('MySQL密码: ')

    return {
        'host': args.host,
        'port': args.port,
        'user': args.user,
        'password': password,
        'database': args.database,
        'charset': 'utf8mb4',
        'tenant_id': args.tenant_id,
        'dry_run': args.dry_run,
        'skip_clean': args.skip_clean
    }
def test_db_connection(config: Dict) -> Optional[pymysql.Connection]:
    """Open a pymysql connection from ``config``.

    Reads the ``host``, ``port``, ``user``, ``password``, ``database`` and
    ``charset`` keys. Returns the connection, or ``None`` after printing a
    ``[FAIL]`` line when anything in the attempt raises.
    """
    wanted = ('host', 'port', 'user', 'password', 'database', 'charset')
    try:
        connect_kwargs = {key: config[key] for key in wanted}
        return pymysql.connect(**connect_kwargs)
    except Exception as exc:
        print_result(False, f"数据库连接失败: {str(exc)}")
        return None
def scan_local_templates() -> Dict[str, Path]:
    """Map every .docx/.doc file under ``TEMPLATES_DIR`` to its path.

    Keys are the file paths relative to ``PROJECT_ROOT`` with backslashes
    replaced by forward slashes; values are the ``Path`` objects found.
    Returns an empty dict when ``TEMPLATES_DIR`` does not exist.
    """
    if not TEMPLATES_DIR.exists():
        return {}
    word_suffixes = ['.docx', '.doc']
    return {
        str(found.relative_to(PROJECT_ROOT)).replace('\\', '/'): found
        for found in TEMPLATES_DIR.rglob("*")
        if found.is_file() and found.suffix.lower() in word_suffixes
    }
def clean_old_data(conn, tenant_id: int, local_templates: Dict[str, Path], dry_run: bool = False):
    """Retire enabled template rows that no longer correspond to a local file.

    An enabled ``f_polic_file_config`` row of the tenant is selected for
    removal when its ``file_path``

    * contains ``minio`` (case-insensitive) or starts with ``http://`` /
      ``https://``;
    * is non-empty but not a key of ``local_templates``; or
    * duplicates the ``file_path`` of an earlier row (the first row seen for
      a path is kept).

    Rows without a ``file_path`` are never selected. Unless ``dry_run`` is
    set, the selected rows' associations are deleted from
    ``f_polic_file_field`` and the rows themselves are marked ``state = 0``;
    both statements are committed together, and rolled back together if
    either one fails (the exception is re-raised).

    Returns:
        A dict with the counts ``total``, ``deleted``, ``minio_paths``,
        ``invalid_paths`` and ``duplicate_paths``. ``deleted`` is the number
        of rows selected, also in dry-run mode.
    """
    print_section("清理旧数据")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # 1. Load every enabled template of the tenant.
        cursor.execute("""
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        all_templates = cursor.fetchall()
        print(f" 数据库中的模板总数: {len(all_templates)}")

        # 2. Decide which rows go. The first row seen for each path is the
        #    one kept when a path occurs more than once.
        first_by_path = {}
        for template in all_templates:
            file_path = template.get('file_path')
            if file_path:
                first_by_path.setdefault(file_path, template)

        to_delete = []
        minio_paths = []
        invalid_paths = []
        duplicate_paths = []
        for template in all_templates:
            file_path = template.get('file_path')
            if not file_path:
                continue
            if 'minio' in file_path.lower() or file_path.startswith(('http://', 'https://')):
                bucket = minio_paths
            elif file_path not in local_templates:
                bucket = invalid_paths
            elif template != first_by_path[file_path]:
                bucket = duplicate_paths
            else:
                continue
            bucket.append(template)
            to_delete.append(template['id'])

        # 3. Summary of what was selected.
        print(f"\n 需要删除的模板:")
        print(f" - MinIO路径的模板: {len(minio_paths)}")
        print(f" - 无效路径的模板: {len(invalid_paths)}")
        print(f" - 重复路径的模板: {len(duplicate_paths)}")
        print(f" - 总计: {len(to_delete)}")

        if not to_delete:
            print_result(True, "没有需要清理的数据")
        elif dry_run:
            print("\n [预览模式] 将删除上述模板记录")
        else:
            # Only '%s' markers are interpolated into the SQL text; the ids
            # themselves are passed as parameters.
            placeholders = ','.join(['%s'] * len(to_delete))
            try:
                # 4. Remove the associations of the selected templates.
                print("\n 删除字段关联关系...")
                cursor.execute(f"""
                    DELETE FROM f_polic_file_field
                    WHERE tenant_id = %s
                    AND file_id IN ({placeholders})
                """, [tenant_id] + to_delete)
                deleted_relations = cursor.rowcount
                print(f" 删除了 {deleted_relations} 条字段关联关系")

                # 5. Soft-delete the templates themselves.
                print("\n 删除模板记录...")
                cursor.execute(f"""
                    UPDATE f_polic_file_config
                    SET state = 0, updated_time = NOW(), updated_by = %s
                    WHERE tenant_id = %s
                    AND id IN ({placeholders})
                """, [UPDATED_BY, tenant_id] + to_delete)
                deleted_templates = cursor.rowcount
                print(f" 删除了 {deleted_templates} 个模板记录标记为state=0")
                conn.commit()
            except Exception:
                # Never leave the DELETE pending without the matching UPDATE.
                conn.rollback()
                raise
            print_result(True, f"清理完成：删除了 {deleted_templates} 个模板记录")

        return {
            'total': len(all_templates),
            'deleted': len(to_delete),
            'minio_paths': len(minio_paths),
            'invalid_paths': len(invalid_paths),
            'duplicate_paths': len(duplicate_paths)
        }
    finally:
        cursor.close()
def scan_directory_structure(base_dir: Path) -> Dict:
    """Walk ``base_dir`` and list its sub-directories and Word files.

    Returns a dict with two lists of ``{'name', 'path', 'parent_path'}``
    records, ``directories`` and ``files``. Paths are relative to
    ``base_dir`` and use forward slashes. Entries of each directory are
    visited in sorted order, and a sub-directory is fully walked before the
    next sibling entry. Only ``.docx`` / ``.doc`` files (suffix compared
    case-insensitively) are listed.

    ``base_dir`` itself is not listed. Directories directly below it get
    ``parent_path == '.'``, while files directly below it get
    ``parent_path is None``.
    """
    word_suffixes = ['.docx', '.doc']
    found_dirs = []
    found_files = []

    def relative_posix(path: Path) -> str:
        return str(path.relative_to(base_dir)).replace('\\', '/')

    def walk(folder: Path, parent_path: Optional[str] = None):
        if not (folder.exists() and folder.is_dir()):
            return
        here = relative_posix(folder)
        at_base = here == '.'
        if not at_base:
            found_dirs.append({
                'name': folder.name,
                'path': here,
                'parent_path': parent_path
            })
        for entry in sorted(folder.iterdir()):
            if entry.is_dir():
                # Children always receive this folder's own relative path,
                # which is '.' for the base directory.
                walk(entry, here)
                continue
            if entry.is_file() and entry.suffix.lower() in word_suffixes:
                found_files.append({
                    'name': entry.name,
                    'path': relative_posix(entry),
                    'parent_path': None if at_base else here
                })

    walk(base_dir)
    return {
        'directories': found_dirs,
        'files': found_files
    }
def get_existing_templates(conn, tenant_id: int) -> Dict:
    """Load the tenant's enabled (``state = 1``) ``f_polic_file_config`` rows.

    Returns a dict with three indexes over the same rows:
    ``by_id`` (id -> row, all rows), ``by_path`` (file_path -> row, rows with
    a truthy ``file_path``) and ``by_name`` (name -> list of rows, rows
    without a ``file_path``).
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("""
            SELECT id, name, file_path, parent_id
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        by_path = {}
        by_name = {}
        by_id = {}
        for row in cursor.fetchall():
            by_id[row['id']] = row
            if row['file_path']:
                by_path[row['file_path']] = row
            else:
                by_name.setdefault(row['name'], []).append(row)
        return {
            'by_path': by_path,
            'by_name': by_name,
            'by_id': by_id
        }
    finally:
        cursor.close()
def sync_template_hierarchy(conn, tenant_id: int, dry_run: bool = False):
    """Mirror the ``TEMPLATES_DIR`` tree into ``f_polic_file_config``.

    Directories become rows without a ``file_path``; Word files become rows
    whose ``file_path`` is the file's path relative to ``PROJECT_ROOT``
    (forward slashes). That is the form ``scan_local_templates`` produces and
    the form ``sync_field_relations`` resolves with
    ``PROJECT_ROOT / file_path``, so rows written here are found again by the
    other steps of this script.

    An existing directory row is reused when an enabled row with the same
    name, the same ``parent_id`` and no ``file_path`` exists; otherwise a row
    is inserted. An existing file row (matched by ``file_path``) has its
    ``parent_id`` / ``name`` updated when they differ; otherwise a row is
    inserted. With ``dry_run`` nothing is written, but the counters are still
    computed.

    Returns:
        ``None`` when the scan finds neither directories nor files, else a
        dict with ``directories_created``, ``directories_updated``,
        ``files_created`` and ``files_updated``.
    """
    def run_write(sql, params):
        # Execute and commit one statement; the cursor is closed even when
        # the statement raises.
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            conn.commit()
        finally:
            cursor.close()

    print_section("同步模板层级结构")

    # 1. Scan the directory tree.
    print("1. 扫描目录结构...")
    structure = scan_directory_structure(TEMPLATES_DIR)
    print_result(True, f"找到 {len(structure['directories'])} 个目录，{len(structure['files'])} 个文件")
    if not structure['directories'] and not structure['files']:
        print_result(False, "未找到任何目录或文件")
        return None

    # 2. Load what is already in the database.
    print("\n2. 获取现有模板...")
    existing_templates = get_existing_templates(conn, tenant_id)
    print_result(True, f"找到 {len(existing_templates['by_path'])} 个文件模板，{len(existing_templates['by_name'])} 个目录模板")

    # 3. Directory nodes. path_to_id maps a TEMPLATES_DIR-relative directory
    #    path to the id of its row so children can resolve their parent.
    print("\n3. 创建/更新目录节点...")
    path_to_id = {}
    dir_created = 0
    # A directory row is only matched when its parent_id already equals the
    # expected one, so there is never anything to update on a matched row.
    dir_updated = 0
    for dir_info in structure['directories']:
        parent_id = None
        if dir_info['parent_path']:
            # '.' (top-level directories) is never a key, so this yields None.
            parent_id = path_to_id.get(dir_info['parent_path'])
        candidates = existing_templates['by_name'].get(dir_info['name'], [])
        existing = next(
            (c for c in candidates
             if c.get('parent_id') == parent_id and not c.get('file_path')),
            None,
        )
        if existing:
            dir_id = existing['id']
        else:
            dir_id = generate_id()
            dir_created += 1
            if not dry_run:
                run_write("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1)
                """, (dir_id, tenant_id, parent_id, dir_info['name'], CREATED_BY, UPDATED_BY))
        path_to_id[dir_info['path']] = dir_id
    print_result(True, f"创建 {dir_created} 个目录，更新 {dir_updated} 个目录")

    # 4. File nodes. The scan yields paths relative to TEMPLATES_DIR; the
    #    database stores them relative to PROJECT_ROOT.
    print("\n4. 创建/更新文件节点...")
    root_prefix = str(TEMPLATES_DIR.relative_to(PROJECT_ROOT)).replace('\\', '/')
    file_created = 0
    file_updated = 0
    for file_info in structure['files']:
        parent_id = None
        if file_info['parent_path']:
            parent_id = path_to_id.get(file_info['parent_path'])
        stored_path = f"{root_prefix}/{file_info['path']}"
        existing = existing_templates['by_path'].get(stored_path)
        if existing:
            file_id = existing['id']
            if existing.get('parent_id') != parent_id or existing.get('name') != file_info['name']:
                file_updated += 1
                if not dry_run:
                    run_write("""
                        UPDATE f_polic_file_config
                        SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s
                        WHERE id = %s AND tenant_id = %s
                    """, (parent_id, file_info['name'], UPDATED_BY, file_id, tenant_id))
        else:
            file_id = generate_id()
            file_created += 1
            if not dry_run:
                run_write("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (file_id, tenant_id, parent_id, file_info['name'], stored_path, CREATED_BY, UPDATED_BY))
    print_result(True, f"创建 {file_created} 个文件，更新 {file_updated} 个文件")

    return {
        'directories_created': dir_created,
        'directories_updated': dir_updated,
        'files_created': file_created,
        'files_updated': file_updated
    }
def get_input_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Return ``filed_code -> id`` for the tenant's enabled input fields.

    Only rows with ``field_type = 1``, ``state = 1`` and a ``filed_code`` of
    ``clue_info`` or ``target_basic_info_clue`` are considered.
    """
    query = """
        SELECT id, filed_code, name
        FROM f_polic_field
        WHERE tenant_id = %s
        AND field_type = 1
        AND filed_code IN ('clue_info', 'target_basic_info_clue')
        AND state = 1
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        return {row['filed_code']: row['id'] for row in cursor.fetchall()}
    finally:
        cursor.close()
def get_output_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Return ``filed_code -> id`` for the tenant's enabled output fields.

    Only rows with ``field_type = 2`` and ``state = 1`` are considered.
    """
    query = """
        SELECT id, filed_code, name
        FROM f_polic_field
        WHERE tenant_id = %s
        AND field_type = 2
        AND state = 1
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        return {row['filed_code']: row['id'] for row in cursor.fetchall()}
    finally:
        cursor.close()
def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """Collect the distinct ``{{name}}`` placeholder names used in a document.

    Body paragraphs and the paragraphs of every table cell are searched;
    surrounding whitespace is stripped from each name and empty names are
    ignored.

    The extraction stays best-effort: a table that cannot be read is skipped
    and a document that cannot be opened yields whatever was collected so far
    (possibly an empty set). Unlike before, each such failure is reported on
    stdout instead of being swallowed, and ``KeyboardInterrupt`` /
    ``SystemExit`` are no longer caught.
    """
    placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')
    placeholders = set()

    def collect(paragraphs):
        # Add every non-empty placeholder name found in the given paragraphs.
        for paragraph in paragraphs:
            for match in placeholder_pattern.findall(paragraph.text):
                field_code = match.strip()
                if field_code:
                    placeholders.add(field_code)

    try:
        doc = Document(file_path)
        collect(doc.paragraphs)
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        collect(cell.paragraphs)
            except Exception as exc:
                print(f" [警告] 跳过无法读取的表格 ({file_path}): {exc}")
                continue
    except Exception as exc:
        print(f" [警告] 无法读取模板 {file_path}: {exc}")
    return placeholders
def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """Insert an enabled input field (``field_type = 1``) and return its id.

    The display name comes from a small built-in map for the two known codes
    and otherwise is ``field_code`` with underscores replaced by spaces.

    Returns:
        The id of the inserted row, or ``None`` when the insert fails. On
        failure the transaction is rolled back and the error is reported on
        stdout instead of being discarded silently.
    """
    field_name_map = {
        'clue_info': '线索信息',
        'target_basic_info_clue': '被核查人基本信息（线索）'
    }
    insert_sql = """
        INSERT INTO f_polic_field
        (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """
    cursor = conn.cursor()
    try:
        field_id = generate_id()
        field_name = field_name_map.get(field_code, field_code.replace('_', ' '))
        cursor.execute(insert_sql, (
            field_id,
            tenant_id,
            field_name,
            field_code,
            1,
            CREATED_BY,
            UPDATED_BY
        ))
        conn.commit()
        return field_id
    except Exception as exc:
        conn.rollback()
        print_result(False, f"创建输入字段 {field_code} 失败: {exc}")
        return None
    finally:
        cursor.close()
def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """Return the id of the tenant's field with ``field_code``, creating it if needed.

    An existing row with the same ``tenant_id`` and ``filed_code`` is reused
    as-is (the lookup does not filter on ``field_type`` or ``state``).
    Otherwise an enabled output field (``field_type = 2``) is inserted, named
    after ``field_code`` with underscores replaced by spaces.

    Returns:
        The existing or newly inserted id, or ``None`` when the lookup or the
        insert fails. On failure the transaction is rolled back and the error
        is reported on stdout instead of being discarded silently.
    """
    insert_sql = """
        INSERT INTO f_polic_field
        (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """
    # One cursor serves both the lookup and the insert, so nothing is left
    # open when the lookup raises.
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("""
            SELECT id FROM f_polic_field
            WHERE tenant_id = %s AND filed_code = %s
        """, (tenant_id, field_code))
        existing = cursor.fetchone()
        if existing:
            return existing['id']

        field_id = generate_id()
        field_name = field_code.replace('_', ' ')
        cursor.execute(insert_sql, (
            field_id,
            tenant_id,
            field_name,
            field_code,
            2,
            CREATED_BY,
            UPDATED_BY
        ))
        conn.commit()
        return field_id
    except Exception as exc:
        conn.rollback()
        print_result(False, f"创建输出字段 {field_code} 失败: {exc}")
        return None
    finally:
        cursor.close()
def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]:
    """Return the ``filed_id`` values of the enabled associations of one template.

    Only ``f_polic_file_field`` rows with the given ``tenant_id`` and
    ``file_id`` and ``state = 1`` are considered.
    """
    query = """
        SELECT filed_id
        FROM f_polic_file_field
        WHERE tenant_id = %s
        AND file_id = %s
        AND state = 1
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query, (tenant_id, file_id))
        rows = cursor.fetchall()
        # Each row has a single column: the field id.
        return {record[0] for record in rows}
    finally:
        cursor.close()
def sync_field_relations(conn, tenant_id: int, dry_run: bool = False):
    """Rebuild the template-to-field relations of one tenant.

    Steps: load the input fields (creating them when none exist), load the
    output fields, load every active template that has a file path, delete
    the tenant's existing relations, then scan each local template file for
    placeholders and link the template to the input fields plus every
    matching output field.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant to synchronize.
        dry_run: When True nothing is written: no relation is deleted or
            inserted and no missing field is created.

    Returns:
        Dict with ``total_templates``, ``updated``, ``errors``,
        ``placeholders_found`` and ``fields_created``; None when no input
        field can be obtained (outside dry-run) or no template is found.
    """
    print_section("同步字段关联关系")

    # 1. Input fields
    print("1. 获取输入字段...")
    input_fields = get_input_fields(conn, tenant_id)
    if not input_fields:
        if dry_run:
            # A preview must not write to the database, so only announce it.
            print(" [预览模式] 将创建缺失的输入字段")
        else:
            print(" 创建缺失的输入字段...")
            for field_code in ['clue_info', 'target_basic_info_clue']:
                field_id = create_missing_input_field(conn, tenant_id, field_code)
                if field_id:
                    input_fields[field_code] = field_id
    if not input_fields and not dry_run:
        print_result(False, "无法获取或创建输入字段")
        return None
    input_field_ids = list(input_fields.values())
    print_result(True, f"找到 {len(input_field_ids)} 个输入字段")

    # 2. Output fields
    print("\n2. 获取输出字段...")
    output_fields = get_output_fields(conn, tenant_id)
    print_result(True, f"找到 {len(output_fields)} 个输出字段")

    # 3. All templates (file nodes only)
    print("\n3. 获取所有模板...")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        sql = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND file_path IS NOT NULL
            AND file_path != ''
            AND state = 1
        """
        cursor.execute(sql, (tenant_id,))
        templates = cursor.fetchall()
    finally:
        cursor.close()
    print_result(True, f"找到 {len(templates)} 个模板")
    if not templates:
        print_result(False, "未找到模板")
        return None

    # 4. Remove every existing relation of the tenant before rebuilding
    print("\n4. 清理现有关联关系...")
    if not dry_run:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                DELETE FROM f_polic_file_field
                WHERE tenant_id = %s
            """, (tenant_id,))
            deleted_count = cursor.rowcount
            conn.commit()
            print_result(True, f"删除了 {deleted_count} 条旧关联关系")
        finally:
            cursor.close()
    else:
        print(" [预览模式] 将清理所有现有关联关系")

    # 5. Scan template placeholders and create the relations
    print("\n5. 扫描模板占位符并创建关联关系...")
    total_updated = 0
    total_errors = 0
    all_placeholders_found = set()
    missing_fields = set()
    insert_sql = """
        INSERT INTO f_polic_file_field
        (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """
    for i, template in enumerate(templates, 1):
        template_id = template['id']
        template_name = template['name']
        file_path = template['file_path']
        if i % 20 == 0:
            print(f" 处理进度: {i}/{len(templates)}")

        # A template whose local file is missing is counted as an error.
        local_file = PROJECT_ROOT / file_path
        if not local_file.exists():
            total_errors += 1
            continue

        placeholders = extract_placeholders_from_docx(local_file)
        all_placeholders_found.update(placeholders)

        # Resolve each placeholder to an output field ID.
        output_field_ids = []
        for placeholder in placeholders:
            if placeholder in output_fields:
                output_field_ids.append(output_fields[placeholder])
                continue
            missing_fields.add(placeholder)
            if dry_run:
                # Preview only: record the missing field, do not create it.
                continue
            field_id = create_missing_output_field(conn, tenant_id, placeholder)
            if field_id:
                output_fields[placeholder] = field_id
                output_field_ids.append(field_id)

        all_field_ids = input_field_ids + output_field_ids
        if not dry_run and all_field_ids:
            cursor = conn.cursor()
            try:
                for field_id in all_field_ids:
                    relation_id = generate_id()
                    cursor.execute(insert_sql, (
                        relation_id,
                        tenant_id,
                        template_id,
                        field_id,
                        CREATED_BY,
                        UPDATED_BY
                    ))
                # One commit per template keeps a failure local to that template.
                conn.commit()
                total_updated += 1
            except Exception as e:
                conn.rollback()
                total_errors += 1
                # Report the failure instead of hiding it behind the counter.
                print(f" [错误] {template_name}: 创建关联关系失败: {str(e)}")
            finally:
                cursor.close()
        else:
            total_updated += 1

    # 6. Summary
    print_section("字段关联同步结果")
    print(f" 总模板数: {len(templates)}")
    print(f" 已处理: {total_updated}")
    print(f" 错误: {total_errors}")
    print(f" 发现的占位符总数: {len(all_placeholders_found)}")
    print(f" 创建的字段数: {len(missing_fields)}")
    return {
        'total_templates': len(templates),
        'updated': total_updated,
        'errors': total_errors,
        'placeholders_found': len(all_placeholders_found),
        'fields_created': len(missing_fields)
    }
def main():
    """Clean stale template data, then resync the hierarchy and field relations.

    Reads the configuration from the command line, asks for confirmation
    unless running in dry-run mode, connects to the database, runs the
    clean / hierarchy / field-relation steps and prints a summary. The
    connection is always closed at the end.
    """
    print_section("清理并重新同步模板数据")

    config = get_db_config_from_args()

    print_section("配置信息")
    print(f" 数据库服务器: {config['host']}:{config['port']}")
    print(f" 数据库名称: {config['database']}")
    print(f" 用户名: {config['user']}")
    print(f" 租户ID: {config['tenant_id']}")
    # Both branches used to be empty strings, so the flags never showed.
    print(f" 预览模式: {'是' if config['dry_run'] else '否'}")
    print(f" 跳过清理: {'是' if config['skip_clean'] else '否'}")
    if config['dry_run']:
        print("\n[注意] 当前为预览模式,不会实际更新数据库")

    # Destructive run: require an explicit "yes".
    if not config.get('dry_run'):
        print("\n[警告] 此操作将清理指定租户下的旧数据并重新同步")
        confirm = input("确认执行?[yes/N]: ").strip().lower()
        if confirm != 'yes':
            print("已取消")
            return

    print_section("连接数据库")
    conn = test_db_connection(config)
    if not conn:
        return
    print_result(True, "数据库连接成功")

    try:
        tenant_id = config['tenant_id']
        dry_run = config['dry_run']
        skip_clean = config['skip_clean']
        results = {}

        # 1. Scan local templates
        print_section("扫描本地模板")
        local_templates = scan_local_templates()
        print_result(True, f"找到 {len(local_templates)} 个本地模板文件")

        # 2. Clean old data
        if not skip_clean:
            clean_result = clean_old_data(conn, tenant_id, local_templates, dry_run)
            results['clean'] = clean_result
        else:
            print_section("跳过清理步骤")
            print(" 已跳过清理步骤")

        # 3. Sync the template hierarchy
        hierarchy_result = sync_template_hierarchy(conn, tenant_id, dry_run)
        results['hierarchy'] = hierarchy_result

        # 4. Sync the field relations
        fields_result = sync_field_relations(conn, tenant_id, dry_run)
        results['fields'] = fields_result

        # 5. Summary
        print_section("同步完成")
        if config['dry_run']:
            print(" 本次为预览模式,未实际更新数据库")
        else:
            print(" 数据库已更新")

        # Each step may yield no result; only print the sections that have one.
        c = results.get('clean')
        if c:
            print(f"\n 清理结果:")
            print(f" - 总模板数: {c['total']}")
            print(f" - 删除模板: {c['deleted']}")
            print(f" * MinIO路径: {c['minio_paths']}")
            print(f" * 无效路径: {c['invalid_paths']}")
            print(f" * 重复路径: {c['duplicate_paths']}")
        h = results.get('hierarchy')
        if h:
            print(f"\n 层级结构:")
            print(f" - 创建目录: {h['directories_created']}")
            print(f" - 更新目录: {h['directories_updated']}")
            print(f" - 创建文件: {h['files_created']}")
            print(f" - 更新文件: {h['files_updated']}")
        f = results.get('fields')
        if f:
            print(f"\n 字段关联:")
            print(f" - 总模板数: {f['total_templates']}")
            print(f" - 已处理: {f['updated']}")
            print(f" - 发现的占位符: {f['placeholders_found']}")
            print(f" - 创建的字段: {f['fields_created']}")
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
if __name__ == "__main__":
    # Script entry point: translate the outcome of main() into an exit code.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a deliberate cancel, so exit with a success status.
        print("\n\n[中断] 用户取消操作")
        sys.exit(0)
    except Exception as exc:
        import traceback

        print(f"\n[错误] 发生异常: {exc}")
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,102 @@
"""
Fix the tenant_id lookup problem in document_service.py.

Problem: get_file_config_by_id does not filter on tenant_id, so the query may fail.
Solution: add a tenant_id condition to the query.
"""
import re
from pathlib import Path
def _ensure_os_import(source: str) -> str:
    """Return *source* with a top-level ``import os`` line, adding one if absent."""
    if re.search(r'^import os\b', source, re.MULTILINE):
        return source
    lines = source.split('\n')
    insert_at = None
    # Prefer placing the import right after a well-known existing import line.
    for anchor in ('from dotenv import load_dotenv', 'import pymysql'):
        for index, line in enumerate(lines):
            if line.startswith(anchor):
                insert_at = index + 1
                break
        if insert_at is not None:
            break
    if insert_at is None:
        # Otherwise append after the last top-level import (or at the top).
        insert_at = 0
        for index, line in enumerate(lines):
            if line.startswith(('import ', 'from ')):
                insert_at = index + 1
    # Inserting a whole line keeps statements such as
    # ``import pymysql.cursors`` intact.
    lines.insert(insert_at, 'import os')
    return '\n'.join(lines)


def fix_document_service():
    """Patch ``services/document_service.py`` so the file-config query filters by tenant.

    Locates the ``get_file_config_by_id(self, file_id: int)`` method, replaces
    its ``WHERE id = %s AND state = 1`` query and ``cursor.execute`` call with
    a version that also filters on ``tenant_id``, and makes sure ``os`` is
    imported. The path is resolved relative to the current working directory.

    Returns:
        True when the file was patched or already contains the tenant check;
        False when the file or the expected query cannot be found.
    """
    file_path = Path("services/document_service.py")
    if not file_path.exists():
        print(f"[错误] 文件不存在: {file_path}")
        return False

    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    # Group 1: method header up to the line before the query.
    # Group 2: indentation of the ``sql = ...`` line, reused for the new code.
    pattern = re.compile(
        r'(def get_file_config_by_id\(self, file_id: int\) -> Optional\[Dict\]:.*?\n)'
        r'([ \t]*)sql = """.*?WHERE id = %s\s+AND state = 1\s+""".*?'
        r'cursor\.execute\(sql, \(file_id,\)\)',
        re.DOTALL,
    )
    match = pattern.search(content)
    if not match:
        already_patched = re.search(
            r'def get_file_config_by_id\((?:(?!\n\s*def ).)*?'
            r'cursor\.execute\(sql, \(file_id, tenant_id\)\)',
            content,
            re.DOTALL,
        )
        if already_patched:
            # Running the script twice must not be reported as a failure.
            print("[信息] 查询已经包含tenant_id检查无需修复")
            return True
        print("[错误] 未找到get_file_config_by_id方法或查询语句")
        return False

    old_code = match.group(0)
    if 'tenant_id' in old_code:
        print("[信息] 查询已经包含tenant_id检查无需修复")
        return True

    head, indent = match.group(1), match.group(2)
    # getattr: the service class is not guaranteed to define ``tenant_id``,
    # so a plain attribute access in the generated code could raise.
    replacement_lines = [
        'sql = """',
        '    SELECT id, name, file_path',
        '    FROM f_polic_file_config',
        '    WHERE id = %s',
        '    AND tenant_id = %s',
        '    AND state = 1',
        '"""',
        '# 获取tenant_id从环境变量或请求中获取',
        "tenant_id = getattr(self, 'tenant_id', None) or os.getenv('TENANT_ID', '1')",
        'try:',
        '    tenant_id = int(tenant_id)',
        'except (ValueError, TypeError):',
        '    tenant_id = 1  # 默认值',
        'cursor.execute(sql, (file_id, tenant_id))',
    ]
    new_code = head + '\n'.join(indent + line for line in replacement_lines)
    new_content = content[:match.start()] + new_code + content[match.end():]

    # The generated code calls os.getenv, so the module must import os.
    new_content = _ensure_os_import(new_content)

    with open(file_path, 'w', encoding='utf-8') as f:
        f.write(new_content)
    print("[成功] 已修复get_file_config_by_id方法添加了tenant_id检查")
    return True
if __name__ == "__main__":
    # Run the one-off patch and print follow-up guidance.
    banner = "=" * 70
    print(banner)
    print("修复document_service.py中的tenant_id查询问题")
    print(banner)
    if not fix_document_service():
        print("\n修复失败,请手动检查代码")
    else:
        print("\n修复完成!")
        print("\n注意:")
        for note in (
            "1. 请确保.env文件中配置了TENANT_ID",
            "2. 或者确保应用程序在调用时正确传递tenant_id",
            "3. 建议在app.py中从请求中获取tenant_id并传递给document_service",
        ):
            print(note)

View File

@ -79,12 +79,13 @@ class DocumentService:
secure=self.minio_config['secure'] secure=self.minio_config['secure']
) )
def get_file_config_by_id(self, file_id: int) -> Optional[Dict]: def get_file_config_by_id(self, file_id: int, tenant_id: Optional[int] = None) -> Optional[Dict]:
""" """
根据文件ID获取文件配置 根据文件ID获取文件配置
Args: Args:
file_id: 文件配置ID file_id: 文件配置ID
tenant_id: 租户ID如果为None则从环境变量获取或使用默认值1
Returns: Returns:
文件配置信息包含: id, name, file_path 文件配置信息包含: id, name, file_path
@ -93,13 +94,23 @@ class DocumentService:
cursor = conn.cursor(pymysql.cursors.DictCursor) cursor = conn.cursor(pymysql.cursors.DictCursor)
try: try:
# 获取tenant_id
if tenant_id is None:
# 尝试从环境变量获取
tenant_id_str = os.getenv('TENANT_ID', '1')
try:
tenant_id = int(tenant_id_str)
except (ValueError, TypeError):
tenant_id = 1 # 默认值
sql = """ sql = """
SELECT id, name, file_path SELECT id, name, file_path
FROM f_polic_file_config FROM f_polic_file_config
WHERE id = %s WHERE id = %s
AND tenant_id = %s
AND state = 1 AND state = 1
""" """
cursor.execute(sql, (file_id,)) cursor.execute(sql, (file_id, tenant_id))
config = cursor.fetchone() config = cursor.fetchone()
if config: if config:
@ -899,7 +910,7 @@ class DocumentService:
except S3Error as e: except S3Error as e:
raise Exception(f"上传文件到MinIO失败: {str(e)}") raise Exception(f"上传文件到MinIO失败: {str(e)}")
def generate_document(self, file_id: int, input_data: List[Dict], file_info: Dict) -> Dict: def generate_document(self, file_id: int, input_data: List[Dict], file_info: Dict, tenant_id: Optional[int] = None) -> Dict:
""" """
生成文档 生成文档
@ -907,12 +918,13 @@ class DocumentService:
file_id: 文件配置ID file_id: 文件配置ID
input_data: 输入数据列表格式: [{'fieldCode': 'xxx', 'fieldValue': 'xxx'}] input_data: 输入数据列表格式: [{'fieldCode': 'xxx', 'fieldValue': 'xxx'}]
file_info: 文件信息格式: {'fileId': 1, 'fileName': 'xxx.doc'} file_info: 文件信息格式: {'fileId': 1, 'fileName': 'xxx.doc'}
tenant_id: 租户ID如果为None则从环境变量获取或使用默认值1
Returns: Returns:
生成结果包含: filePath 生成结果包含: filePath
""" """
# 获取文件配置 # 获取文件配置
file_config = self.get_file_config_by_id(file_id) file_config = self.get_file_config_by_id(file_id, tenant_id)
if not file_config: if not file_config:
# 提供更详细的错误信息 # 提供更详细的错误信息
raise Exception( raise Exception(

View File

@ -0,0 +1,779 @@
"""
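Synchronize templates, fields and their relations across databases.

Features:
1. Read the source database configuration from the .env file.
2. Synchronize into the target database (e.g. 10.100.31.21).
3. Handle ID mapping (the two databases use different IDs).
4. Match records by business keys (name, filed_code, file_path).

Usage:
python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password <password> --target-database finyx --target-tenant-id 1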
"""
import os
import sys
import pymysql
import argparse
from pathlib import Path
from typing import Dict, List, Set, Optional, Tuple
from dotenv import load_dotenv
# Force UTF-8 console output (Windows compatibility).
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
# Load environment variables.
load_dotenv()
# Project root directory (the directory that contains this script).
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# Values written to the created_by / updated_by columns of inserted rows.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
def print_section(title):
    """Print *title* framed above and below by a 70-character ``=`` rule."""
    rule = "=" * 70
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` or ``[FAIL]`` depending on *success*."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(tag, message)
def generate_id():
    """Return a new integer ID derived from the current time in microseconds.

    The system clock can return the same value for calls made in quick
    succession, which would yield duplicate primary keys when IDs are
    generated in a tight loop. The last value handed out is therefore
    remembered on the function object and every call returns a strictly
    larger number than the previous one (within this process).
    """
    import time
    candidate = int(time.time() * 1000000)
    previous = getattr(generate_id, "_last_id", 0)
    if candidate <= previous:
        # Clock did not advance (or went backwards): bump past the last ID.
        candidate = previous + 1
    generate_id._last_id = candidate
    return candidate
def get_source_db_config() -> Dict:
    """Build the source-database connection settings from environment variables.

    Reads DB_HOST, DB_PORT, DB_USER, DB_PASSWORD and DB_NAME.

    Returns:
        Dict with ``host``, ``port`` (int), ``user``, ``password``,
        ``database`` and ``charset`` (always ``utf8mb4``).

    Raises:
        ValueError: If any of the five variables is unset or empty, or if
            DB_PORT cannot be converted to an integer.
    """
    env_names = ('DB_HOST', 'DB_PORT', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')
    host, port, user, password, database = (os.getenv(name) for name in env_names)
    for value in (host, port, user, password, database):
        if not value:
            raise ValueError(
                "源数据库配置不完整,请在.env文件中配置以下环境变量\n"
                "DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME"
            )
    settings = {'charset': 'utf8mb4'}
    settings['host'] = host
    settings['port'] = int(port)
    settings['user'] = user
    settings['password'] = password
    settings['database'] = database
    return settings
def get_target_db_config_from_args() -> Dict:
    """Parse the command line into the target-database configuration.

    Returns:
        Dict with the connection settings (``host``, ``port``, ``user``,
        ``password``, ``database``, ``charset``) plus ``tenant_id``,
        ``source_tenant_id`` (None when not given) and ``dry_run``.

    Note:
        argparse exits the process when a required argument is missing.
    """
    # The usage example deliberately shows a placeholder instead of a real
    # password: help text must never carry live credentials.
    parser = argparse.ArgumentParser(
        description='跨数据库同步模板、字段和关联关系',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例
 python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password <password> --target-database finyx --target-tenant-id 1
"""
    )
    parser.add_argument('--target-host', type=str, required=True, help='目标MySQL服务器地址')
    parser.add_argument('--target-port', type=int, required=True, help='目标MySQL服务器端口')
    parser.add_argument('--target-user', type=str, required=True, help='目标MySQL用户名')
    parser.add_argument('--target-password', type=str, required=True, help='目标MySQL密码')
    parser.add_argument('--target-database', type=str, required=True, help='目标数据库名称')
    parser.add_argument('--target-tenant-id', type=int, required=True, help='目标租户ID')
    parser.add_argument('--source-tenant-id', type=int, help='源租户ID如果不指定将使用数据库中的第一个tenant_id')
    parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)')
    args = parser.parse_args()
    return {
        'host': args.target_host,
        'port': args.target_port,
        'user': args.target_user,
        'password': args.target_password,
        'database': args.target_database,
        'charset': 'utf8mb4',
        'tenant_id': args.target_tenant_id,
        'source_tenant_id': args.source_tenant_id,
        'dry_run': args.dry_run
    }
def test_db_connection(config: Dict, label: str) -> Optional[pymysql.Connection]:
    """Open a pymysql connection described by *config*.

    Args:
        config: Dict providing ``host``, ``port``, ``user``, ``password``,
            ``database`` and ``charset``.
        label: Text placed in front of the failure message.

    Returns:
        The open connection, or None after printing the failure when any
        error occurs (including a missing config key).
    """
    wanted = ('host', 'port', 'user', 'password', 'database', 'charset')
    try:
        connect_kwargs = {key: config[key] for key in wanted}
        return pymysql.connect(**connect_kwargs)
    except Exception as exc:
        print_result(False, f"{label}数据库连接失败: {exc}")
        return None
def get_source_tenant_id(conn) -> int:
    """Return one tenant_id found in f_polic_file_config, or 1 when no row is returned."""
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        dict_cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
        row = dict_cursor.fetchone()
        return row['tenant_id'] if row else 1
    finally:
        dict_cursor.close()
def read_source_fields(conn, tenant_id: int) -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
    """Load the active fields of a tenant from the source database.

    Args:
        conn: Open pymysql connection to the source database.
        tenant_id: Tenant whose fields are read.

    Returns:
        ``(input_fields, output_fields)``; both map ``filed_code`` to a dict
        holding ``id``, ``tenant_id``, ``name``, ``filed_code``,
        ``field_type`` and ``state``. Rows with ``field_type`` 1 go to the
        first dict, rows with ``field_type`` 2 to the second, all other
        rows are ignored.
    """
    columns = ('id', 'tenant_id', 'name', 'filed_code', 'field_type', 'state')
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT id, tenant_id, name, filed_code, field_type, state
            FROM f_polic_field
            WHERE tenant_id = %s
            AND state = 1
            ORDER BY field_type, filed_code
        """
        dict_cursor.execute(query, (tenant_id,))
        # field_type -> bucket; 1 = input fields, 2 = output fields.
        buckets = {1: {}, 2: {}}
        for row in dict_cursor.fetchall():
            snapshot = {column: row[column] for column in columns}
            bucket = buckets.get(row['field_type'])
            if bucket is not None:
                bucket[row['filed_code']] = snapshot
        return buckets[1], buckets[2]
    finally:
        dict_cursor.close()
def read_source_templates(conn, tenant_id: int) -> Dict[str, Dict]:
    """Load the active templates (files and directories) of a tenant.

    Args:
        conn: Open pymysql connection to the source database.
        tenant_id: Tenant whose templates are read.

    Returns:
        Dict of template info (``id``, ``tenant_id``, ``parent_id``,
        ``name``, ``file_path``, ``state``). File nodes are keyed by their
        ``file_path``. Directory nodes (no ``file_path``) are keyed by
        ``DIR:<name>``; when that key is already taken by another directory
        of the same name, ``#<id>`` is appended so no directory is lost.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        sql = """
            SELECT id, tenant_id, parent_id, name, file_path, state
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
            ORDER BY file_path, name
        """
        cursor.execute(sql, (tenant_id,))
        templates = cursor.fetchall()
        result = {}
        for template in templates:
            if template['file_path']:
                key = template['file_path']
            else:
                key = f"DIR:{template['name']}"
                if key in result:
                    # Same directory name under a different parent: keep both
                    # instead of overwriting the earlier entry.
                    key = f"{key}#{template['id']}"
            result[key] = {
                'id': template['id'],
                'tenant_id': template['tenant_id'],
                'parent_id': template['parent_id'],
                'name': template['name'],
                'file_path': template['file_path'],
                'state': template['state']
            }
        return result
    finally:
        cursor.close()
def read_source_relations(conn, tenant_id: int) -> Dict[int, List[int]]:
    """Load the active template-to-field relations of a tenant.

    Args:
        conn: Open pymysql connection to the source database.
        tenant_id: Tenant whose relations are read.

    Returns:
        Dict mapping each ``file_id`` to the list of ``filed_id`` values
        linked to it, in the order the rows were returned.
    """
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT file_id, filed_id
            FROM f_polic_file_field
            WHERE tenant_id = %s
            AND state = 1
        """
        dict_cursor.execute(query, (tenant_id,))
        grouped = {}
        for row in dict_cursor.fetchall():
            grouped.setdefault(row['file_id'], []).append(row['filed_id'])
        return grouped
    finally:
        dict_cursor.close()
def _sync_field_group(conn, tenant_id, source_fields, field_type, existing_by_code, dry_run):
    """Map one group of source fields onto the target, inserting the missing ones.

    Returns ``(id_map, matched_count, created_count)`` where ``id_map`` maps
    source field ID to target field ID.
    """
    id_map = {}
    matched = 0
    created = 0
    for code, source_field in source_fields.items():
        key = (code, field_type)
        if key in existing_by_code:
            # Field already exists in the target: reuse its ID.
            id_map[source_field['id']] = existing_by_code[key]
            matched += 1
            continue

        target_id = generate_id()
        id_map[source_field['id']] = target_id
        if not dry_run:
            insert_cursor = conn.cursor()
            try:
                insert_cursor.execute("""
                    INSERT INTO f_polic_field
                    (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (
                    target_id,
                    tenant_id,
                    source_field['name'],
                    source_field['filed_code'],
                    field_type,
                    CREATED_BY,
                    UPDATED_BY
                ))
                conn.commit()
            except Exception:
                # Leave no half-open transaction behind, then let the caller see it.
                conn.rollback()
                raise
            finally:
                insert_cursor.close()
        created += 1
    return id_map, matched, created


def sync_fields_to_target(conn, tenant_id: int, source_input_fields: Dict, source_output_fields: Dict,
                          dry_run: bool = False) -> Tuple[Dict[int, int], Dict[int, int]]:
    """Synchronize the source fields into the target database.

    A source field matches a target field when ``filed_code`` and
    ``field_type`` are equal; unmatched fields are inserted with a freshly
    generated ID (only counted, not inserted, when *dry_run* is True).

    Args:
        conn: Open pymysql connection to the target database.
        tenant_id: Target tenant.
        source_input_fields: Input fields keyed by ``filed_code``.
        source_output_fields: Output fields keyed by ``filed_code``.
        dry_run: When True nothing is written.

    Returns:
        ``(input_field_id_map, output_field_id_map)``; each maps source
        field ID to target field ID.
    """
    print_section("同步字段到目标数据库")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # 1. Existing fields of the target tenant, indexed by (code, type)
        cursor.execute("""
            SELECT id, filed_code, field_type
            FROM f_polic_field
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        existing_fields = cursor.fetchall()
        existing_by_code = {}
        for field in existing_fields:
            key = (field['filed_code'], field['field_type'])
            existing_by_code[key] = field['id']
        print(f" 目标数据库现有字段: {len(existing_fields)}")

        # 2. Input fields (field_type 1)
        print("\n 同步输入字段...")
        input_field_id_map, input_matched, input_created = _sync_field_group(
            conn, tenant_id, source_input_fields, 1, existing_by_code, dry_run
        )
        print(f" 匹配: {input_matched} 个,创建: {input_created}")

        # 3. Output fields (field_type 2)
        print("\n 同步输出字段...")
        output_field_id_map, output_matched, output_created = _sync_field_group(
            conn, tenant_id, source_output_fields, 2, existing_by_code, dry_run
        )
        print(f" 匹配: {output_matched} 个,创建: {output_created}")
        return input_field_id_map, output_field_id_map
    finally:
        cursor.close()
def _directory_depth(template, dirs_by_id):
    """Return how many parent hops *template* has among the known directories.

    The count includes the hop to a parent that is not a known directory, and
    the walk stops when a directory is seen twice, so a ``parent_id`` cycle
    cannot loop forever.
    """
    depth = 0
    visited = {template['id']}
    current = template
    while current.get('parent_id'):
        depth += 1
        parent = dirs_by_id.get(current['parent_id'])
        if parent is None or parent['id'] in visited:
            break
        visited.add(parent['id'])
        current = parent
    return depth


def sync_templates_to_target(conn, tenant_id: int, source_templates: Dict,
                             dry_run: bool = False) -> Dict[int, int]:
    """Synchronize the source templates into the target database.

    Directory nodes (no ``file_path``) are processed first, shallowest
    level first, and matched by name plus mapped parent ID. File nodes are
    matched by ``file_path``; a matched file whose parent or name differs is
    updated. Unmatched nodes are inserted with a freshly generated ID.

    Args:
        conn: Open pymysql connection to the target database.
        tenant_id: Target tenant.
        source_templates: Template info dicts as returned by
            ``read_source_templates``.
        dry_run: When True nothing is written; counters are still updated.

    Returns:
        Dict mapping source template ID to target template ID.
    """
    print_section("同步模板到目标数据库")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # 1. Existing templates of the target tenant
        cursor.execute("""
            SELECT id, name, file_path, parent_id
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        existing_templates = cursor.fetchall()
        existing_by_path = {}
        existing_by_name = {}
        for template in existing_templates:
            if template['file_path']:
                existing_by_path[template['file_path']] = template
            else:
                # Directory node
                existing_by_name.setdefault(template['name'], []).append(template)
        print(f" 目标数据库现有模板: {len(existing_templates)}")

        # 2. Directory nodes, in level order
        print("\n 同步目录节点...")
        template_id_map = {}
        dir_created = 0
        dir_matched = 0

        # Split directories from files.
        dir_templates = {}
        file_templates = {}
        for key, source_template in source_templates.items():
            if source_template['file_path']:
                file_templates[key] = source_template
            else:
                dir_templates[key] = source_template

        # Index directories by ID once so each parent lookup is O(1).
        dirs_by_id = {}
        for directory in dir_templates.values():
            dirs_by_id.setdefault(directory['id'], directory)

        # Parents must exist before their children, so group by depth.
        dirs_by_level = {}
        for key, source_template in dir_templates.items():
            level = _directory_depth(source_template, dirs_by_id)
            dirs_by_level.setdefault(level, []).append((key, source_template))

        for level in sorted(dirs_by_level):
            for key, source_template in dirs_by_level[level]:
                source_id = source_template['id']
                name = source_template['name']

                target_parent_id = None
                if source_template['parent_id']:
                    target_parent_id = template_id_map.get(source_template['parent_id'])

                # Match an existing directory by name and parent.
                matched = None
                for existing in existing_by_name.get(name, []):
                    if not existing['file_path'] and existing['parent_id'] == target_parent_id:
                        matched = existing
                        break

                if matched:
                    template_id_map[source_id] = matched['id']
                    dir_matched += 1
                    continue

                target_id = generate_id()
                template_id_map[source_id] = target_id
                if not dry_run:
                    insert_cursor = conn.cursor()
                    try:
                        insert_cursor.execute("""
                            INSERT INTO f_polic_file_config
                            (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                            VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1)
                        """, (
                            target_id,
                            tenant_id,
                            target_parent_id,
                            name,
                            CREATED_BY,
                            UPDATED_BY
                        ))
                        conn.commit()
                    finally:
                        insert_cursor.close()
                dir_created += 1
        print(f" 匹配: {dir_matched} 个,创建: {dir_created}")

        # 3. File nodes
        print("\n 同步文件节点...")
        file_created = 0
        file_matched = 0
        file_updated = 0
        for key, source_template in file_templates.items():
            source_id = source_template['id']
            file_path = source_template['file_path']
            name = source_template['name']

            target_parent_id = None
            if source_template['parent_id']:
                target_parent_id = template_id_map.get(source_template['parent_id'])

            # Files are matched by file_path.
            matched = existing_by_path.get(file_path)
            if matched:
                target_id = matched['id']
                template_id_map[source_id] = target_id
                file_matched += 1
                # Update only when the parent or the name changed.
                if matched['parent_id'] != target_parent_id or matched['name'] != name:
                    file_updated += 1
                    if not dry_run:
                        update_cursor = conn.cursor()
                        try:
                            update_cursor.execute("""
                                UPDATE f_polic_file_config
                                SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s
                                WHERE id = %s AND tenant_id = %s
                            """, (target_parent_id, name, UPDATED_BY, target_id, tenant_id))
                            conn.commit()
                        finally:
                            update_cursor.close()
                continue

            target_id = generate_id()
            template_id_map[source_id] = target_id
            if not dry_run:
                insert_cursor = conn.cursor()
                try:
                    insert_cursor.execute("""
                        INSERT INTO f_polic_file_config
                        (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                        VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                    """, (
                        target_id,
                        tenant_id,
                        target_parent_id,
                        name,
                        file_path,
                        CREATED_BY,
                        UPDATED_BY
                    ))
                    conn.commit()
                finally:
                    insert_cursor.close()
            file_created += 1
        print(f" 匹配: {file_matched} 个,创建: {file_created} 个,更新: {file_updated}")
        return template_id_map
    finally:
        cursor.close()
def sync_relations_to_target(conn, tenant_id: int, source_relations: Dict[int, List[int]],
                             template_id_map: Dict[int, int],
                             input_field_id_map: Dict[int, int],
                             output_field_id_map: Dict[int, int],
                             dry_run: bool = False):
    """Recreate the template-to-field relations in the target database.

    All existing relations of the target tenant are deleted first, then one
    relation is inserted per mappable (template, field) pair from the source.

    Args:
        conn: Open connection to the target database.
        tenant_id: Target tenant.
        source_relations: Source ``file_id`` -> list of source ``filed_id``.
        template_id_map: Source template ID -> target template ID.
        input_field_id_map: Source input field ID -> target field ID.
        output_field_id_map: Source output field ID -> target field ID.
        dry_run: When True nothing is written; counters are still computed.

    Returns:
        Dict with ``created`` (relations) and ``skipped`` (templates without
        a target mapping).
    """
    print_section("同步字段关联关系到目标数据库")

    # 1. Remove the existing relations
    print("1. 清理现有关联关系...")
    if dry_run:
        print(" [预览模式] 将清理所有现有关联关系")
    else:
        purge_cursor = conn.cursor()
        try:
            purge_cursor.execute("""
                DELETE FROM f_polic_file_field
                WHERE tenant_id = %s
            """, (tenant_id,))
            deleted_count = purge_cursor.rowcount
            conn.commit()
            print_result(True, f"删除了 {deleted_count} 条旧关联关系")
        finally:
            purge_cursor.close()

    # 2. Create the new relations
    print("\n2. 创建新的关联关系...")
    # Output mappings win over input mappings on a shared source ID.
    field_lookup = dict(input_field_id_map)
    field_lookup.update(output_field_id_map)
    insert_sql = """
        INSERT INTO f_polic_file_field
        (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """
    relations_created = 0
    relations_skipped = 0
    for source_file_id, source_field_ids in source_relations.items():
        target_file_id = template_id_map.get(source_file_id)
        if not target_file_id:
            relations_skipped += 1
            continue

        # Translate field IDs, dropping those without a target mapping.
        translated = (field_lookup.get(field_id) for field_id in source_field_ids)
        target_field_ids = [field_id for field_id in translated if field_id]
        if not target_field_ids:
            continue

        if dry_run:
            relations_created += len(target_field_ids)
            continue

        insert_cursor = conn.cursor()
        try:
            for target_field_id in target_field_ids:
                relation_id = generate_id()
                insert_cursor.execute(insert_sql, (
                    relation_id,
                    tenant_id,
                    target_file_id,
                    target_field_id,
                    CREATED_BY,
                    UPDATED_BY
                ))
            # One commit per template: a failure only loses that template's rows.
            conn.commit()
            relations_created += len(target_field_ids)
        except Exception as exc:
            conn.rollback()
            print(f" [错误] 创建关联关系失败: {exc}")
        finally:
            insert_cursor.close()

    print_result(True, f"创建了 {relations_created} 条关联关系,跳过 {relations_skipped} 个模板")
    return {
        'created': relations_created,
        'skipped': relations_skipped
    }
def main():
    """Synchronize templates, fields and relations from the source to the target database.

    The source configuration comes from the environment, the target
    configuration from the command line. Both connections are closed at the
    end, whatever happens during the synchronization.
    """
    print_section("跨数据库同步模板、字段和关联关系")

    # 1. Source database configuration (environment / .env)
    print_section("读取源数据库配置")
    try:
        source_config = get_source_db_config()
        print_result(True, f"源数据库: {source_config['host']}:{source_config['port']}/{source_config['database']}")
    except Exception as e:
        print_result(False, str(e))
        return

    # 2. Target database configuration (command line)
    print_section("读取目标数据库配置")
    target_config = get_target_db_config_from_args()
    print_result(True, f"目标数据库: {target_config['host']}:{target_config['port']}/{target_config['database']}")
    print(f" 目标租户ID: {target_config['tenant_id']}")
    if target_config['dry_run']:
        print("\n[注意] 当前为预览模式,不会实际更新数据库")

    # 3. Connect to both databases
    print_section("连接数据库")
    # Label the source connection so a failure message names the database.
    source_conn = test_db_connection(source_config, "源")
    if not source_conn:
        return
    target_conn = test_db_connection(target_config, "目标")
    if not target_conn:
        source_conn.close()
        return
    print_result(True, "数据库连接成功")

    try:
        # 4. Source tenant ID: explicit value wins, otherwise look one up.
        source_tenant_id = target_config.get('source_tenant_id')
        if source_tenant_id is None:
            source_tenant_id = get_source_tenant_id(source_conn)
        print(f"\n源租户ID: {source_tenant_id}")

        # 5. Read the source data
        print_section("读取源数据库数据")
        print(" 读取字段...")
        source_input_fields, source_output_fields = read_source_fields(source_conn, source_tenant_id)
        print_result(True, f"输入字段: {len(source_input_fields)} 个,输出字段: {len(source_output_fields)}")
        print("\n 读取模板...")
        source_templates = read_source_templates(source_conn, source_tenant_id)
        print_result(True, f"模板总数: {len(source_templates)}")
        print("\n 读取关联关系...")
        source_relations = read_source_relations(source_conn, source_tenant_id)
        print_result(True, f"关联关系: {len(source_relations)} 个模板有字段关联")

        # 6. Synchronize into the target database
        target_tenant_id = target_config['tenant_id']
        dry_run = target_config['dry_run']

        # 6.1 Fields
        input_field_id_map, output_field_id_map = sync_fields_to_target(
            target_conn, target_tenant_id,
            source_input_fields, source_output_fields,
            dry_run
        )
        # 6.2 Templates
        template_id_map = sync_templates_to_target(
            target_conn, target_tenant_id,
            source_templates,
            dry_run
        )
        # 6.3 Relations
        relations_result = sync_relations_to_target(
            target_conn, target_tenant_id,
            source_relations,
            template_id_map,
            input_field_id_map,
            output_field_id_map,
            dry_run
        )

        # 7. Summary
        print_section("同步完成")
        if dry_run:
            print(" 本次为预览模式,未实际更新数据库")
        else:
            print(" 数据库已更新")
        print(f"\n 同步统计:")
        print(f" - 输入字段: {len(input_field_id_map)}")
        print(f" - 输出字段: {len(output_field_id_map)}")
        print(f" - 模板: {len(template_id_map)}")
        print(f" - 关联关系: {relations_result['created']}")
    finally:
        source_conn.close()
        target_conn.close()
        print_result(True, "数据库连接已关闭")
if __name__ == "__main__":
    # Script entry point: translate the outcome of main() into an exit code.
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is a deliberate cancel, so exit with a success status.
        print("\n\n[中断] 用户取消操作")
        sys.exit(0)
    except Exception as exc:
        import traceback

        print(f"\n[错误] 发生异常: {exc}")
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,562 @@
"""
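Update the field relations of every template.
1. Input fields: every template is linked to clue_info and target_basic_info_clue.
2. Output fields: linked automatically from the placeholders found in each template.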
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Set, Optional
from dotenv import load_dotenv
import re
from docx import Document
# Load environment variables.
load_dotenv()
# Database configuration; each value falls back to a literal default when the
# corresponding environment variable is not set.
# NOTE(review): the fallback host and password are hard-coded in source control;
# consider requiring them from the environment instead — confirm with the owners.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}
# Values written to the created_by / updated_by columns of inserted rows.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
# Project root directory (the directory that contains this script).
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
def print_section(title):
    """Print *title* framed above and below by a 70-character ``=`` rule."""
    rule = "=" * 70
    print(f"\n{rule}")
    print(f" {title}")
    print(rule)
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` or ``[FAIL]`` depending on *success*."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(tag, message)
def generate_id():
    """Return a new integer ID derived from the current time in microseconds.

    The system clock can return the same value for calls made in quick
    succession, which would yield duplicate primary keys when IDs are
    generated in a tight loop. The last value handed out is therefore
    remembered on the function object and every call returns a strictly
    larger number than the previous one (within this process).
    """
    import time
    candidate = int(time.time() * 1000000)
    previous = getattr(generate_id, "_last_id", 0)
    if candidate <= previous:
        # Clock did not advance (or went backwards): bump past the last ID.
        candidate = previous + 1
    generate_id._last_id = candidate
    return candidate
def get_actual_tenant_id(conn) -> int:
    """Return one tenant_id found in f_polic_file_config, or 1 when no row is returned."""
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        dict_cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
        row = dict_cursor.fetchone()
        return row['tenant_id'] if row else 1
    finally:
        dict_cursor.close()
def get_input_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Load the two input fields ``clue_info`` and ``target_basic_info_clue``.

    Each field found is also printed (name, code and ID).

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose fields are read.

    Returns:
        Dict mapping ``filed_code`` to the field ID for the active rows found.
    """
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT id, filed_code, name
            FROM f_polic_field
            WHERE tenant_id = %s
            AND field_type = 1
            AND filed_code IN ('clue_info', 'target_basic_info_clue')
            AND state = 1
        """
        dict_cursor.execute(query, (tenant_id,))
        code_to_id = {}
        for row in dict_cursor.fetchall():
            code, field_id = row['filed_code'], row['id']
            code_to_id[code] = field_id
            print(f" 输入字段: {row['name']} ({code}) - ID: {field_id}")
        return code_to_id
    finally:
        dict_cursor.close()
def get_output_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Load every active output field (``field_type = 2``) of a tenant.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose fields are read.

    Returns:
        Dict mapping ``filed_code`` to the field ID.
    """
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT id, filed_code, name
            FROM f_polic_field
            WHERE tenant_id = %s
            AND field_type = 2
            AND state = 1
        """
        dict_cursor.execute(query, (tenant_id,))
        return {row['filed_code']: row['id'] for row in dict_cursor.fetchall()}
    finally:
        dict_cursor.close()
def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """Collect every ``{{placeholder}}`` code found in a .docx file.

    Body paragraphs and the paragraphs of every table cell are scanned.
    Surrounding whitespace is stripped from each code and empty codes are
    ignored.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        Set of placeholder codes. When the document cannot be read the error
        is printed and whatever was collected so far (possibly nothing) is
        returned.
    """
    placeholders = set()
    placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')

    def collect(paragraphs):
        # Add the placeholder codes of each paragraph to the result set.
        for paragraph in paragraphs:
            for match in placeholder_pattern.findall(paragraph.text):
                field_code = match.strip()
                if field_code:
                    placeholders.add(field_code)

    try:
        doc = Document(file_path)
        collect(doc.paragraphs)
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        collect(cell.paragraphs)
            except Exception:
                # Best effort: a table that cannot be walked is skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                continue
    except Exception as e:
        print(f" [错误] 读取文件失败: {str(e)}")
    return placeholders
def get_all_templates(conn, tenant_id: int) -> List[Dict]:
    """Load every active template that has a non-empty ``file_path``.

    Directory nodes (rows without a file path) are excluded by the query.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose templates are read.

    Returns:
        The fetched rows, each with ``id``, ``name`` and ``file_path``.
    """
    dict_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND file_path IS NOT NULL
            AND file_path != ''
            AND state = 1
        """
        dict_cursor.execute(query, (tenant_id,))
        return dict_cursor.fetchall()
    finally:
        dict_cursor.close()
def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]:
    """Return the IDs of the fields currently linked to one template.

    Args:
        conn: Open DB-API connection. Rows are read by position, so the
            connection's default cursor must return sequence-style rows.
        tenant_id: Tenant whose relations are queried.
        file_id: Template (file config) ID.

    Returns:
        Set of ``filed_id`` values taken from the active (``state = 1``) rows.
    """
    db_cursor = conn.cursor()
    try:
        query = """
            SELECT filed_id
            FROM f_polic_file_field
            WHERE tenant_id = %s
            AND file_id = %s
            AND state = 1
        """
        db_cursor.execute(query, (tenant_id, file_id))
        linked_ids = set()
        for record in db_cursor.fetchall():
            linked_ids.add(record[0])
        return linked_ids
    finally:
        # Always release the cursor, even when the query fails.
        db_cursor.close()
def update_template_field_relations(conn, tenant_id: int, file_id: int, file_name: str,
                                    input_field_ids: List[int], output_field_ids: List[int],
                                    dry_run: bool = False):
    """
    Synchronise the field relations of one template with the given field IDs.

    The target set is the union of ``input_field_ids`` and
    ``output_field_ids``. Relations missing from the database are inserted
    and active relations that are not in the target set are DELETED, so
    after a successful run the template is linked to exactly the target set.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        file_id: Template ID.
        file_name: Template name, used only in progress messages.
        input_field_ids: IDs of the input fields to link.
        output_field_ids: IDs of the output fields to link.
        dry_run: When true, only print what would change.

    Returns:
        ``False`` when the update raised and was rolled back; ``True`` in
        every other case (nothing to do, preview only, or committed).
        Exceptions are reported on stdout and never propagate.
    """
    cursor = conn.cursor()
    try:
        all_field_ids = set(input_field_ids + output_field_ids)
        if not all_field_ids:
            print(f" [跳过] {file_name}: 没有字段需要关联")
            return True
        # Relations currently stored for this template.
        existing_field_ids = get_existing_relations(conn, tenant_id, file_id)
        # Fields that still have to be linked.
        to_add = all_field_ids - existing_field_ids
        # Stored relations that are no longer wanted; these are removed.
        to_remove = existing_field_ids - all_field_ids
        if not to_add and not to_remove:
            print(f" [保持] {file_name}: 关联关系已是最新")
            return True
        if dry_run:
            print(f" [预览] {file_name}:")
            if to_add:
                print(f" 将添加: {len(to_add)} 个字段")
            if to_remove:
                print(f" 将删除: {len(to_remove)} 个字段")
            return True
        # Remove the obsolete relations.
        deleted_count = 0
        if to_remove:
            placeholders = ','.join(['%s'] * len(to_remove))
            delete_sql = f"""
                DELETE FROM f_polic_file_field
                WHERE tenant_id = %s
                AND file_id = %s
                AND filed_id IN ({placeholders})
            """
            cursor.execute(delete_sql, [tenant_id, file_id] + list(to_remove))
            deleted_count = cursor.rowcount
        # Insert the new relations.
        added_count = 0
        check_sql = """
            SELECT id FROM f_polic_file_field
            WHERE tenant_id = %s AND file_id = %s AND filed_id = %s
        """
        insert_sql = """
            INSERT INTO f_polic_file_field
            (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state)
            VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
        """
        for field_id in to_add:
            # Skip when any row (regardless of state) already links the
            # pair, so no duplicate relation is inserted.
            cursor.execute(check_sql, (tenant_id, file_id, field_id))
            if cursor.fetchone():
                continue
            relation_id = generate_id()
            cursor.execute(insert_sql, (
                relation_id,
                tenant_id,
                file_id,
                field_id,
                CREATED_BY,
                UPDATED_BY
            ))
            added_count += 1
        conn.commit()
        action_parts = []
        if added_count > 0:
            action_parts.append(f"添加 {added_count}")
        if deleted_count > 0:
            action_parts.append(f"删除 {deleted_count}")
        if action_parts:
            print(f" [更新] {file_name}: {', '.join(action_parts)}")
        return True
    except Exception as e:
        conn.rollback()
        print(f" [错误] {file_name}: 更新失败 - {str(e)}")
        return False
    finally:
        cursor.close()
def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """
    Return the ID of the field with ``field_code``, creating it if needed.

    The existence check matches on tenant and ``filed_code`` only; it does
    not filter on ``field_type`` or ``state``, so the ID of any row with
    that code is returned. A new row is created with ``field_type = 2``
    (output field) and a name derived from the code by replacing
    underscores with spaces.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant ID.
        field_code: Placeholder code of the field.

    Returns:
        The field ID, or ``None`` when the lookup or insert failed (the
        transaction is rolled back and the error printed).
    """
    cursor = conn.cursor()
    try:
        # Look for an existing row first. The lookup cursor is closed in a
        # ``finally`` so it is released even when the query raises.
        check_cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            check_cursor.execute("""
                SELECT id FROM f_polic_field
                WHERE tenant_id = %s AND filed_code = %s
            """, (tenant_id, field_code))
            existing = check_cursor.fetchone()
        finally:
            check_cursor.close()
        if existing:
            return existing['id']
        # Create the new field.
        field_id = generate_id()
        field_name = field_code.replace('_', ' ')  # simple name derivation
        insert_sql = """
            INSERT INTO f_polic_field
            (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
            VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
        """
        cursor.execute(insert_sql, (
            field_id,
            tenant_id,
            field_name,
            field_code,
            2,  # field_type=2 marks an output field
            CREATED_BY,
            UPDATED_BY
        ))
        conn.commit()
        print(f" [创建字段] {field_code} (ID: {field_id})")
        return field_id
    except Exception as e:
        conn.rollback()
        print(f" [错误] 创建字段失败 {field_code}: {str(e)}")
        return None
    finally:
        cursor.close()
def main():
    """Rebuild the field relations of every template of the detected tenant.

    Steps: connect, detect the tenant ID, load (or create) the input
    fields, load the output fields, scan each template file for
    placeholders, synchronise its relations, then print statistics and a
    verification summary. The connection is always closed at the end.
    """
    print_section("更新所有模板的字段关联关系")
    # 1. Connect to the database
    print_section("1. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return
    try:
        # 2. Detect the tenant ID actually used in the database
        print_section("2. 获取实际的tenant_id")
        tenant_id = get_actual_tenant_id(conn)
        print_result(True, f"实际tenant_id: {tenant_id}")
        # 3. Load the input fields
        print_section("3. 获取输入字段")
        input_fields = get_input_fields(conn, tenant_id)
        if not input_fields:
            print_result(False, "未找到输入字段 clue_info 和 target_basic_info_clue")
            print(" 将尝试创建这些字段...")
            # Create the missing input fields
            for field_code in ['clue_info', 'target_basic_info_clue']:
                field_id = create_missing_input_field(conn, tenant_id, field_code)
                if field_id:
                    input_fields[field_code] = field_id
        if not input_fields:
            print_result(False, "无法获取或创建输入字段,终止操作")
            return
        input_field_ids = list(input_fields.values())
        print_result(True, f"找到 {len(input_field_ids)} 个输入字段")
        # 4. Load the output fields
        print_section("4. 获取输出字段")
        output_fields = get_output_fields(conn, tenant_id)
        print_result(True, f"找到 {len(output_fields)} 个输出字段")
        # 5. Load all templates
        print_section("5. 获取所有模板")
        templates = get_all_templates(conn, tenant_id)
        print_result(True, f"找到 {len(templates)} 个模板")
        if not templates:
            print_result(False, "未找到模板")
            return
        # 6. Scan placeholders and synchronise the relations
        print_section("6. 扫描模板占位符并更新关联关系")
        total_updated = 0
        total_errors = 0
        all_placeholders_found = set()
        missing_fields = set()
        for i, template in enumerate(templates, 1):
            template_id = template['id']
            template_name = template['name']
            file_path = template['file_path']
            if i % 10 == 0:
                print(f" 处理进度: {i}/{len(templates)}")
            # The template file must exist locally to be scanned
            local_file = PROJECT_ROOT / file_path
            if not local_file.exists():
                print(f" [跳过] {template_name}: 文件不存在 - {file_path}")
                total_errors += 1
                continue
            # Extract the placeholders
            placeholders = extract_placeholders_from_docx(local_file)
            all_placeholders_found.update(placeholders)
            # Map each placeholder to an output field ID
            output_field_ids = []
            for placeholder in placeholders:
                if placeholder in output_fields:
                    output_field_ids.append(output_fields[placeholder])
                else:
                    # Unknown field: try to create it
                    missing_fields.add(placeholder)
                    field_id = create_missing_output_field(conn, tenant_id, placeholder)
                    if field_id:
                        output_fields[placeholder] = field_id
                        output_field_ids.append(field_id)
            # Synchronise the relations
            try:
                outcome = update_template_field_relations(
                    conn, tenant_id, template_id, template_name,
                    input_field_ids, output_field_ids, dry_run=False
                )
            except Exception as e:
                print(f" [错误] {template_name}: {str(e)}")
                total_errors += 1
                continue
            # The updater reports its own errors; an explicit False marks
            # a failed (rolled back) update and must not count as success.
            if outcome is False:
                total_errors += 1
            else:
                total_updated += 1
        # 7. Statistics
        print_section("7. 更新结果统计")
        print(f" 总模板数: {len(templates)}")
        print(f" 已更新: {total_updated}")
        print(f" 错误: {total_errors}")
        print(f" 发现的占位符总数: {len(all_placeholders_found)}")
        print(f" 缺失的字段(已创建): {len(missing_fields)}")
        if missing_fields:
            print(f"\n 创建的字段列表:")
            for field_code in sorted(missing_fields):
                print(f" - {field_code}")
        # 8. Verify the relations
        print_section("8. 验证关联关系")
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            # Templates linked to at least one input field
            cursor.execute("""
                SELECT COUNT(DISTINCT fff.file_id) as count
                FROM f_polic_file_field fff
                INNER JOIN f_polic_field f ON fff.filed_id = f.id
                WHERE fff.tenant_id = %s
                AND f.field_type = 1
                AND fff.state = 1
            """, (tenant_id,))
            templates_with_input = cursor.fetchone()['count']
            print(f" 有输入字段关联的模板: {templates_with_input}")
            # Templates linked to at least one output field
            cursor.execute("""
                SELECT COUNT(DISTINCT fff.file_id) as count
                FROM f_polic_file_field fff
                INNER JOIN f_polic_field f ON fff.filed_id = f.id
                WHERE fff.tenant_id = %s
                AND f.field_type = 2
                AND fff.state = 1
            """, (tenant_id,))
            templates_with_output = cursor.fetchone()['count']
            print(f" 有输出字段关联的模板: {templates_with_output}")
            # Total number of active relations
            cursor.execute("""
                SELECT COUNT(*) as count
                FROM f_polic_file_field
                WHERE tenant_id = %s
                AND state = 1
            """, (tenant_id,))
            total_relations = cursor.fetchone()['count']
            print(f" 总关联关系数: {total_relations}")
        finally:
            cursor.close()
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
    print_section("完成")
def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """Insert a new input field (``field_type = 1``) for the tenant.

    The display name comes from a small built-in table for the two known
    codes; any other code is turned into a name by replacing underscores
    with spaces. No existence check is performed before inserting.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        field_code: Code of the field to create.

    Returns:
        The generated field ID, or ``None`` when the insert failed (the
        transaction is rolled back and the error printed).
    """
    known_names = {
        'clue_info': '线索信息',
        'target_basic_info_clue': '被核查人基本信息(线索)'
    }
    statement = """
        INSERT INTO f_polic_field
        (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """
    cursor = conn.cursor()
    try:
        new_id = generate_id()
        display_name = known_names.get(field_code, field_code.replace('_', ' '))
        # The literal 1 is the field_type value for input fields.
        row = (new_id, tenant_id, display_name, field_code, 1, CREATED_BY, UPDATED_BY)
        cursor.execute(statement, row)
        conn.commit()
        print(f" [创建输入字段] {field_code} ({display_name}) - ID: {new_id}")
        return new_id
    except Exception as e:
        conn.rollback()
        print(f" [错误] 创建输入字段失败 {field_code}: {str(e)}")
        return None
    finally:
        cursor.close()
# Run the update only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

928
update_templates_custom.py Normal file
View File

@ -0,0 +1,928 @@
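"""
Template update script with a configurable database connection and tenant ID.

Features:
1. Update the template hierarchy from the template_finish/ directory structure.
2. Update the template field relations (input fields and output fields).

Usage:
1. Command-line arguments:
   python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1
2. Interactive prompts:
   python update_templates_custom.py
"""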
import os
import sys
import pymysql
import argparse
from pathlib import Path
from typing import Dict, List, Set, Optional
import re
from docx import Document
import getpass
# On Windows, re-wrap stdout/stderr so that output is encoded as UTF-8.
if sys.platform == 'win32':
    import io
    sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8')
    sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')
# Project root: the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent
# Directory whose sub-tree is mirrored into the template hierarchy.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# Values written to the created_by / updated_by columns of inserted and
# updated rows.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
def print_section(title):
    """Print *title* as a section heading framed by two 70-character rules."""
    rule = "=" * 70
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` or ``[FAIL]`` depending on *success*."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print("{} {}".format(tag, message))
def generate_id():
    """Return a new integer ID based on the current time in microseconds.

    Two calls made within the same clock tick would otherwise yield the
    same value (and a duplicate primary key when used in an insert loop),
    so the last value handed out is remembered on the function object and
    every later ID is forced to be strictly greater.

    Uniqueness is only ensured within one process; calls are not
    synchronised across threads or processes.
    """
    import time
    candidate = int(time.time() * 1000000)
    last_id = getattr(generate_id, "_last_id", 0)
    if candidate <= last_id:
        candidate = last_id + 1
    generate_id._last_id = candidate
    return candidate
def get_db_config_from_args() -> Optional[Dict]:
    """Build the run configuration from the command-line arguments.

    Returns:
        A configuration dict (connection settings, ``tenant_id``,
        ``dry_run``, ``update_hierarchy``, ``update_fields``) when host,
        port, user, password, database and tenant ID were all supplied;
        otherwise ``None``.

    Notes:
        * ``--update-hierarchy`` / ``--update-fields`` default to enabled;
          the ``--no-update-hierarchy`` / ``--no-update-fields``
          counterparts switch the step off.
        * Port, password and tenant ID only need to be present, so a
          tenant ID of 0 or an empty password is accepted. Host, user and
          database must be non-empty.
    """
    parser = argparse.ArgumentParser(
        description='模板更新脚本 - 支持自定义数据库连接和租户ID',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
示例
# 使用命令行参数
python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1
# 使用交互式输入
python update_templates_custom.py
"""
    )
    parser.add_argument('--host', type=str, help='MySQL服务器地址')
    parser.add_argument('--port', type=int, help='MySQL服务器端口')
    parser.add_argument('--user', type=str, help='MySQL用户名')
    parser.add_argument('--password', type=str, help='MySQL密码')
    parser.add_argument('--database', type=str, help='数据库名称')
    parser.add_argument('--tenant-id', type=int, help='租户ID')
    parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)')
    # store_true with default=True can never produce False, so each switch
    # gets a --no-... twin writing False to the same destination.
    parser.add_argument('--update-hierarchy', dest='update_hierarchy', action='store_true',
                        default=True, help='更新模板层级结构(默认启用)')
    parser.add_argument('--no-update-hierarchy', dest='update_hierarchy', action='store_false',
                        help='不更新模板层级结构')
    parser.add_argument('--update-fields', dest='update_fields', action='store_true',
                        default=True, help='更新字段关联关系(默认启用)')
    parser.add_argument('--no-update-fields', dest='update_fields', action='store_false',
                        help='不更新字段关联关系')
    args = parser.parse_args()
    # Fall back to the interactive path unless everything was supplied.
    has_names = all([args.host, args.user, args.database])
    has_values = all(
        value is not None for value in (args.port, args.password, args.tenant_id)
    )
    if not (has_names and has_values):
        return None
    return {
        'host': args.host,
        'port': args.port,
        'user': args.user,
        'password': args.password,
        'database': args.database,
        'charset': 'utf8mb4',
        'tenant_id': args.tenant_id,
        'dry_run': args.dry_run,
        'update_hierarchy': args.update_hierarchy,
        'update_fields': args.update_fields
    }
def get_db_config_interactive() -> Dict:
    """Ask for the database connection, tenant ID and update options.

    Empty answers select the default shown in the prompt. The tenant ID is
    mandatory: an empty or non-numeric answer terminates the process with
    exit status 1.

    Returns:
        A configuration dict with the same keys as the one produced from
        the command-line arguments.
    """
    print_section("数据库连接配置")
    print("请输入数据库连接信息(直接回车使用默认值):")
    host = input("MySQL服务器地址 [152.136.177.240]: ").strip() or "152.136.177.240"
    port_str = input("MySQL服务器端口 [5012]: ").strip() or "5012"
    # isdecimal() (not isdigit()) matches exactly what int() can parse;
    # isdigit() also accepts characters such as superscript digits, for
    # which int() raises ValueError.
    port = int(port_str) if port_str.isdecimal() else 5012
    user = input("MySQL用户名 [finyx]: ").strip() or "finyx"
    password = getpass.getpass("MySQL密码 [留空使用默认]: ").strip()
    if not password:
        # NOTE(review): a real credential is embedded here as the fallback.
        # It should be moved out of the source (environment / secret store)
        # and rotated; kept for now so existing usage keeps working.
        password = "6QsGK6MpePZDE57Z"
    database = input("数据库名称 [finyx]: ").strip() or "finyx"
    print("\n租户配置:")
    tenant_id_str = input("租户ID (tenant_id) [必填]: ").strip()
    if not tenant_id_str:
        print("[错误] 租户ID不能为空")
        sys.exit(1)
    try:
        tenant_id = int(tenant_id_str)
    except ValueError:
        print("[错误] 租户ID必须是数字")
        sys.exit(1)
    print("\n更新选项:")
    # The two update steps default to yes, the preview mode defaults to no.
    update_hierarchy = input("更新模板层级结构?[Y/n]: ").strip().lower() != 'n'
    update_fields = input("更新字段关联关系?[Y/n]: ").strip().lower() != 'n'
    dry_run = input("预览模式(不实际更新)?[y/N]: ").strip().lower() == 'y'
    return {
        'host': host,
        'port': port,
        'user': user,
        'password': password,
        'database': database,
        'charset': 'utf8mb4',
        'tenant_id': tenant_id,
        'dry_run': dry_run,
        'update_hierarchy': update_hierarchy,
        'update_fields': update_fields
    }
def test_db_connection(config: Dict) -> Optional[pymysql.Connection]:
    """Open a database connection from the settings in *config*.

    Args:
        config: Dict providing ``host``, ``port``, ``user``, ``password``,
            ``database`` and ``charset``.

    Returns:
        The open connection, or ``None`` when connecting failed (the
        failure is reported through ``print_result``).
    """
    connect_keys = ('host', 'port', 'user', 'password', 'database', 'charset')
    try:
        settings = {key: config[key] for key in connect_keys}
        return pymysql.connect(**settings)
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return None
# ==================== 模板层级结构更新 ====================
def scan_directory_structure(base_dir: Path) -> Dict:
    """Walk *base_dir* and list its sub-directories and Word templates.

    Paths are reported relative to *base_dir* with ``/`` separators.
    Entries of each directory are visited in sorted order and a directory
    is always listed before anything inside it. Only files whose suffix is
    ``.docx`` or ``.doc`` (case-insensitive) are reported.

    Args:
        base_dir: Root directory of the scan. A missing directory yields
            empty lists.

    Returns:
        ``{'directories': [...], 'files': [...]}`` where every entry is a
        dict with ``name``, ``path`` and ``parent_path``. ``parent_path``
        is ``None`` for entries located directly in *base_dir*, for
        directories and files alike.
    """
    directories: List[Dict] = []
    files: List[Dict] = []
    template_suffixes = ('.docx', '.doc')

    def relative(path: Path) -> Optional[str]:
        """Return *path* relative to base_dir, or None for base_dir itself."""
        rel = str(path.relative_to(base_dir)).replace('\\', '/')
        return None if rel == '.' else rel

    def scan_recursive(current_path: Path, parent_path: Optional[str] = None):
        """Record *current_path* (unless it is the root) and descend into it."""
        if not current_path.is_dir():
            return
        current_rel = relative(current_path)
        if current_rel is not None:
            directories.append({
                'name': current_path.name,
                'path': current_rel,
                'parent_path': parent_path
            })
        for item in sorted(current_path.iterdir()):
            if item.is_dir():
                # current_rel is None at the root, so top-level directories
                # get parent_path None, the same as top-level files.
                scan_recursive(item, current_rel)
            elif item.is_file() and item.suffix.lower() in template_suffixes:
                files.append({
                    'name': item.name,
                    'path': relative(item),
                    'parent_path': current_rel
                })

    scan_recursive(base_dir)
    return {
        'directories': directories,
        'files': files
    }
def get_existing_templates(conn, tenant_id: int) -> Dict:
    """Load the tenant's active template nodes and index them.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose nodes are read.

    Returns:
        A dict with three indexes over the rows of ``f_polic_file_config``
        that have ``state = 1``:

        * ``by_path``: ``file_path`` -> row, for rows with a file path;
        * ``by_name``: ``name`` -> list of rows, for rows without a file
          path (directory nodes);
        * ``by_id``: ``id`` -> row, for every row.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("""
            SELECT id, name, file_path, parent_id
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        by_path, by_name, by_id = {}, {}, {}
        for row in cursor.fetchall():
            by_id[row['id']] = row
            if row['file_path']:
                by_path[row['file_path']] = row
                continue
            # No file path: a directory node, grouped by name.
            by_name.setdefault(row['name'], []).append(row)
        return {'by_path': by_path, 'by_name': by_name, 'by_id': by_id}
    finally:
        cursor.close()
def create_or_update_directory(conn, tenant_id: int, name: str, parent_id: Optional[int],
                               existing_templates: Dict, dry_run: bool = False) -> int:
    """Return the ID of a directory node, inserting or touching its row.

    A directory matches when an indexed node of the same name has the same
    ``parent_id`` and no ``file_path``.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant ID.
        name: Directory name.
        parent_id: ID of the parent node, ``None`` for a top-level node.
        existing_templates: Index as produced by ``get_existing_templates``.
        dry_run: When true, nothing is written; the progress line is
            still printed.

    Returns:
        The ID of the matching node, or the newly generated ID.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        same_name = existing_templates['by_name'].get(name, [])
        match = next(
            (node for node in same_name
             if node.get('parent_id') == parent_id and not node.get('file_path')),
            None,
        )
        if not match:
            # No such directory yet: insert a new node without file path.
            new_id = generate_id()
            if not dry_run:
                cursor.execute("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1)
                """, (new_id, tenant_id, parent_id, name, CREATED_BY, UPDATED_BY))
                conn.commit()
            print(f" [创建目录] {name} (ID: {new_id})")
            return new_id
        # Known directory: rewrite its parent and refresh the audit columns.
        if not dry_run:
            cursor.execute("""
                UPDATE f_polic_file_config
                SET parent_id = %s, updated_time = NOW(), updated_by = %s
                WHERE id = %s AND tenant_id = %s
            """, (parent_id, UPDATED_BY, match['id'], tenant_id))
            conn.commit()
        print(f" [更新目录] {name} (ID: {match['id']})")
        return match['id']
    finally:
        cursor.close()
def create_or_update_file(conn, tenant_id: int, file_info: Dict, parent_id: Optional[int],
                          existing_templates: Dict, dry_run: bool = False) -> int:
    """Return the ID of a template file node, inserting or updating its row.

    A file matches an existing node by its relative ``file_path``.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant ID.
        file_info: Dict with the keys ``path`` and ``name``.
        parent_id: ID of the parent directory node, or ``None``.
        existing_templates: Index as produced by ``get_existing_templates``.
        dry_run: When true, nothing is written; the progress line is
            still printed.

    Returns:
        The ID of the matching node, or the newly generated ID.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        rel_path = file_info['path']
        display_name = file_info['name']
        match = existing_templates['by_path'].get(rel_path)
        if not match:
            # Unknown path: insert a new file node.
            new_id = generate_id()
            if not dry_run:
                cursor.execute("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (new_id, tenant_id, parent_id, display_name, rel_path, CREATED_BY, UPDATED_BY))
                conn.commit()
            print(f" [创建文件] {display_name} (ID: {new_id})")
            return new_id
        # Known path: refresh parent, name and the audit columns.
        if not dry_run:
            cursor.execute("""
                UPDATE f_polic_file_config
                SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s
                WHERE id = %s AND tenant_id = %s
            """, (parent_id, display_name, UPDATED_BY, match['id'], tenant_id))
            conn.commit()
        print(f" [更新文件] {display_name} (ID: {match['id']})")
        return match['id']
    finally:
        cursor.close()
def update_template_hierarchy(conn, tenant_id: int, dry_run: bool = False):
    """Mirror the directory tree under ``TEMPLATES_DIR`` into the database.

    Directory nodes are matched by name and parent ID, file nodes by their
    relative path. Missing nodes are inserted; file nodes whose parent or
    name changed are updated. Every write is committed on its own.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        dry_run: When true, only count what would change.

    Returns:
        Dict with the counters ``directories_created``,
        ``directories_updated``, ``files_created`` and ``files_updated``.
        The dict is also returned (all zero) when nothing was found on
        disk, so callers can always read the counters.

    Raises:
        Exception: A failing write is rolled back and re-raised.
    """
    print_section("更新模板层级结构")

    def run_write(sql: str, params: tuple) -> None:
        """Execute one write and commit it; roll back and re-raise on error."""
        cursor = conn.cursor()
        try:
            cursor.execute(sql, params)
            conn.commit()
        except Exception:
            conn.rollback()
            raise
        finally:
            cursor.close()

    dir_created = 0
    # NOTE(review): a directory only matches an existing node when its
    # parent_id is already equal, so an existing directory never needs an
    # update and this counter stays 0. A moved directory is inserted as a
    # new node. Confirm whether matching by name alone was intended.
    dir_updated = 0
    file_created = 0
    file_updated = 0

    def summary() -> Dict:
        """Return the counters in the shape callers expect."""
        return {
            'directories_created': dir_created,
            'directories_updated': dir_updated,
            'files_created': file_created,
            'files_updated': file_updated
        }

    # 1. Scan the directory tree
    print("1. 扫描目录结构...")
    structure = scan_directory_structure(TEMPLATES_DIR)
    print_result(True, f"找到 {len(structure['directories'])} 个目录,{len(structure['files'])} 个文件")
    if not structure['directories'] and not structure['files']:
        print_result(False, "未找到任何目录或文件")
        return summary()
    # 2. Load the existing nodes
    print("\n2. 获取现有模板...")
    existing_templates = get_existing_templates(conn, tenant_id)
    print_result(True, f"找到 {len(existing_templates['by_path'])} 个文件模板,{len(existing_templates['by_name'])} 个目录模板")
    # 3. Create missing directory nodes. Parents precede their children in
    # the scan result, so the parent ID is known when a child is handled.
    print("\n3. 创建/更新目录节点...")
    path_to_id = {}
    for dir_info in structure['directories']:
        parent_id = None
        if dir_info['parent_path']:
            parent_id = path_to_id.get(dir_info['parent_path'])
        existing = None
        for candidate in existing_templates['by_name'].get(dir_info['name'], []):
            if candidate.get('parent_id') == parent_id and not candidate.get('file_path'):
                existing = candidate
                break
        if existing:
            dir_id = existing['id']
        else:
            dir_id = generate_id()
            dir_created += 1
            if not dry_run:
                run_write("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1)
                """, (dir_id, tenant_id, parent_id, dir_info['name'], CREATED_BY, UPDATED_BY))
        path_to_id[dir_info['path']] = dir_id
    print_result(True, f"创建 {dir_created} 个目录,更新 {dir_updated} 个目录")
    # 4. Create or update file nodes
    print("\n4. 创建/更新文件节点...")
    for file_info in structure['files']:
        parent_id = None
        if file_info['parent_path']:
            parent_id = path_to_id.get(file_info['parent_path'])
        existing = existing_templates['by_path'].get(file_info['path'])
        if existing:
            file_id = existing['id']
            changed = (existing.get('parent_id') != parent_id
                       or existing.get('name') != file_info['name'])
            if changed:
                file_updated += 1
                if not dry_run:
                    run_write("""
                        UPDATE f_polic_file_config
                        SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s
                        WHERE id = %s AND tenant_id = %s
                    """, (parent_id, file_info['name'], UPDATED_BY, file_id, tenant_id))
        else:
            file_id = generate_id()
            file_created += 1
            if not dry_run:
                run_write("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (file_id, tenant_id, parent_id, file_info['name'], file_info['path'], CREATED_BY, UPDATED_BY))
    print_result(True, f"创建 {file_created} 个文件,更新 {file_updated} 个文件")
    return summary()
# ==================== 字段关联关系更新 ====================
def get_input_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Load the tenant's two standard input fields.

    Only active rows (``state = 1``) with ``field_type = 1`` and a
    ``filed_code`` of ``clue_info`` or ``target_basic_info_clue`` are read.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose fields are read.

    Returns:
        Mapping of ``filed_code`` to field ``id``; empty when none exist.
    """
    query = """
        SELECT id, filed_code, name
        FROM f_polic_field
        WHERE tenant_id = %s
        AND field_type = 1
        AND filed_code IN ('clue_info', 'target_basic_info_clue')
        AND state = 1
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        return {row['filed_code']: row['id'] for row in cursor.fetchall()}
    finally:
        cursor.close()
def get_output_fields(conn, tenant_id: int) -> Dict[str, int]:
    """Load every active output field (``field_type = 2``) of one tenant.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose fields are read.

    Returns:
        Mapping of ``filed_code`` to field ``id``.
    """
    query = """
        SELECT id, filed_code, name
        FROM f_polic_field
        WHERE tenant_id = %s
        AND field_type = 2
        AND state = 1
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        code_to_id = {}
        for row in cursor.fetchall():
            code_to_id[row['filed_code']] = row['id']
        return code_to_id
    finally:
        cursor.close()
def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """Extract every ``{{field_code}}`` placeholder from a .docx file.

    The body paragraphs and the paragraphs of every cell of the document's
    top-level tables are scanned. Surrounding whitespace inside the braces
    is stripped and empty codes are ignored.

    Args:
        file_path: Path of the document to read.

    Returns:
        The set of placeholder codes found. When the document cannot be
        read, a warning is written to stderr and an empty set is returned;
        the warning matters because callers cannot otherwise tell an
        unreadable template from one without placeholders.
    """
    placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')
    placeholders: Set[str] = set()

    def collect(paragraphs) -> None:
        """Add the placeholders found in *paragraphs* to the result set."""
        for paragraph in paragraphs:
            for match in placeholder_pattern.findall(paragraph.text):
                field_code = match.strip()
                if field_code:
                    placeholders.add(field_code)

    try:
        doc = Document(file_path)
        collect(doc.paragraphs)
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        collect(cell.paragraphs)
            except Exception:
                # Skip a table that cannot be walked, but let Ctrl+C and
                # SystemExit through (a bare ``except`` would not).
                continue
    except Exception as e:
        print(f" [警告] 读取文件失败 {file_path}: {str(e)}", file=sys.stderr)
    return placeholders
def get_all_templates(conn, tenant_id: int) -> List[Dict]:
    """Return the tenant's active template file nodes (no directories).

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant whose templates are read.

    Returns:
        Rows of ``f_polic_file_config`` with a non-empty ``file_path`` and
        ``state = 1``, as dicts with ``id``, ``name`` and ``file_path``.
    """
    query = """
        SELECT id, name, file_path
        FROM f_polic_file_config
        WHERE tenant_id = %s
        AND file_path IS NOT NULL
        AND file_path != ''
        AND state = 1
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        return cursor.fetchall()
    finally:
        cursor.close()
def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]:
    """Return the IDs of the fields actively linked to one template.

    Args:
        conn: Open database connection.
        tenant_id: Tenant that owns the template.
        file_id: Template ID.

    Returns:
        Set of ``filed_id`` values of the ``state = 1`` rows of
        ``f_polic_file_field`` for that template.
    """
    query = """
        SELECT filed_id
        FROM f_polic_file_field
        WHERE tenant_id = %s
        AND file_id = %s
        AND state = 1
    """
    cursor = conn.cursor()
    try:
        cursor.execute(query, (tenant_id, file_id))
        rows = cursor.fetchall()
        # Rows are tuples; filed_id is the only selected column.
        return set(row[0] for row in rows)
    finally:
        cursor.close()
def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """Insert a new input field (``field_type = 1``) for the tenant.

    The display name comes from a built-in table for the two known codes;
    any other code is turned into a name by replacing underscores with
    spaces. No existence check is performed before inserting.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        field_code: Code of the field to create.

    Returns:
        The generated field ID, or ``None`` when the insert failed. On
        failure the transaction is rolled back and the reason is written
        to stderr instead of being dropped silently.
    """
    cursor = conn.cursor()
    try:
        field_id = generate_id()
        field_name_map = {
            'clue_info': '线索信息',
            'target_basic_info_clue': '被核查人基本信息(线索)'
        }
        field_name = field_name_map.get(field_code, field_code.replace('_', ' '))
        insert_sql = """
            INSERT INTO f_polic_field
            (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
            VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
        """
        cursor.execute(insert_sql, (
            field_id,
            tenant_id,
            field_name,
            field_code,
            1,  # field_type=1 marks an input field
            CREATED_BY,
            UPDATED_BY
        ))
        conn.commit()
        return field_id
    except Exception as e:
        conn.rollback()
        print(f" [错误] 创建输入字段失败 {field_code}: {str(e)}", file=sys.stderr)
        return None
    finally:
        cursor.close()
def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]:
    """Return the ID of the field with ``field_code``, creating it if needed.

    The existence check matches on tenant and ``filed_code`` only (no
    filter on ``field_type`` or ``state``). A new row is created with
    ``field_type = 2`` (output field) and a name derived from the code by
    replacing underscores with spaces.

    Args:
        conn: Open pymysql connection.
        tenant_id: Tenant ID.
        field_code: Placeholder code of the field.

    Returns:
        The field ID, or ``None`` when the lookup or insert failed. On
        failure the transaction is rolled back and the reason is written
        to stderr instead of being dropped silently.
    """
    cursor = conn.cursor()
    try:
        # Look for an existing row first; the lookup cursor is released in
        # a ``finally`` so it does not leak when the query raises.
        check_cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            check_cursor.execute("""
                SELECT id FROM f_polic_field
                WHERE tenant_id = %s AND filed_code = %s
            """, (tenant_id, field_code))
            existing = check_cursor.fetchone()
        finally:
            check_cursor.close()
        if existing:
            return existing['id']
        # Create the new field.
        field_id = generate_id()
        field_name = field_code.replace('_', ' ')
        insert_sql = """
            INSERT INTO f_polic_field
            (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
            VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
        """
        cursor.execute(insert_sql, (
            field_id,
            tenant_id,
            field_name,
            field_code,
            2,  # field_type=2 marks an output field
            CREATED_BY,
            UPDATED_BY
        ))
        conn.commit()
        return field_id
    except Exception as e:
        conn.rollback()
        print(f" [错误] 创建字段失败 {field_code}: {str(e)}", file=sys.stderr)
        return None
    finally:
        cursor.close()
def update_template_field_relations(conn, tenant_id: int, file_id: int, file_name: str,
                                    input_field_ids: List[int], output_field_ids: List[int],
                                    dry_run: bool = False):
    """Make a template's field relations equal to the given field IDs.

    The wanted set is the union of both ID lists. Active relations outside
    the wanted set are deleted and missing ones are inserted, all in one
    transaction.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        file_id: Template ID.
        file_name: Template name (not used by this implementation).
        input_field_ids: IDs of the input fields to link.
        output_field_ids: IDs of the output fields to link.
        dry_run: When true, return before writing anything.

    Raises:
        Exception: Any failure is rolled back and re-raised.
    """
    cursor = conn.cursor()
    try:
        wanted = set(input_field_ids + output_field_ids)
        if not wanted:
            return
        current = get_existing_relations(conn, tenant_id, file_id)
        missing = wanted - current
        obsolete = current - wanted
        nothing_to_do = not missing and not obsolete
        if nothing_to_do or dry_run:
            return
        # Drop the relations that are no longer wanted.
        if obsolete:
            placeholders = ','.join(['%s'] * len(obsolete))
            delete_sql = f"""
                DELETE FROM f_polic_file_field
                WHERE tenant_id = %s
                AND file_id = %s
                AND filed_id IN ({placeholders})
            """
            cursor.execute(delete_sql, [tenant_id, file_id] + list(obsolete))
        # Link the missing fields.
        check_sql = """
            SELECT id FROM f_polic_file_field
            WHERE tenant_id = %s AND file_id = %s AND filed_id = %s
        """
        insert_sql = """
            INSERT INTO f_polic_file_field
            (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state)
            VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
        """
        for field_id in missing:
            # A row of any state for this pair blocks a second insert.
            cursor.execute(check_sql, (tenant_id, file_id, field_id))
            if cursor.fetchone():
                continue
            relation_row = (generate_id(), tenant_id, file_id, field_id, CREATED_BY, UPDATED_BY)
            cursor.execute(insert_sql, relation_row)
        conn.commit()
    except Exception as e:
        conn.rollback()
        raise
    finally:
        cursor.close()
def update_all_field_relations(conn, tenant_id: int, dry_run: bool = False):
    """Synchronise the field relations of every template of a tenant.

    For each template file found locally, the ``{{...}}`` placeholders are
    extracted, mapped to output fields (missing fields are created) and
    the template is linked to the input fields plus those output fields.

    Args:
        conn: Open database connection.
        tenant_id: Tenant ID.
        dry_run: When true, nothing is written: missing fields are only
            counted, not created, and relations are only compared.

    Returns:
        Dict with the counters ``total_templates``, ``updated``, ``kept``,
        ``errors``, ``placeholders_found`` and ``fields_created``. The
        dict is returned (all zero) on the early exits as well, so callers
        can always read the counters.
    """
    print_section("更新字段关联关系")
    empty_stats = {
        'total_templates': 0,
        'updated': 0,
        'kept': 0,
        'errors': 0,
        'placeholders_found': 0,
        'fields_created': 0
    }
    # 1. Load the input fields
    print("1. 获取输入字段...")
    input_fields = get_input_fields(conn, tenant_id)
    if not input_fields:
        if dry_run:
            # Creating the fields would write to the database.
            print(" [预览] 缺少输入字段,预览模式下不会创建")
        else:
            print(" 创建缺失的输入字段...")
            for field_code in ['clue_info', 'target_basic_info_clue']:
                field_id = create_missing_input_field(conn, tenant_id, field_code)
                if field_id:
                    input_fields[field_code] = field_id
    if not input_fields:
        print_result(False, "无法获取或创建输入字段")
        return empty_stats
    input_field_ids = list(input_fields.values())
    print_result(True, f"找到 {len(input_field_ids)} 个输入字段")
    # 2. Load the output fields
    print("\n2. 获取输出字段...")
    output_fields = get_output_fields(conn, tenant_id)
    print_result(True, f"找到 {len(output_fields)} 个输出字段")
    # 3. Load all templates
    print("\n3. 获取所有模板...")
    templates = get_all_templates(conn, tenant_id)
    print_result(True, f"找到 {len(templates)} 个模板")
    if not templates:
        print_result(False, "未找到模板")
        return empty_stats
    # 4. Scan the placeholders and synchronise the relations
    print("\n4. 扫描模板占位符并更新关联关系...")
    total_updated = 0
    total_kept = 0
    total_errors = 0
    all_placeholders_found = set()
    missing_fields = set()
    for i, template in enumerate(templates, 1):
        template_id = template['id']
        template_name = template['name']
        file_path = template['file_path']
        if i % 20 == 0:
            print(f" 处理进度: {i}/{len(templates)}")
        # The template file must exist locally to be scanned
        local_file = PROJECT_ROOT / file_path
        if not local_file.exists():
            total_errors += 1
            continue
        # Extract the placeholders
        placeholders = extract_placeholders_from_docx(local_file)
        all_placeholders_found.update(placeholders)
        # Map each placeholder to an output field ID
        output_field_ids = []
        for placeholder in placeholders:
            if placeholder in output_fields:
                output_field_ids.append(output_fields[placeholder])
                continue
            missing_fields.add(placeholder)
            if dry_run:
                # Preview only: record the missing field, do not insert it.
                continue
            field_id = create_missing_output_field(conn, tenant_id, placeholder)
            if field_id:
                output_fields[placeholder] = field_id
                output_field_ids.append(field_id)
        # Synchronise the relations
        try:
            wanted = set(input_field_ids + output_field_ids)
            existing = get_existing_relations(conn, tenant_id, template_id)
            if (wanted - existing) or (existing - wanted):
                update_template_field_relations(
                    conn, tenant_id, template_id, template_name,
                    input_field_ids, output_field_ids, dry_run
                )
                total_updated += 1
            else:
                total_kept += 1
        except Exception as e:
            # Keep going with the next template, but say what went wrong.
            print(f" [错误] {template_name}: {str(e)}")
            total_errors += 1
    # 5. Statistics
    print_section("字段关联更新结果")
    print(f" 总模板数: {len(templates)}")
    print(f" 已更新: {total_updated}")
    print(f" 保持不变: {total_kept}")
    print(f" 错误: {total_errors}")
    print(f" 发现的占位符总数: {len(all_placeholders_found)}")
    print(f" 创建的字段数: {len(missing_fields)}")
    return {
        'total_templates': len(templates),
        'updated': total_updated,
        'kept': total_kept,
        'errors': total_errors,
        'placeholders_found': len(all_placeholders_found),
        'fields_created': len(missing_fields)
    }
# ==================== 主函数 ====================
def main():
    """Run the template update with the chosen configuration.

    The configuration comes from the command line when complete, otherwise
    from interactive prompts. After showing it (and asking for
    confirmation unless in preview mode) the database is opened, the
    enabled steps are run and a summary is printed. The connection is
    always closed at the end.
    """
    print_section("模板更新脚本")
    print("支持自定义数据库连接和租户ID配置")
    # Obtain the configuration
    config = get_db_config_from_args()
    if not config:
        config = get_db_config_interactive()

    def yes_no(flag) -> str:
        """Render a boolean option for the configuration overview."""
        return '是' if flag else '否'

    # Show the configuration
    print_section("配置信息")
    print(f" 数据库服务器: {config['host']}:{config['port']}")
    print(f" 数据库名称: {config['database']}")
    print(f" 用户名: {config['user']}")
    print(f" 租户ID: {config['tenant_id']}")
    print(f" 预览模式: {yes_no(config['dry_run'])}")
    print(f" 更新层级结构: {yes_no(config['update_hierarchy'])}")
    print(f" 更新字段关联: {yes_no(config['update_fields'])}")
    if config['dry_run']:
        print("\n[注意] 当前为预览模式,不会实际更新数据库")
    # Ask for confirmation before writing
    if not config.get('dry_run'):
        confirm = input("\n确认执行更新?[y/N]: ").strip().lower()
        if confirm != 'y':
            print("已取消")
            return
    # Connect to the database
    print_section("连接数据库")
    conn = test_db_connection(config)
    if not conn:
        return
    print_result(True, "数据库连接成功")
    try:
        tenant_id = config['tenant_id']
        dry_run = config['dry_run']
        results = {}
        # Update the template hierarchy
        if config['update_hierarchy']:
            hierarchy_result = update_template_hierarchy(conn, tenant_id, dry_run)
            results['hierarchy'] = hierarchy_result
        # Update the field relations
        if config['update_fields']:
            fields_result = update_all_field_relations(conn, tenant_id, dry_run)
            results['fields'] = fields_result
        # Summary
        print_section("更新完成")
        if config['dry_run']:
            print(" 本次为预览模式,未实际更新数据库")
        else:
            print(" 数据库已更新")
        # A step that stopped early may have produced no counters; only
        # print a section when there is something to read.
        h = results.get('hierarchy')
        if h:
            print(f"\n 层级结构:")
            print(f" - 创建目录: {h['directories_created']}")
            print(f" - 更新目录: {h['directories_updated']}")
            print(f" - 创建文件: {h['files_created']}")
            print(f" - 更新文件: {h['files_updated']}")
        f = results.get('fields')
        if f:
            print(f"\n 字段关联:")
            print(f" - 总模板数: {f['total_templates']}")
            print(f" - 已更新: {f['updated']}")
            print(f" - 保持不变: {f['kept']}")
            print(f" - 发现的占位符: {f['placeholders_found']}")
            print(f" - 创建的字段: {f['fields_created']}")
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
# Script entry point: run main() and turn failures into exit codes.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl+C is treated as a user cancellation; exits with status 0.
        print("\n\n[中断] 用户取消操作")
        sys.exit(0)
    except Exception as e:
        # Any other error: print the message and traceback, exit with status 1.
        print(f"\n[错误] 发生异常: {str(e)}")
        import traceback
        traceback.print_exc()
        sys.exit(1)

View File

@ -0,0 +1,250 @@
"""
验证模板字段关联关系
检查所有模板是否都正确关联了输入字段和输出字段
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Set
from dotenv import load_dotenv
# Load environment variables (python-dotenv reads them from a .env file).
load_dotenv()
# Database connection settings; each value can be overridden through the
# environment variable named in the corresponding os.getenv call.
# NOTE(review): the fallback values embed a real host and password in the
# source. They should be moved out of the repository and the password rotated.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}
def print_section(title):
    """Print *title* as a section heading framed by two 70-character rules."""
    rule = "=" * 70
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` or ``[FAIL]`` depending on *success*."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print("{} {}".format(tag, message))
def get_actual_tenant_id(conn) -> int:
    """Return a tenant ID that occurs in ``f_polic_file_config``.

    The query takes one distinct ``tenant_id`` without any ordering, so
    with several tenants in the table the choice is left to the database.

    Args:
        conn: Open pymysql connection.

    Returns:
        The tenant ID read from the table, or 1 when the query returns no
        row.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
        row = cursor.fetchone()
        return row['tenant_id'] if row else 1
    finally:
        cursor.close()
def _percent(part: int, total: int) -> int:
    """Return ``part`` as a floored whole-number percentage of ``total``.

    Returns 0 when ``total`` is 0 so that an empty template list cannot raise
    ``ZeroDivisionError`` while the statistics are printed.
    """
    return part * 100 // total if total else 0


def _fetch_related_fields(cursor, tenant_id: int, template_id):
    """Return the enabled fields linked to one template, ordered by field_type then filed_code."""
    cursor.execute("""
        SELECT f.id, f.filed_code, f.field_type, f.name
        FROM f_polic_file_field fff
        INNER JOIN f_polic_field f ON fff.filed_id = f.id
        WHERE fff.tenant_id = %s
        AND fff.file_id = %s
        AND fff.state = 1
        AND f.state = 1
        ORDER BY f.field_type, f.filed_code
    """, (tenant_id, template_id))
    return cursor.fetchall()


def verify_template_relations(conn, tenant_id: int):
    """Verify and report the template-to-field relations of one tenant.

    For every enabled template of ``tenant_id`` that has a non-empty
    ``file_path``, the function checks through ``f_polic_file_field`` that:

    * all required input fields (``field_type = 1`` with code ``clue_info`` or
      ``target_basic_info_clue``) found in ``f_polic_field`` are linked, and
    * at least one output field (``field_type = 2``) is linked.

    It then prints coverage statistics, up to five offending templates per
    category, the relations of up to five sample templates, and overall
    relation counts grouped by field type. Nothing is returned and the
    database is only read.

    Args:
        conn: Open pymysql connection; a dict cursor is created from it and
            closed before the function returns.
        tenant_id: Tenant whose templates and relations are inspected.
    """
    print_section("验证模板字段关联关系")
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # 1. Load every enabled template (file node) of the tenant.
        cursor.execute("""
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND file_path IS NOT NULL
            AND file_path != ''
            AND state = 1
        """, (tenant_id,))
        templates = cursor.fetchall()
        total = len(templates)
        print(f" 总模板数: {total}")

        # 2. Resolve the ids of the input fields every template must link to.
        required_codes = ('clue_info', 'target_basic_info_clue')
        cursor.execute("""
            SELECT id, filed_code
            FROM f_polic_field
            WHERE tenant_id = %s
            AND field_type = 1
            AND filed_code IN ('clue_info', 'target_basic_info_clue')
            AND state = 1
        """, (tenant_id,))
        input_fields = {row['filed_code']: row['id'] for row in cursor.fetchall()}
        input_field_ids = set(input_fields.values())
        print(f" 输入字段: {len(input_field_ids)}")
        for code, field_id in input_fields.items():
            print(f" - {code}: ID={field_id}")
        # A required field that is absent from f_polic_field would otherwise be
        # skipped silently by the subset check below, so make it visible.
        missing_codes = [code for code in required_codes if code not in input_fields]
        if missing_codes:
            print(f" [警告] 字段表中未找到输入字段: {', '.join(missing_codes)}")

        # 3. Check the relations of each template.
        templates_with_input = 0
        templates_without_input = []
        templates_with_output = 0
        templates_without_output = []
        templates_with_both = 0
        # The first five templates that have any related field are kept as
        # samples for step 5, so they do not have to be queried again.
        sample_templates = []
        for index, template in enumerate(templates):
            template_id = template['id']
            template_name = template['name']
            related_fields = _fetch_related_fields(cursor, tenant_id, template_id)
            if index < 5 and related_fields:
                sample_templates.append({
                    'template': template,
                    'fields': related_fields
                })
            related_input_ids = {fld['id'] for fld in related_fields if fld['field_type'] == 1}
            related_output_ids = {fld['id'] for fld in related_fields if fld['field_type'] == 2}

            # Input fields: every required id must be linked.
            has_all_input = input_field_ids.issubset(related_input_ids)
            if has_all_input:
                templates_with_input += 1
            else:
                templates_without_input.append({
                    'id': template_id,
                    'name': template_name,
                    'missing': input_field_ids - related_input_ids
                })

            # Output fields: at least one must be linked.
            if related_output_ids:
                templates_with_output += 1
            else:
                templates_without_output.append({
                    'id': template_id,
                    'name': template_name
                })

            if has_all_input and related_output_ids:
                templates_with_both += 1

        # 4. Coverage statistics (percentages are 0 when there are no templates).
        print_section("验证结果统计")
        print(f" 有输入字段关联: {templates_with_input}/{total} ({_percent(templates_with_input, total)}%)")
        print(f" 有输出字段关联: {templates_with_output}/{total} ({_percent(templates_with_output, total)}%)")
        print(f" 同时有输入和输出: {templates_with_both}/{total} ({_percent(templates_with_both, total)}%)")
        if templates_without_input:
            print(f"\n [警告] {len(templates_without_input)} 个模板缺少输入字段关联:")
            for t in templates_without_input[:5]:
                print(f" - {t['name']} (ID: {t['id']})")
                print(f" 缺少字段ID: {t['missing']}")
        if templates_without_output:
            print(f"\n [警告] {len(templates_without_output)} 个模板没有输出字段关联:")
            for t in templates_without_output[:5]:
                print(f" - {t['name']} (ID: {t['id']})")

        # 5. Show the relations of the sample templates collected in step 3.
        print_section("示例模板的关联关系")
        for sample in sample_templates:
            template = sample['template']
            fields = sample['fields']
            input_fields_list = [fld for fld in fields if fld['field_type'] == 1]
            output_fields_list = [fld for fld in fields if fld['field_type'] == 2]
            print(f"\n 模板: {template['name']} (ID: {template['id']})")
            print(f" 输入字段 ({len(input_fields_list)} 个):")
            for fld in input_fields_list:
                print(f" - {fld['name']} ({fld['filed_code']})")
            print(f" 输出字段 ({len(output_fields_list)} 个):")
            for fld in output_fields_list[:10]:  # show at most 10 output fields
                print(f" - {fld['name']} ({fld['filed_code']})")
            if len(output_fields_list) > 10:
                print(f" ... 还有 {len(output_fields_list) - 10}")

        # 6. Overall relation counts grouped by field type.
        print_section("总体统计")
        cursor.execute("""
            SELECT
            f.field_type,
            CASE
            WHEN f.field_type = 1 THEN '输入字段'
            WHEN f.field_type = 2 THEN '输出字段'
            ELSE '未知'
            END as type_name,
            COUNT(DISTINCT fff.file_id) as template_count,
            COUNT(*) as relation_count
            FROM f_polic_file_field fff
            INNER JOIN f_polic_field f ON fff.filed_id = f.id
            WHERE fff.tenant_id = %s
            AND fff.state = 1
            AND f.state = 1
            GROUP BY f.field_type
        """, (tenant_id,))
        stats = cursor.fetchall()
        for stat in stats:
            print(f" {stat['type_name']}:")
            print(f" - 关联的模板数: {stat['template_count']}")
            print(f" - 关联关系总数: {stat['relation_count']}")
    finally:
        cursor.close()
def main():
    """Script entry point: connect, detect the tenant id, run the verification, disconnect.

    A connection failure is reported with ``print_result`` and the function
    returns without raising. Once connected, the connection is always closed,
    even when the verification raises.
    """
    print_section("验证模板字段关联关系")
    try:
        connection = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as exc:
        print_result(False, f"数据库连接失败: {exc!s}")
        return

    try:
        actual_tenant_id = get_actual_tenant_id(connection)
        print(f"实际tenant_id: {actual_tenant_id}")
        verify_template_relations(connection, actual_tenant_id)
    finally:
        connection.close()
        print_result(True, "数据库连接已关闭")


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,15 @@
f_polic_field 表是字段表，存储有哪些数据字段，其中field_type值为2的是输出字段，值为1的是输入字段
f_polic_file_config 表是文件配置表,用于存储模板信息,已经做过初始化,保存了模板文件的地址以及父级关系
f_polic_file_field 表是文件配置字段关联表，用于存储f_polic_field表和f_polic_file_config 表的关联关系
关联关系如下:
f_polic_file_field的 filed_id(字段id)对应 f_polic_field的id
f_polic_file_field的 file_id(文件id)对应 f_polic_file_config的id
通过这样的关联关系，其他研发人员开发的功能模块可以通过查询f_polic_file_config 表获得不同模板关联了哪些输入和输出字段，然后前端对应展示
之前虽然已经创建了关联关系，但是大都是通过“input_data”和“template_code”实现的，这个并不符合整体设计，这两个字段现在抛弃不使用。

View File

@ -0,0 +1,376 @@
# 模板更新脚本使用说明
## 一、脚本概述
`update_templates_custom.py` 是一个灵活的模板更新脚本,支持:
1. **自定义数据库连接配置**（不从.env文件读取）
2. **自定义租户ID（tenant_id）配置**
3. **更新模板层级结构**(根据`template_finish/`目录结构)
4. **更新字段关联关系**(输入字段和输出字段)
## 二、功能说明
### 2.1 更新模板层级结构
根据本地 `template_finish/` 目录结构更新数据库中的模板层级关系:
- 扫描目录结构,识别所有目录和文件
- 创建或更新目录节点(`f_polic_file_config`表中`file_path=NULL`的记录)
- 创建或更新文件节点(`f_polic_file_config`表中`file_path`不为空的记录)
- 建立正确的`parent_id`层级关系
### 2.2 更新字段关联关系
自动为所有模板建立字段关联关系:
- **输入字段**：所有模板都关联 `clue_info` 和 `target_basic_info_clue`
- **输出字段**:根据模板文件中的占位符(`{{field_code}}`)自动关联对应的输出字段
- **自动创建字段**:如果占位符对应的字段不存在,自动创建该字段
## 三、使用方法
### 3.1 命令行参数方式
```bash
python update_templates_custom.py --host <主机> --port <端口> --user <用户名> --password <密码> --database <数据库名> --tenant-id <租户ID>
```
**完整示例**
```bash
# 基本使用
python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1
# 预览模式(不实际更新数据库)
python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --dry-run
# 只更新层级结构,不更新字段关联
python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --update-hierarchy --no-update-fields
# 只更新字段关联,不更新层级结构
python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --update-fields --no-update-hierarchy
```
**参数说明**
| 参数 | 说明 | 必填 |
|------|------|------|
| `--host` | MySQL服务器地址 | 是 |
| `--port` | MySQL服务器端口 | 是 |
| `--user` | MySQL用户名 | 是 |
| `--password` | MySQL密码 | 是 |
| `--database` | 数据库名称 | 是 |
| `--tenant-id` | 租户ID | 是 |
| `--dry-run` | 预览模式(不实际更新) | 否 |
| `--update-hierarchy` | 更新模板层级结构(默认启用) | 否 |
| `--update-fields` | 更新字段关联关系(默认启用) | 否 |
### 3.2 交互式输入方式
如果命令行参数不完整,脚本会自动进入交互式输入模式:
```bash
python update_templates_custom.py
```
交互式输入示例:
```
======================================================================
数据库连接配置
======================================================================
请输入数据库连接信息(直接回车使用默认值):
MySQL服务器地址 [152.136.177.240]: 192.168.1.100
MySQL服务器端口 [5012]: 3306
MySQL用户名 [finyx]: root
MySQL密码 [留空使用默认]: ********
数据库名称 [finyx]: finyx
租户配置:
租户ID (tenant_id) [必填]: 1
更新选项:
更新模板层级结构?[Y/n]: y
更新字段关联关系?[Y/n]: y
预览模式(不实际更新)?[y/N]: n
确认执行更新?[y/N]: y
```
### 3.3 查看帮助信息
```bash
python update_templates_custom.py --help
```
## 四、使用场景
### 4.1 首次部署
首次部署时,需要将本地模板同步到数据库:
```bash
python update_templates_custom.py \
--host 192.168.1.100 \
--port 3306 \
--user root \
--password your_password \
--database finyx \
--tenant-id 1
```
### 4.2 多租户环境
在不同租户下更新模板:
```bash
# 租户1
python update_templates_custom.py --host ... --tenant-id 1
# 租户2
python update_templates_custom.py --host ... --tenant-id 2
```
### 4.3 预览更新内容
在正式更新前,先预览会执行哪些操作:
```bash
python update_templates_custom.py --host ... --tenant-id 1 --dry-run
```
### 4.4 增量更新
只更新层级结构或只更新字段关联:
```bash
# 只更新层级结构
python update_templates_custom.py --host ... --tenant-id 1 --update-hierarchy --no-update-fields
# 只更新字段关联
python update_templates_custom.py --host ... --tenant-id 1 --update-fields --no-update-hierarchy
```
## 五、输出说明
### 5.1 正常输出示例
```
======================================================================
模板更新脚本
======================================================================
支持自定义数据库连接和租户ID配置
======================================================================
配置信息
======================================================================
数据库服务器: 192.168.1.100:3306
数据库名称: finyx
用户名: root
租户ID: 1
预览模式: 否
更新层级结构: 是
更新字段关联: 是
确认执行更新?[y/N]: y
======================================================================
连接数据库
======================================================================
[OK] 数据库连接成功
======================================================================
更新模板层级结构
======================================================================
1. 扫描目录结构...
[OK] 找到 33 个目录122 个文件
2. 获取现有模板...
[OK] 找到 122 个文件模板28 个目录模板
3. 创建/更新目录节点...
[创建目录] 1-谈话函询模板 (ID: 1766711031977435)
[更新目录] 2-初核模版 (ID: 1766711031977436)
...
[OK] 创建 5 个目录,更新 28 个目录
4. 创建/更新文件节点...
[创建文件] 请示报告卡XXX.docx (ID: 1766711031977437)
[更新文件] 谈话通知书第一联.docx (ID: 1766711031977438)
...
[OK] 创建 2 个文件,更新 120 个文件
======================================================================
更新字段关联关系
======================================================================
1. 获取输入字段...
[OK] 找到 2 个输入字段
2. 获取输出字段...
[OK] 找到 72 个输出字段
3. 获取所有模板...
[OK] 找到 122 个模板
4. 扫描模板占位符并更新关联关系...
处理进度: 20/122
处理进度: 40/122
...
======================================================================
字段关联更新结果
======================================================================
总模板数: 122
已更新: 93 个
保持不变: 29 个
错误: 0 个
发现的占位符总数: 35 个
创建的字段数: 0 个
======================================================================
更新完成
======================================================================
数据库已更新
层级结构:
- 创建目录: 5 个
- 更新目录: 28 个
- 创建文件: 2 个
- 更新文件: 120 个
字段关联:
- 总模板数: 122 个
- 已更新: 93 个
- 保持不变: 29 个
- 发现的占位符: 35 个
- 创建的字段: 0 个
[OK] 数据库连接已关闭
```
### 5.2 预览模式输出
预览模式下,会显示将要执行的操作,但不会实际更新数据库:
```
[注意] 当前为预览模式,不会实际更新数据库
...
[OK] 预览模式,未实际更新数据库
```
## 六、注意事项
### 6.1 数据库连接
- 确保数据库服务器可访问
- 确保用户有足够的权限（SELECT, INSERT, UPDATE, DELETE）
- 确保数据库名称正确
### 6.2 租户ID
- **tenant_id是必填项**,不能为空
- 确保tenant_id在数据库中存在（或需要创建）
- 不同租户的数据是隔离的，更新时请确认tenant_id正确
### 6.3 模板文件
- 脚本会扫描 `template_finish/` 目录下的所有 `.docx` 和 `.doc` 文件
- 确保模板文件路径与数据库中的`file_path`字段匹配
- 如果模板文件不存在,会跳过该模板的字段关联更新
### 6.4 字段创建
- 如果模板中的占位符对应的字段不存在,脚本会自动创建该字段
- 创建的字段类型为输出字段（`field_type=2`）
- 字段名称会根据`field_code`自动生成(将下划线替换为空格)
### 6.5 数据安全
- **建议先使用预览模式**（`--dry-run`）查看将要执行的操作
- 在生产环境执行前,建议先备份数据库
- 确保有足够的磁盘空间和数据库连接数
## 七、常见问题
### 7.1 连接失败
**问题**:数据库连接失败
**解决方案**
- 检查数据库服务器地址和端口是否正确
- 检查用户名和密码是否正确
- 检查网络连接是否正常
- 检查防火墙设置
### 7.2 权限不足
**问题**:执行更新时提示权限不足
**解决方案**
- 确保数据库用户有足够的权限
- 需要以下权限：SELECT, INSERT, UPDATE, DELETE
### 7.3 模板文件不存在
**问题**:部分模板的字段关联更新失败,提示文件不存在
**解决方案**
- 检查数据库中的`file_path`是否正确
- 确保模板文件存在于`template_finish/`目录下
- 检查文件路径中的斜杠方向（Windows使用反斜杠，脚本会自动处理）
### 7.4 占位符识别错误
**问题**:模板中的占位符没有被正确识别
**解决方案**
- 确保占位符格式正确:`{{field_code}}`
- 占位符中的`field_code`不能包含空格
- 检查模板文件是否损坏
## 八、技术细节
### 8.1 占位符提取
脚本使用正则表达式 `\{\{([^}]+)\}\}` 从Word文档中提取占位符
- 扫描所有段落（paragraphs）
- 扫描所有表格单元格（table cells）
- 提取所有匹配的占位符
### 8.2 ID生成
使用时间戳生成唯一ID
```python
def generate_id():
import time
return int(time.time() * 1000000)
```
### 8.3 层级结构处理
- 按目录层级顺序处理(先处理父目录,再处理子目录)
- 使用路径映射表（`path_to_id`）维护目录ID关系
- 文件节点的`parent_id`指向其所在目录的ID
### 8.4 字段关联处理
- 先获取现有关联关系
- 计算需要添加和删除的关联
- 批量更新关联关系
- 自动创建缺失的字段
## 九、更新历史
- **2025-12-16**: 创建脚本，支持自定义数据库连接和租户ID配置
---
**脚本路径**: `update_templates_custom.py`
**文档版本**: 1.0
**最后更新**: 2025-12-16

View File

@ -0,0 +1,251 @@
# 模板字段关联关系更新报告
## 更新时间
2025-12-16
## 一、更新概述
根据数据库设计说明,更新了所有模板的字段关联关系:
- **输入字段**：所有模板都关联了 `clue_info` 和 `target_basic_info_clue`
- **输出字段**:根据模板中的占位符自动关联对应的输出字段
## 二、更新规则
### 2.1 输入字段关联规则
所有模板(文件节点)都自动关联以下输入字段:
1. **clue_info** - 线索信息
2. **target_basic_info_clue** - 被核查人员工作基本情况线索
### 2.2 输出字段关联规则
根据模板文件中的占位符(格式:`{{field_code}}`)自动关联对应的输出字段:
- 扫描模板文件中的段落和表格
- 提取所有占位符
- 根据占位符的 `field_code` 查找对应的输出字段（`field_type=2`）
- 如果字段不存在,自动创建该字段
## 三、更新结果
### 3.1 输入字段关联
- **关联的模板数**: 122/122 (100%)
- **输入字段数**: 2个
- `clue_info` (ID: 1764656917384058)
- `target_basic_info_clue` (ID: 1764656917996367)
- **关联关系总数**: 260条（122个模板 × 2个输入字段；注：122 × 2 = 244，与统计到的260条不一致，差异来源待核实）
### 3.2 输出字段关联
- **关联的模板数**: 93/122 (76%)
- **未关联的模板数**: 29个（这些模板没有占位符或占位符格式不正确）
- **发现的占位符总数**: 35个不同的占位符
- **关联关系总数**: 325条
### 3.3 占位符列表35个
所有发现的占位符及其对应的输出字段:
1. `appointment_location` - 约谈地点
2. `appointment_time` - 约谈时间
3. `approval_time` - 审批时间
4. `clue_source` - 线索来源
5. `department_opinion` - 部门意见
6. `handler_name` - 处理人姓名
7. `handling_department` - 处理部门
8. `investigation_location` - 调查地点
9. `investigation_team_code` - 调查组编号
10. `investigation_team_leader_name` - 调查组组长姓名
11. `investigation_team_member_names` - 调查组成员姓名
12. `investigation_unit_name` - 调查单位名称
13. `target_address` - 被核查人地址
14. `target_age` - 被核查人年龄
15. `target_basic_info` - 被核查人基本信息
16. `target_contact` - 被核查人联系方式
17. `target_date_of_birth` - 被核查人出生日期
18. `target_date_of_birth_full` - 被核查人出生日期(完整)
19. `target_education` - 被核查人学历
20. `target_education_level` - 被核查人学历层次
21. `target_ethnicity` - 被核查人民族
22. `target_family_situation` - 被核查人家庭情况
23. `target_gender` - 被核查人性别
24. `target_id_number` - 被核查人身份证号
25. `target_issue_description` - 被核查人问题描述
26. `target_name` - 被核查人姓名
27. `target_organization` - 被核查人单位
28. `target_organization_and_position` - 被核查人单位及职务
29. `target_place_of_origin` - 被核查人籍贯
30. `target_political_status` - 被核查人政治面貌
31. `target_position` - 被核查人职务
32. `target_professional_rank` - 被核查人职级
33. `target_registered_address` - 被核查人户籍地址
34. `target_social_relations` - 被核查人社会关系
35. `target_work_basic_info` - 被核查人工作基本信息
## 四、示例模板关联关系
### 4.1 示例1：8-1请示报告卡初核报告结论
- **模板ID**: 1765273962631542
- **输入字段** (2个):
- clue_info - 线索信息
- target_basic_info_clue - 被核查人员工作基本情况线索
- **输出字段** (3个):
- investigation_team_code - 调查组编号
- target_name - 被核查人姓名
- target_organization_and_position - 被核查人单位及职务
### 4.2 示例2：谈话通知书第三联
- **模板ID**: 1765273963038891
- **输入字段** (2个):
- clue_info - 线索信息
- target_basic_info_clue - 被核查人员工作基本情况线索
- **输出字段** (3个):
- appointment_location - 约谈地点
- appointment_time - 约谈时间
- target_name - 被核查人姓名
### 4.3 示例3：谈话通知书第一联
- **模板ID**: 1765273963625524
- **输入字段** (2个):
- clue_info - 线索信息
- target_basic_info_clue - 被核查人员工作基本情况线索
- **输出字段** (9个):
- appointment_location - 约谈地点
- appointment_time - 约谈时间
- approval_time - 审批时间
- handler_name - 处理人姓名
- handling_department - 处理部门
- target_id_number - 被核查人身份证号
- target_name - 被核查人姓名
- target_organization - 被核查人单位
- target_position - 被核查人职务
## 五、数据库表结构
### 5.1 f_polic_file_field 表
关联关系存储在 `f_polic_file_field` 表中:
- `id`: 关联关系ID（主键）
- `tenant_id`: 租户ID
- `file_id`: 文件配置ID（关联 `f_polic_file_config.id`）
- `filed_id`: 字段ID（关联 `f_polic_field.id`）
- `state`: 状态（1=启用，0=禁用）
### 5.2 查询示例
**查询某个模板关联的所有字段**:
```sql
SELECT
f.id AS field_id,
f.name AS field_name,
f.filed_code AS field_code,
f.field_type,
CASE
WHEN f.field_type = 1 THEN '输入字段'
WHEN f.field_type = 2 THEN '输出字段'
END AS field_type_name
FROM f_polic_file_field fff
INNER JOIN f_polic_field f ON fff.filed_id = f.id
WHERE fff.tenant_id = 1
AND fff.file_id = <模板ID>
AND fff.state = 1
AND f.state = 1
ORDER BY f.field_type, f.filed_code;
```
**查询某个模板关联的输入字段**:
```sql
SELECT f.*
FROM f_polic_file_field fff
INNER JOIN f_polic_field f ON fff.filed_id = f.id
WHERE fff.tenant_id = 1
AND fff.file_id = <模板ID>
AND f.field_type = 1
AND fff.state = 1
AND f.state = 1;
```
**查询某个模板关联的输出字段**:
```sql
SELECT f.*
FROM f_polic_file_field fff
INNER JOIN f_polic_field f ON fff.filed_id = f.id
WHERE fff.tenant_id = 1
AND fff.file_id = <模板ID>
AND f.field_type = 2
AND fff.state = 1
AND f.state = 1;
```
## 六、验证结果
### 6.1 关联完整性
**输入字段关联**: 100% 完成
- 所有122个模板都关联了2个输入字段
- 关联关系总数: 260条
**输出字段关联**: 76% 完成
- 93个模板关联了输出字段
- 29个模板没有输出字段（这些模板没有占位符，属于正常情况）
- 关联关系总数: 325条
### 6.2 总体统计
- **总模板数**: 122个
- **总关联关系数**: 585条
- 输入字段关联: 260条
- 输出字段关联: 325条
## 七、注意事项
1. **tenant_id**: 数据库中的实际tenant_id是 `1`,不是配置中的 `615873064429507639`
2. **无占位符的模板**: 29个模板没有输出字段关联这些模板可能是
- 目录节点(但已过滤,只处理文件节点)
- 没有占位符的模板文件
- 占位符格式不正确的模板
3. **字段自动创建**: 如果模板中的占位符对应的字段不存在,系统会自动创建该字段(`field_type=2`,输出字段)
4. **关联关系维护**:
- 添加新模板时,会自动关联输入字段
- 输出字段根据模板中的占位符自动关联
- 如果模板占位符发生变化,需要重新运行更新脚本
## 八、更新脚本
本次更新使用的脚本:
1. **`update_all_template_field_relations.py`**:
- 扫描所有模板的占位符
- 为所有模板关联输入字段
- 根据占位符关联输出字段
- 自动创建缺失的字段
2. **`verify_template_field_relations.py`**:
- 验证关联关系的完整性
- 检查是否有遗漏的关联
## 九、后续维护
1. **添加新模板**:
- 将模板文件放到对应目录
- 运行 `update_all_template_field_relations.py` 更新关联关系
2. **修改模板占位符**:
- 更新模板文件中的占位符
- 重新运行更新脚本同步关联关系
3. **验证关联关系**:
- 定期运行 `verify_template_field_relations.py` 检查关联关系
---
**更新人员**: 自动化脚本
**更新日期**: 2025-12-16
**更新状态**: ✅ 完成