From d27c18d0d2d827a1e8e6d56a67714f78aa37fe1d Mon Sep 17 00:00:00 2001 From: python Date: Tue, 30 Dec 2025 10:41:35 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3=E7=94=9F=E6=88=90=E6=96=87?= =?UTF-8?q?=E6=A1=A3=E9=94=99=E8=AF=AF=E6=B5=8B=E8=AF=95?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app.py | 38 +- check_database_id_relations.py | 539 ++++++++++++++ clean_and_resync_templates.py | 874 +++++++++++++++++++++++ fix_document_service_tenant_id.py | 102 +++ services/document_service.py | 20 +- sync_templates_between_databases.py | 779 +++++++++++++++++++++ update_all_template_field_relations.py | 562 +++++++++++++++ update_templates_custom.py | 928 +++++++++++++++++++++++++ verify_template_field_relations.py | 250 +++++++ 技术文档/数据库设计说明.md | 15 + 技术文档/模板更新脚本使用说明.md | 376 ++++++++++ 模板字段关联关系更新报告.md | 251 +++++++ 12 files changed, 4728 insertions(+), 6 deletions(-) create mode 100644 check_database_id_relations.py create mode 100644 clean_and_resync_templates.py create mode 100644 fix_document_service_tenant_id.py create mode 100644 sync_templates_between_databases.py create mode 100644 update_all_template_field_relations.py create mode 100644 update_templates_custom.py create mode 100644 verify_template_field_relations.py create mode 100644 技术文档/数据库设计说明.md create mode 100644 技术文档/模板更新脚本使用说明.md create mode 100644 模板字段关联关系更新报告.md diff --git a/app.py b/app.py index c6dd616..fb6405e 100644 --- a/app.py +++ b/app.py @@ -686,6 +686,22 @@ def generate_document(): if not data: return error_response(400, "请求参数不能为空") + # 获取tenant_id(从请求参数或请求体中获取) + tenant_id = request.args.get('tenant_id') or data.get('tenant_id') + if tenant_id: + try: + tenant_id = int(tenant_id) + except (ValueError, TypeError): + return error_response(400, "tenant_id必须是整数") + else: + # 如果未提供tenant_id,尝试从环境变量获取,默认使用1 + import os + tenant_id_str = os.getenv('TENANT_ID', '1') + try: + tenant_id = int(tenant_id_str) + except (ValueError, TypeError): + tenant_id = 1 + input_data = data.get('inputData', []) file_list = data.get('fpolicFieldParamFileList', []) @@ -723,7 +739,8 @@ def generate_document(): result = document_service.generate_document( file_id=file_id, input_data=input_data, - file_info=file_info + file_info=file_info, + tenant_id=tenant_id ) # 使用生成的文档名称(.docx格式),而不是原始文件名 @@ -834,6 +851,22 @@ def get_document_by_task(): # 生成文档ID document_id = document_service.generate_document_id() + # 获取tenant_id(从请求参数或请求体中获取) + tenant_id = request.args.get('tenant_id') or data.get('tenant_id') + if tenant_id: + try: + tenant_id = int(tenant_id) + except (ValueError, TypeError): + return error_response(400, "tenant_id必须是整数") + else: + # 如果未提供tenant_id,尝试从环境变量获取,默认使用1 + import os + tenant_id_str = os.getenv('TENANT_ID', '1') + try: + tenant_id = int(tenant_id_str) + except (ValueError, TypeError): + tenant_id = 1 + # 处理每个文件 result_file_list = [] first_document_name = None # 用于存储第一个生成的文档名 @@ -851,7 +884,8 @@ def get_document_by_task(): result = document_service.generate_document( file_id=file_id, input_data=input_data, - file_info=file_info + file_info=file_info, + tenant_id=tenant_id ) # 使用生成的文档名称(.docx格式),而不是原始文件名 diff --git a/check_database_id_relations.py b/check_database_id_relations.py new file mode 100644 index 0000000..2d05e0b --- /dev/null +++ b/check_database_id_relations.py @@ -0,0 +1,539 @@ +""" +检查数据库中的ID关系是否正确 + +功能: +1. 检查f_polic_file_config表中的数据 +2. 检查f_polic_field表中的数据 +3. 检查f_polic_file_field表中的关联关系 +4. 验证ID关系是否正确匹配 +5. 找出孤立数据和错误关联 + +使用方法: +python check_database_id_relations.py --host 10.100.31.21 --port 3306 --user finyx --password FknJYz3FA5WDYtsd --database finyx --tenant-id 1 +""" +import os +import sys +import pymysql +import argparse +from typing import Dict, List, Set, Optional +from collections import defaultdict + +# 设置输出编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + import io + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def get_db_config_from_args() -> Dict: + """从命令行参数获取数据库配置""" + parser = argparse.ArgumentParser( + description='检查数据库中的ID关系是否正确', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python check_database_id_relations.py --host 10.100.31.21 --port 3306 --user finyx --password FknJYz3FA5WDYtsd --database finyx --tenant-id 1 + """ + ) + + parser.add_argument('--host', type=str, required=True, help='MySQL服务器地址') + parser.add_argument('--port', type=int, required=True, help='MySQL服务器端口') + parser.add_argument('--user', type=str, required=True, help='MySQL用户名') + parser.add_argument('--password', type=str, required=True, help='MySQL密码') + parser.add_argument('--database', type=str, required=True, help='数据库名称') + parser.add_argument('--tenant-id', type=int, required=True, help='租户ID') + parser.add_argument('--file-id', type=int, help='检查特定的文件ID') + + args = parser.parse_args() + + return { + 'host': args.host, + 'port': args.port, + 'user': args.user, + 'password': args.password, + 'database': args.database, + 'charset': 'utf8mb4', + 'tenant_id': args.tenant_id, + 'file_id': args.file_id + } + + +def test_db_connection(config: Dict) -> Optional[pymysql.Connection]: + """测试数据库连接""" + try: + conn = pymysql.connect( + host=config['host'], + port=config['port'], + user=config['user'], + password=config['password'], + database=config['database'], + charset=config['charset'] + ) + return conn + except Exception as e: + print_result(False, f"数据库连接失败: {str(e)}") + return None + + +def check_file_config(conn, tenant_id: int, file_id: Optional[int] = None): + """检查f_polic_file_config表""" + print_section("检查 f_polic_file_config 表") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + if file_id: + # 检查特定文件ID + cursor.execute(""" + SELECT id, tenant_id, parent_id, name, file_path, state + FROM f_polic_file_config + WHERE id = %s AND tenant_id = %s + """, (file_id, tenant_id)) + result = cursor.fetchone() + + if result: + print(f"\n 文件ID {file_id} 的信息:") + print(f" - ID: {result['id']}") + print(f" - 租户ID: {result['tenant_id']}") + print(f" - 父级ID: {result['parent_id']}") + print(f" - 名称: {result['name']}") + print(f" - 文件路径: {result['file_path']}") + # 处理state字段(可能是bytes或int) + state_raw = result['state'] + if isinstance(state_raw, bytes): + state_value = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + state_value = int(state_raw) + else: + state_value = 0 + print(f" - 状态: {state_value} ({'启用' if state_value == 1 else '禁用'})") + + if state_value != 1: + print_result(False, f"文件ID {file_id} 的状态为禁用(state={state_value})") + else: + print_result(True, f"文件ID {file_id} 存在且已启用") + else: + print_result(False, f"文件ID {file_id} 不存在或不属于租户 {tenant_id}") + return + + # 统计信息 + cursor.execute(""" + SELECT + COUNT(*) as total, + SUM(CASE WHEN state = 1 THEN 1 ELSE 0 END) as enabled, + SUM(CASE WHEN state = 0 THEN 1 ELSE 0 END) as disabled, + SUM(CASE WHEN file_path IS NOT NULL AND file_path != '' THEN 1 ELSE 0 END) as files, + SUM(CASE WHEN file_path IS NULL OR file_path = '' THEN 1 ELSE 0 END) as directories + FROM f_polic_file_config + WHERE tenant_id = %s + """, (tenant_id,)) + stats = cursor.fetchone() + + print(f"\n 统计信息:") + print(f" - 总记录数: {stats['total']}") + print(f" - 启用记录: {stats['enabled']}") + print(f" - 禁用记录: {stats['disabled']}") + print(f" - 文件节点: {stats['files']}") + print(f" - 目录节点: {stats['directories']}") + + # 检查parent_id引用 + cursor.execute(""" + SELECT fc1.id, fc1.name, fc1.parent_id + FROM f_polic_file_config fc1 + LEFT JOIN f_polic_file_config fc2 ON fc1.parent_id = fc2.id AND fc1.tenant_id = fc2.tenant_id + WHERE fc1.tenant_id = %s + AND fc1.parent_id IS NOT NULL + AND fc2.id IS NULL + """, (tenant_id,)) + broken_parents = cursor.fetchall() + + if broken_parents: + print(f"\n [警告] 发现 {len(broken_parents)} 个parent_id引用错误:") + for item in broken_parents[:10]: + print(f" - ID: {item['id']}, 名称: {item['name']}, parent_id: {item['parent_id']} (不存在)") + if len(broken_parents) > 10: + print(f" ... 还有 {len(broken_parents) - 10} 个") + else: + print_result(True, "所有parent_id引用正确") + + finally: + cursor.close() + + +def check_fields(conn, tenant_id: int): + """检查f_polic_field表""" + print_section("检查 f_polic_field 表") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 统计信息 + cursor.execute(""" + SELECT + field_type, + COUNT(*) as total, + SUM(CASE WHEN state = 1 THEN 1 ELSE 0 END) as enabled, + SUM(CASE WHEN state = 0 THEN 1 ELSE 0 END) as disabled + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY field_type + """, (tenant_id,)) + stats = cursor.fetchall() + + print(f"\n 统计信息:") + for stat in stats: + field_type_name = "输入字段" if stat['field_type'] == 1 else "输出字段" if stat['field_type'] == 2 else "未知" + print(f" - {field_type_name} (field_type={stat['field_type']}):") + print(f" 总记录数: {stat['total']}") + print(f" 启用: {stat['enabled']}") + print(f" 禁用: {stat['disabled']}") + + # 检查重复的filed_code + cursor.execute(""" + SELECT filed_code, field_type, COUNT(*) as count + FROM f_polic_field + WHERE tenant_id = %s + AND state = 1 + GROUP BY filed_code, field_type + HAVING count > 1 + """, (tenant_id,)) + duplicates = cursor.fetchall() + + if duplicates: + print(f"\n [警告] 发现重复的filed_code:") + for dup in duplicates: + print(f" - filed_code: {dup['filed_code']}, field_type: {dup['field_type']}, 重复数: {dup['count']}") + else: + print_result(True, "没有重复的filed_code") + + finally: + cursor.close() + + +def check_file_field_relations(conn, tenant_id: int, file_id: Optional[int] = None): + """检查f_polic_file_field表""" + print_section("检查 f_polic_file_field 表(关联关系)") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 统计信息 + cursor.execute(""" + SELECT COUNT(*) as total + FROM f_polic_file_field + WHERE tenant_id = %s AND state = 1 + """, (tenant_id,)) + total_relations = cursor.fetchone()['total'] + + print(f"\n 总关联关系数: {total_relations}") + + if file_id: + # 检查特定文件ID的关联关系 + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id, fff.state, + fc.name as file_name, fc.file_path, fc.state as file_state, + f.name as field_name, f.filed_code, f.field_type, f.state as field_state + FROM f_polic_file_field fff + LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND fff.file_id = %s + """, (tenant_id, file_id)) + relations = cursor.fetchall() + + if relations: + print(f"\n 文件ID {file_id} 的关联关系 ({len(relations)} 条):") + for rel in relations: + print(f"\n 关联ID: {rel['id']}") + print(f" - file_id: {rel['file_id']}") + if rel['file_name']: + print(f" 模板: {rel['file_name']} (路径: {rel['file_path']})") + # 处理state字段(可能是bytes或int) + state_raw = rel['file_state'] + if isinstance(state_raw, bytes): + file_state = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + file_state = int(state_raw) + else: + file_state = 0 + print(f" 状态: {file_state} ({'启用' if file_state == 1 else '禁用'})") + else: + print(f" [错误] 模板不存在!") + print(f" - filed_id: {rel['filed_id']}") + if rel['field_name']: + field_type_name = "输入字段" if rel['field_type'] == 1 else "输出字段" if rel['field_type'] == 2 else "未知" + # 处理state字段(可能是bytes或int) + state_raw = rel['field_state'] + if isinstance(state_raw, bytes): + field_state = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + field_state = int(state_raw) + else: + field_state = 0 + print(f" 字段: {rel['field_name']} ({rel['filed_code']}, {field_type_name})") + print(f" 状态: {field_state} ({'启用' if field_state == 1 else '禁用'})") + else: + print(f" [错误] 字段不存在!") + else: + print(f"\n 文件ID {file_id} 没有关联关系") + + # 检查孤立的关联关系(file_id不存在) + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + WHERE fff.tenant_id = %s + AND fff.state = 1 + AND fc.id IS NULL + """, (tenant_id,)) + orphaned_file_relations = cursor.fetchall() + + if orphaned_file_relations: + print(f"\n [错误] 发现 {len(orphaned_file_relations)} 个孤立的关联关系(file_id不存在):") + for rel in orphaned_file_relations[:10]: + print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}") + if len(orphaned_file_relations) > 10: + print(f" ... 还有 {len(orphaned_file_relations) - 10} 个") + else: + print_result(True, "所有file_id引用正确") + + # 检查孤立的关联关系(filed_id不存在) + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s + AND fff.state = 1 + AND f.id IS NULL + """, (tenant_id,)) + orphaned_field_relations = cursor.fetchall() + + if orphaned_field_relations: + print(f"\n [错误] 发现 {len(orphaned_field_relations)} 个孤立的关联关系(filed_id不存在):") + for rel in orphaned_field_relations[:10]: + print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}") + if len(orphaned_field_relations) > 10: + print(f" ... 还有 {len(orphaned_field_relations) - 10} 个") + else: + print_result(True, "所有filed_id引用正确") + + # 检查关联到禁用模板或字段的关联关系 + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id, + fc.state as file_state, f.state as field_state + FROM f_polic_file_field fff + LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s + AND fff.state = 1 + AND (fc.state != 1 OR f.state != 1) + """, (tenant_id,)) + disabled_relations = cursor.fetchall() + + if disabled_relations: + print(f"\n [警告] 发现 {len(disabled_relations)} 个关联到禁用模板或字段的关联关系:") + for rel in disabled_relations[:10]: + print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}") + print(f" 模板状态: {rel['file_state']}, 字段状态: {rel['field_state']}") + if len(disabled_relations) > 10: + print(f" ... 还有 {len(disabled_relations) - 10} 个") + else: + print_result(True, "所有关联关系都关联到启用的模板和字段") + + finally: + cursor.close() + + +def check_specific_file(conn, tenant_id: int, file_id: int): + """检查特定文件ID的完整信息""" + print_section(f"详细检查文件ID {file_id}") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 检查文件配置 + cursor.execute(""" + SELECT id, tenant_id, parent_id, name, file_path, state, created_time, updated_time + FROM f_polic_file_config + WHERE id = %s AND tenant_id = %s + """, (file_id, tenant_id)) + file_config = cursor.fetchone() + + if not file_config: + print_result(False, f"文件ID {file_id} 不存在或不属于租户 {tenant_id}") + return + + print(f"\n 文件配置信息:") + print(f" - ID: {file_config['id']}") + print(f" - 租户ID: {file_config['tenant_id']}") + print(f" - 父级ID: {file_config['parent_id']}") + print(f" - 名称: {file_config['name']}") + print(f" - 文件路径: {file_config['file_path']}") + # 处理state字段(可能是bytes或int) + state_raw = file_config['state'] + if isinstance(state_raw, bytes): + file_state = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + file_state = int(state_raw) + else: + file_state = 0 + print(f" - 状态: {file_state} ({'启用' if file_state == 1 else '禁用'})") + print(f" - 创建时间: {file_config['created_time']}") + print(f" - 更新时间: {file_config['updated_time']}") + + # 2. 检查父级 + if file_config['parent_id']: + cursor.execute(""" + SELECT id, name, file_path, state + FROM f_polic_file_config + WHERE id = %s AND tenant_id = %s + """, (file_config['parent_id'], tenant_id)) + parent = cursor.fetchone() + if parent: + # 处理state字段(可能是bytes或int) + state_raw = parent['state'] + if isinstance(state_raw, bytes): + parent_state = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + parent_state = int(state_raw) + else: + parent_state = 0 + print(f"\n 父级信息:") + print(f" - ID: {parent['id']}") + print(f" - 名称: {parent['name']}") + print(f" - 状态: {parent_state} ({'启用' if parent_state == 1 else '禁用'})") + else: + print(f"\n [错误] 父级ID {file_config['parent_id']} 不存在!") + + # 3. 检查关联的字段 + cursor.execute(""" + SELECT fff.id as relation_id, fff.filed_id, + f.name as field_name, f.filed_code, f.field_type, f.state as field_state + FROM f_polic_file_field fff + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND fff.file_id = %s AND fff.state = 1 + ORDER BY f.field_type, f.filed_code + """, (tenant_id, file_id)) + relations = cursor.fetchall() + + print(f"\n 关联的字段 ({len(relations)} 个):") + input_fields = [] + output_fields = [] + for rel in relations: + field_type_name = "输入字段" if rel['field_type'] == 1 else "输出字段" if rel['field_type'] == 2 else "未知" + # 处理state字段(可能是bytes或int) + state_raw = rel['field_state'] + if isinstance(state_raw, bytes): + field_state = int.from_bytes(state_raw, byteorder='big') + elif state_raw is not None: + field_state = int(state_raw) + else: + field_state = 0 + field_info = f" - {rel['field_name']} ({rel['filed_code']}, {field_type_name})" + if field_state != 1: + field_info += f" [状态: 禁用]" + if not rel['field_name']: + field_info += f" [错误: 字段不存在!]" + + if rel['field_type'] == 1: + input_fields.append(field_info) + else: + output_fields.append(field_info) + + if input_fields: + print(f"\n 输入字段 ({len(input_fields)} 个):") + for info in input_fields: + print(info) + + if output_fields: + print(f"\n 输出字段 ({len(output_fields)} 个):") + for info in output_fields: + print(info) + + # 4. 检查是否有孤立的关联关系 + cursor.execute(""" + SELECT fff.id, fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND fff.file_id = %s AND fff.state = 1 AND f.id IS NULL + """, (tenant_id, file_id)) + orphaned = cursor.fetchall() + + if orphaned: + print(f"\n [错误] 发现 {len(orphaned)} 个孤立的关联关系(字段不存在):") + for rel in orphaned: + print(f" - 关联ID: {rel['id']}, filed_id: {rel['filed_id']}") + + finally: + cursor.close() + + +def main(): + """主函数""" + print_section("数据库ID关系检查工具") + + # 获取配置 + config = get_db_config_from_args() + + # 显示配置信息 + print_section("配置信息") + print(f" 数据库服务器: {config['host']}:{config['port']}") + print(f" 数据库名称: {config['database']}") + print(f" 用户名: {config['user']}") + print(f" 租户ID: {config['tenant_id']}") + if config.get('file_id'): + print(f" 检查文件ID: {config['file_id']}") + + # 连接数据库 + print_section("连接数据库") + conn = test_db_connection(config) + if not conn: + return + + print_result(True, "数据库连接成功") + + try: + tenant_id = config['tenant_id'] + file_id = config.get('file_id') + + # 检查各个表 + check_file_config(conn, tenant_id, file_id) + check_fields(conn, tenant_id) + check_file_field_relations(conn, tenant_id, file_id) + + # 如果指定了文件ID,进行详细检查 + if file_id: + check_specific_file(conn, tenant_id, file_id) + + # 总结 + print_section("检查完成") + print("请查看上述检查结果,找出问题所在") + + finally: + conn.close() + print_result(True, "数据库连接已关闭") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n[中断] 用户取消操作") + sys.exit(0) + except Exception as e: + print(f"\n[错误] 发生异常: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/clean_and_resync_templates.py b/clean_and_resync_templates.py new file mode 100644 index 0000000..f07bd5b --- /dev/null +++ b/clean_and_resync_templates.py @@ -0,0 +1,874 @@ +""" +清理并重新同步模板数据到指定数据库 + +功能: +1. 清理指定tenant_id下的旧数据(包括MinIO路径的数据) +2. 清理相关的字段关联关系 +3. 重新扫描template_finish/目录 +4. 重新创建/更新模板数据 +5. 重新建立字段关联关系 + +使用方法: +python clean_and_resync_templates.py --host 10.100.31.21 --port 3306 --user finyx --password FknJYz3FA5WDYtsd --database finyx --tenant-id 1 +""" +import os +import sys +import pymysql +import argparse +from pathlib import Path +from typing import Dict, List, Set, Optional +import re +from docx import Document +import getpass + +# 设置输出编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + import io + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + +CREATED_BY = 655162080928945152 +UPDATED_BY = 655162080928945152 + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def generate_id(): + """生成ID""" + import time + return int(time.time() * 1000000) + + +def get_db_config_from_args() -> Optional[Dict]: + """从命令行参数获取数据库配置""" + parser = argparse.ArgumentParser( + description='清理并重新同步模板数据到指定数据库', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python clean_and_resync_templates.py --host 10.100.31.21 --port 3306 --user finyx --password FknJYz3FA5WDYtsd --database finyx --tenant-id 1 + """ + ) + + parser.add_argument('--host', type=str, required=True, help='MySQL服务器地址') + parser.add_argument('--port', type=int, required=True, help='MySQL服务器端口') + parser.add_argument('--user', type=str, required=True, help='MySQL用户名') + parser.add_argument('--password', type=str, required=True, help='MySQL密码') + parser.add_argument('--database', type=str, required=True, help='数据库名称') + parser.add_argument('--tenant-id', type=int, required=True, help='租户ID') + parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)') + parser.add_argument('--skip-clean', action='store_true', help='跳过清理步骤(只同步)') + + args = parser.parse_args() + + return { + 'host': args.host, + 'port': args.port, + 'user': args.user, + 'password': args.password, + 'database': args.database, + 'charset': 'utf8mb4', + 'tenant_id': args.tenant_id, + 'dry_run': args.dry_run, + 'skip_clean': args.skip_clean + } + + +def test_db_connection(config: Dict) -> Optional[pymysql.Connection]: + """测试数据库连接""" + try: + conn = pymysql.connect( + host=config['host'], + port=config['port'], + user=config['user'], + password=config['password'], + database=config['database'], + charset=config['charset'] + ) + return conn + except Exception as e: + print_result(False, f"数据库连接失败: {str(e)}") + return None + + +def scan_local_templates() -> Dict[str, Path]: + """扫描本地template_finish目录,返回file_path -> Path的映射""" + templates = {} + + if not TEMPLATES_DIR.exists(): + return templates + + for item in TEMPLATES_DIR.rglob("*"): + if item.is_file() and item.suffix.lower() in ['.docx', '.doc']: + rel_path = item.relative_to(PROJECT_ROOT) + rel_path_str = str(rel_path).replace('\\', '/') + templates[rel_path_str] = item + + return templates + + +def clean_old_data(conn, tenant_id: int, local_templates: Dict[str, Path], dry_run: bool = False): + """清理旧数据""" + print_section("清理旧数据") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 获取所有模板 + cursor.execute(""" + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + all_templates = cursor.fetchall() + + print(f" 数据库中的模板总数: {len(all_templates)}") + + # 2. 识别需要删除的模板 + to_delete = [] + minio_paths = [] + invalid_paths = [] + duplicate_paths = [] + + # 统计file_path + path_count = {} + for template in all_templates: + file_path = template.get('file_path') + if file_path: + if file_path not in path_count: + path_count[file_path] = [] + path_count[file_path].append(template) + + for template in all_templates: + file_path = template.get('file_path') + template_id = template['id'] + + # 检查是否是MinIO路径 + if file_path and ('minio' in file_path.lower() or file_path.startswith('http://') or file_path.startswith('https://')): + minio_paths.append(template) + to_delete.append(template_id) + continue + + # 检查文件路径是否在本地存在 + if file_path: + if file_path not in local_templates: + invalid_paths.append(template) + to_delete.append(template_id) + continue + + # 检查是否有重复路径 + if len(path_count.get(file_path, [])) > 1: + # 保留第一个,删除其他的 + if template != path_count[file_path][0]: + duplicate_paths.append(template) + to_delete.append(template_id) + continue + + # 3. 统计需要删除的数据 + print(f"\n 需要删除的模板:") + print(f" - MinIO路径的模板: {len(minio_paths)} 个") + print(f" - 无效路径的模板: {len(invalid_paths)} 个") + print(f" - 重复路径的模板: {len(duplicate_paths)} 个") + print(f" - 总计: {len(to_delete)} 个") + + if to_delete and not dry_run: + # 4. 删除字段关联关系 + print("\n 删除字段关联关系...") + if to_delete: + placeholders = ','.join(['%s'] * len(to_delete)) + delete_relations_sql = f""" + DELETE FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id IN ({placeholders}) + """ + cursor.execute(delete_relations_sql, [tenant_id] + to_delete) + deleted_relations = cursor.rowcount + print(f" 删除了 {deleted_relations} 条字段关联关系") + + # 5. 删除模板记录 + print("\n 删除模板记录...") + delete_templates_sql = f""" + UPDATE f_polic_file_config + SET state = 0, updated_time = NOW(), updated_by = %s + WHERE tenant_id = %s + AND id IN ({placeholders}) + """ + cursor.execute(delete_templates_sql, [UPDATED_BY, tenant_id] + to_delete) + deleted_templates = cursor.rowcount + print(f" 删除了 {deleted_templates} 个模板记录(标记为state=0)") + + conn.commit() + print_result(True, f"清理完成:删除了 {deleted_templates} 个模板记录") + elif to_delete: + print("\n [预览模式] 将删除上述模板记录") + else: + print_result(True, "没有需要清理的数据") + + return { + 'total': len(all_templates), + 'deleted': len(to_delete), + 'minio_paths': len(minio_paths), + 'invalid_paths': len(invalid_paths), + 'duplicate_paths': len(duplicate_paths) + } + + finally: + cursor.close() + + +def scan_directory_structure(base_dir: Path) -> Dict: + """扫描目录结构""" + directories = [] + files = [] + + def scan_recursive(current_path: Path, parent_path: Optional[str] = None): + """递归扫描目录""" + if not current_path.exists() or not current_path.is_dir(): + return + + # 获取相对路径 + rel_path = current_path.relative_to(base_dir) + rel_path_str = str(rel_path).replace('\\', '/') + + # 添加目录节点 + if rel_path_str != '.': + directories.append({ + 'name': current_path.name, + 'path': rel_path_str, + 'parent_path': parent_path + }) + + # 扫描子项 + for item in sorted(current_path.iterdir()): + if item.is_dir(): + scan_recursive(item, rel_path_str) + elif item.is_file() and item.suffix.lower() in ['.docx', '.doc']: + file_rel_path = item.relative_to(base_dir) + file_rel_path_str = str(file_rel_path).replace('\\', '/') + files.append({ + 'name': item.name, + 'path': file_rel_path_str, + 'parent_path': rel_path_str if rel_path_str != '.' else None + }) + + scan_recursive(base_dir) + + return { + 'directories': directories, + 'files': files + } + + +def get_existing_templates(conn, tenant_id: int) -> Dict: + """获取现有模板(只获取state=1的)""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + cursor.execute(""" + SELECT id, name, file_path, parent_id + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + templates = cursor.fetchall() + + result = { + 'by_path': {}, + 'by_name': {}, + 'by_id': {} + } + + for t in templates: + result['by_id'][t['id']] = t + if t['file_path']: + result['by_path'][t['file_path']] = t + else: + name = t['name'] + if name not in result['by_name']: + result['by_name'][name] = [] + result['by_name'][name].append(t) + + return result + finally: + cursor.close() + + +def sync_template_hierarchy(conn, tenant_id: int, dry_run: bool = False): + """同步模板层级结构""" + print_section("同步模板层级结构") + + # 1. 扫描目录结构 + print("1. 扫描目录结构...") + structure = scan_directory_structure(TEMPLATES_DIR) + print_result(True, f"找到 {len(structure['directories'])} 个目录,{len(structure['files'])} 个文件") + + if not structure['directories'] and not structure['files']: + print_result(False, "未找到任何目录或文件") + return None + + # 2. 获取现有模板 + print("\n2. 获取现有模板...") + existing_templates = get_existing_templates(conn, tenant_id) + print_result(True, f"找到 {len(existing_templates['by_path'])} 个文件模板,{len(existing_templates['by_name'])} 个目录模板") + + # 3. 创建/更新目录节点 + print("\n3. 创建/更新目录节点...") + path_to_id = {} + dir_created = 0 + dir_updated = 0 + + for dir_info in structure['directories']: + parent_id = None + if dir_info['parent_path']: + parent_id = path_to_id.get(dir_info['parent_path']) + + existing = None + candidates = existing_templates['by_name'].get(dir_info['name'], []) + for candidate in candidates: + if candidate.get('parent_id') == parent_id and not candidate.get('file_path'): + existing = candidate + break + + if existing: + dir_id = existing['id'] + if existing.get('parent_id') != parent_id: + dir_updated += 1 + if not dry_run: + cursor = conn.cursor() + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, UPDATED_BY, dir_id, tenant_id)) + conn.commit() + cursor.close() + else: + dir_id = generate_id() + dir_created += 1 + if not dry_run: + cursor = conn.cursor() + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1) + """, (dir_id, tenant_id, parent_id, dir_info['name'], CREATED_BY, UPDATED_BY)) + conn.commit() + cursor.close() + + path_to_id[dir_info['path']] = dir_id + + print_result(True, f"创建 {dir_created} 个目录,更新 {dir_updated} 个目录") + + # 4. 创建/更新文件节点 + print("\n4. 创建/更新文件节点...") + file_created = 0 + file_updated = 0 + + for file_info in structure['files']: + parent_id = None + if file_info['parent_path']: + parent_id = path_to_id.get(file_info['parent_path']) + + existing = existing_templates['by_path'].get(file_info['path']) + + if existing: + file_id = existing['id'] + if existing.get('parent_id') != parent_id or existing.get('name') != file_info['name']: + file_updated += 1 + if not dry_run: + cursor = conn.cursor() + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, file_info['name'], UPDATED_BY, file_id, tenant_id)) + conn.commit() + cursor.close() + else: + file_id = generate_id() + file_created += 1 + if not dry_run: + cursor = conn.cursor() + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, (file_id, tenant_id, parent_id, file_info['name'], file_info['path'], CREATED_BY, UPDATED_BY)) + conn.commit() + cursor.close() + + print_result(True, f"创建 {file_created} 个文件,更新 {file_updated} 个文件") + + return { + 'directories_created': dir_created, + 'directories_updated': dir_updated, + 'files_created': file_created, + 'files_updated': file_updated + } + + +def get_input_fields(conn, tenant_id: int) -> Dict[str, int]: + """获取输入字段""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 1 + AND filed_code IN ('clue_info', 'target_basic_info_clue') + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + + return result + finally: + cursor.close() + + +def get_output_fields(conn, tenant_id: int) -> Dict[str, int]: + """获取所有输出字段""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 2 + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + + return result + finally: + cursor.close() + + +def extract_placeholders_from_docx(file_path: Path) -> Set[str]: + """从docx文件中提取所有占位符""" + placeholders = set() + placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') + + try: + doc = Document(file_path) + + # 从段落中提取 + for paragraph in doc.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + + # 从表格中提取 + for table in doc.tables: + try: + for row in table.rows: + for cell in row.cells: + for paragraph in cell.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + except: + continue + except Exception as e: + pass + + return placeholders + + +def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """创建缺失的输入字段""" + cursor = conn.cursor() + + try: + field_id = generate_id() + field_name_map = { + 'clue_info': '线索信息', + 'target_basic_info_clue': '被核查人基本信息(线索)' + } + field_name = field_name_map.get(field_code, field_code.replace('_', ' ')) + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 1, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + return field_id + + except Exception as e: + conn.rollback() + return None + finally: + cursor.close() + + +def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """创建缺失的输出字段""" + cursor = conn.cursor() + + try: + # 先检查是否已存在 + check_cursor = conn.cursor(pymysql.cursors.DictCursor) + check_cursor.execute(""" + SELECT id FROM f_polic_field + WHERE tenant_id = %s AND filed_code = %s + """, (tenant_id, field_code)) + existing = check_cursor.fetchone() + check_cursor.close() + + if existing: + return existing['id'] + + # 创建新字段 + field_id = generate_id() + field_name = field_code.replace('_', ' ') + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 2, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + return field_id + + except Exception as e: + conn.rollback() + return None + finally: + cursor.close() + + +def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]: + """获取模板的现有关联关系""" + cursor = conn.cursor() + try: + sql = """ + SELECT filed_id + FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id = %s + AND state = 1 + """ + cursor.execute(sql, (tenant_id, file_id)) + results = cursor.fetchall() + return {row[0] for row in results} + finally: + cursor.close() + + +def sync_field_relations(conn, tenant_id: int, dry_run: bool = False): + """同步字段关联关系""" + print_section("同步字段关联关系") + + # 1. 获取输入字段 + print("1. 获取输入字段...") + input_fields = get_input_fields(conn, tenant_id) + + if not input_fields: + print(" 创建缺失的输入字段...") + for field_code in ['clue_info', 'target_basic_info_clue']: + field_id = create_missing_input_field(conn, tenant_id, field_code) + if field_id: + input_fields[field_code] = field_id + + if not input_fields: + print_result(False, "无法获取或创建输入字段") + return None + + input_field_ids = list(input_fields.values()) + print_result(True, f"找到 {len(input_field_ids)} 个输入字段") + + # 2. 获取输出字段 + print("\n2. 获取输出字段...") + output_fields = get_output_fields(conn, tenant_id) + print_result(True, f"找到 {len(output_fields)} 个输出字段") + + # 3. 获取所有模板 + print("\n3. 获取所有模板...") + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND file_path IS NOT NULL + AND file_path != '' + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + templates = cursor.fetchall() + finally: + cursor.close() + + print_result(True, f"找到 {len(templates)} 个模板") + + if not templates: + print_result(False, "未找到模板") + return None + + # 4. 先清理所有现有关联关系 + print("\n4. 清理现有关联关系...") + if not dry_run: + cursor = conn.cursor() + try: + cursor.execute(""" + DELETE FROM f_polic_file_field + WHERE tenant_id = %s + """, (tenant_id,)) + deleted_count = cursor.rowcount + conn.commit() + print_result(True, f"删除了 {deleted_count} 条旧关联关系") + finally: + cursor.close() + else: + print(" [预览模式] 将清理所有现有关联关系") + + # 5. 扫描模板占位符并创建关联关系 + print("\n5. 扫描模板占位符并创建关联关系...") + + total_updated = 0 + total_errors = 0 + all_placeholders_found = set() + missing_fields = set() + + for i, template in enumerate(templates, 1): + template_id = template['id'] + template_name = template['name'] + file_path = template['file_path'] + + if i % 20 == 0: + print(f" 处理进度: {i}/{len(templates)}") + + # 检查本地文件是否存在 + local_file = PROJECT_ROOT / file_path + if not local_file.exists(): + total_errors += 1 + continue + + # 提取占位符 + placeholders = extract_placeholders_from_docx(local_file) + all_placeholders_found.update(placeholders) + + # 根据占位符找到对应的输出字段ID + output_field_ids = [] + for placeholder in placeholders: + if placeholder in output_fields: + output_field_ids.append(output_fields[placeholder]) + else: + # 字段不存在,尝试创建 + missing_fields.add(placeholder) + field_id = create_missing_output_field(conn, tenant_id, placeholder) + if field_id: + output_fields[placeholder] = field_id + output_field_ids.append(field_id) + + # 创建关联关系 + all_field_ids = input_field_ids + output_field_ids + + if not dry_run and all_field_ids: + cursor = conn.cursor() + try: + for field_id in all_field_ids: + relation_id = generate_id() + insert_sql = """ + INSERT INTO f_polic_file_field + (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + relation_id, + tenant_id, + template_id, + field_id, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + total_updated += 1 + except Exception as e: + conn.rollback() + total_errors += 1 + finally: + cursor.close() + else: + total_updated += 1 + + # 6. 统计结果 + print_section("字段关联同步结果") + print(f" 总模板数: {len(templates)}") + print(f" 已处理: {total_updated} 个") + print(f" 错误: {total_errors} 个") + print(f" 发现的占位符总数: {len(all_placeholders_found)} 个") + print(f" 创建的字段数: {len(missing_fields)} 个") + + return { + 'total_templates': len(templates), + 'updated': total_updated, + 'errors': total_errors, + 'placeholders_found': len(all_placeholders_found), + 'fields_created': len(missing_fields) + } + + +def main(): + """主函数""" + print_section("清理并重新同步模板数据") + + # 获取配置 + config = get_db_config_from_args() + + # 显示配置信息 + print_section("配置信息") + print(f" 数据库服务器: {config['host']}:{config['port']}") + print(f" 数据库名称: {config['database']}") + print(f" 用户名: {config['user']}") + print(f" 租户ID: {config['tenant_id']}") + print(f" 预览模式: {'是' if config['dry_run'] else '否'}") + print(f" 跳过清理: {'是' if config['skip_clean'] else '否'}") + + if config['dry_run']: + print("\n[注意] 当前为预览模式,不会实际更新数据库") + + # 确认 + if not config.get('dry_run'): + print("\n[警告] 此操作将清理指定租户下的旧数据并重新同步") + confirm = input("确认执行?[yes/N]: ").strip().lower() + if confirm != 'yes': + print("已取消") + return + + # 连接数据库 + print_section("连接数据库") + conn = test_db_connection(config) + if not conn: + return + + print_result(True, "数据库连接成功") + + try: + tenant_id = config['tenant_id'] + dry_run = config['dry_run'] + skip_clean = config['skip_clean'] + + results = {} + + # 1. 扫描本地模板 + print_section("扫描本地模板") + local_templates = scan_local_templates() + print_result(True, f"找到 {len(local_templates)} 个本地模板文件") + + # 2. 清理旧数据 + if not skip_clean: + clean_result = clean_old_data(conn, tenant_id, local_templates, dry_run) + results['clean'] = clean_result + else: + print_section("跳过清理步骤") + print(" 已跳过清理步骤") + + # 3. 同步模板层级结构 + hierarchy_result = sync_template_hierarchy(conn, tenant_id, dry_run) + results['hierarchy'] = hierarchy_result + + # 4. 同步字段关联关系 + fields_result = sync_field_relations(conn, tenant_id, dry_run) + results['fields'] = fields_result + + # 5. 总结 + print_section("同步完成") + if config['dry_run']: + print(" 本次为预览模式,未实际更新数据库") + else: + print(" 数据库已更新") + + if 'clean' in results: + c = results['clean'] + print(f"\n 清理结果:") + print(f" - 总模板数: {c['total']} 个") + print(f" - 删除模板: {c['deleted']} 个") + print(f" * MinIO路径: {c['minio_paths']} 个") + print(f" * 无效路径: {c['invalid_paths']} 个") + print(f" * 重复路径: {c['duplicate_paths']} 个") + + if 'hierarchy' in results and results['hierarchy']: + h = results['hierarchy'] + print(f"\n 层级结构:") + print(f" - 创建目录: {h['directories_created']} 个") + print(f" - 更新目录: {h['directories_updated']} 个") + print(f" - 创建文件: {h['files_created']} 个") + print(f" - 更新文件: {h['files_updated']} 个") + + if 'fields' in results and results['fields']: + f = results['fields'] + print(f"\n 字段关联:") + print(f" - 总模板数: {f['total_templates']} 个") + print(f" - 已处理: {f['updated']} 个") + print(f" - 发现的占位符: {f['placeholders_found']} 个") + print(f" - 创建的字段: {f['fields_created']} 个") + + finally: + conn.close() + print_result(True, "数据库连接已关闭") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n[中断] 用户取消操作") + sys.exit(0) + except Exception as e: + print(f"\n[错误] 发生异常: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/fix_document_service_tenant_id.py b/fix_document_service_tenant_id.py new file mode 100644 index 0000000..cca2c45 --- /dev/null +++ b/fix_document_service_tenant_id.py @@ -0,0 +1,102 @@ +""" +修复document_service.py中的tenant_id查询问题 + +问题:get_file_config_by_id方法没有检查tenant_id,导致查询可能失败 +解决方案:在查询中添加tenant_id检查 +""" +import re +from pathlib import Path + +def fix_document_service(): + """修复document_service.py中的查询逻辑""" + file_path = Path("services/document_service.py") + + if not file_path.exists(): + print(f"[错误] 文件不存在: {file_path}") + return False + + # 读取文件 + with open(file_path, 'r', encoding='utf-8') as f: + content = f.read() + + # 查找get_file_config_by_id方法 + pattern = r'(def get_file_config_by_id\(self, file_id: int\) -> Optional\[Dict\]:.*?)(\s+sql = """.*?WHERE id = %s\s+AND state = 1\s+""".*?cursor\.execute\(sql, \(file_id,\)\))' + + match = re.search(pattern, content, re.DOTALL) + + if not match: + print("[错误] 未找到get_file_config_by_id方法或查询语句") + return False + + old_code = match.group(0) + + # 检查是否已经包含tenant_id + if 'tenant_id' in old_code: + print("[信息] 查询已经包含tenant_id检查,无需修复") + return True + + # 生成新的代码 + new_sql = ''' sql = """ + SELECT id, name, file_path + FROM f_polic_file_config + WHERE id = %s + AND tenant_id = %s + AND state = 1 + """ + # 获取tenant_id(从环境变量或请求中获取) + tenant_id = self.tenant_id if self.tenant_id else os.getenv('TENANT_ID', '1') + try: + tenant_id = int(tenant_id) + except (ValueError, TypeError): + tenant_id = 1 # 默认值 + + cursor.execute(sql, (file_id, tenant_id))''' + + # 替换 + new_code = re.sub( + r'sql = """.*?WHERE id = %s\s+AND state = 1\s+""".*?cursor\.execute\(sql, \(file_id,\)\)', + new_sql, + old_code, + flags=re.DOTALL + ) + + new_content = content.replace(old_code, new_code) + + # 检查是否需要导入os + if 'import os' not in new_content and 'os.getenv' in new_content: + # 在文件开头添加import os(如果还没有) + if 'from dotenv import load_dotenv' in new_content: + new_content = new_content.replace('from dotenv import load_dotenv', 'from dotenv import load_dotenv\nimport os') + elif 'import pymysql' in new_content: + new_content = new_content.replace('import pymysql', 'import pymysql\nimport os') + else: + # 在文件开头添加 + lines = new_content.split('\n') + import_line = 0 + for i, line in enumerate(lines): + if line.startswith('import ') or line.startswith('from '): + import_line = i + 1 + lines.insert(import_line, 'import os') + new_content = '\n'.join(lines) + + # 写回文件 + with open(file_path, 'w', encoding='utf-8') as f: + f.write(new_content) + + print("[成功] 已修复get_file_config_by_id方法,添加了tenant_id检查") + return True + + +if __name__ == "__main__": + print("="*70) + print("修复document_service.py中的tenant_id查询问题") + print("="*70) + + if fix_document_service(): + print("\n修复完成!") + print("\n注意:") + print("1. 请确保.env文件中配置了TENANT_ID") + print("2. 或者确保应用程序在调用时正确传递tenant_id") + print("3. 建议在app.py中从请求中获取tenant_id并传递给document_service") + else: + print("\n修复失败,请手动检查代码") diff --git a/services/document_service.py b/services/document_service.py index 9b3979c..368c978 100644 --- a/services/document_service.py +++ b/services/document_service.py @@ -79,12 +79,13 @@ class DocumentService: secure=self.minio_config['secure'] ) - def get_file_config_by_id(self, file_id: int) -> Optional[Dict]: + def get_file_config_by_id(self, file_id: int, tenant_id: Optional[int] = None) -> Optional[Dict]: """ 根据文件ID获取文件配置 Args: file_id: 文件配置ID + tenant_id: 租户ID(如果为None,则从环境变量获取或使用默认值1) Returns: 文件配置信息,包含: id, name, file_path @@ -93,13 +94,23 @@ class DocumentService: cursor = conn.cursor(pymysql.cursors.DictCursor) try: + # 获取tenant_id + if tenant_id is None: + # 尝试从环境变量获取 + tenant_id_str = os.getenv('TENANT_ID', '1') + try: + tenant_id = int(tenant_id_str) + except (ValueError, TypeError): + tenant_id = 1 # 默认值 + sql = """ SELECT id, name, file_path FROM f_polic_file_config WHERE id = %s + AND tenant_id = %s AND state = 1 """ - cursor.execute(sql, (file_id,)) + cursor.execute(sql, (file_id, tenant_id)) config = cursor.fetchone() if config: @@ -899,7 +910,7 @@ class DocumentService: except S3Error as e: raise Exception(f"上传文件到MinIO失败: {str(e)}") - def generate_document(self, file_id: int, input_data: List[Dict], file_info: Dict) -> Dict: + def generate_document(self, file_id: int, input_data: List[Dict], file_info: Dict, tenant_id: Optional[int] = None) -> Dict: """ 生成文档 @@ -907,12 +918,13 @@ class DocumentService: file_id: 文件配置ID input_data: 输入数据列表,格式: [{'fieldCode': 'xxx', 'fieldValue': 'xxx'}] file_info: 文件信息,格式: {'fileId': 1, 'fileName': 'xxx.doc'} + tenant_id: 租户ID(如果为None,则从环境变量获取或使用默认值1) Returns: 生成结果,包含: filePath """ # 获取文件配置 - file_config = self.get_file_config_by_id(file_id) + file_config = self.get_file_config_by_id(file_id, tenant_id) if not file_config: # 提供更详细的错误信息 raise Exception( diff --git a/sync_templates_between_databases.py b/sync_templates_between_databases.py new file mode 100644 index 0000000..d9501f8 --- /dev/null +++ b/sync_templates_between_databases.py @@ -0,0 +1,779 @@ +""" +跨数据库同步模板、字段和关联关系 + +功能: +1. 从.env文件读取源数据库配置 +2. 同步到目标数据库(10.100.31.21) +3. 处理ID映射关系(两个数据库的ID不同) +4. 根据业务逻辑(name, filed_code, file_path)匹配数据 + +使用方法: +python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password FknJYz3FA5WDYtsd --target-database finyx --target-tenant-id 1 +""" +import os +import sys +import pymysql +import argparse +from pathlib import Path +from typing import Dict, List, Set, Optional, Tuple +from dotenv import load_dotenv + +# 设置输出编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + import io + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + +# 加载环境变量 +load_dotenv() + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + +CREATED_BY = 655162080928945152 +UPDATED_BY = 655162080928945152 + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def generate_id(): + """生成ID""" + import time + return int(time.time() * 1000000) + + +def get_source_db_config() -> Dict: + """从.env文件读取源数据库配置""" + db_host = os.getenv('DB_HOST') + db_port = os.getenv('DB_PORT') + db_user = os.getenv('DB_USER') + db_password = os.getenv('DB_PASSWORD') + db_name = os.getenv('DB_NAME') + + if not all([db_host, db_port, db_user, db_password, db_name]): + raise ValueError( + "源数据库配置不完整,请在.env文件中配置以下环境变量:\n" + "DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME" + ) + + return { + 'host': db_host, + 'port': int(db_port), + 'user': db_user, + 'password': db_password, + 'database': db_name, + 'charset': 'utf8mb4' + } + + +def get_target_db_config_from_args() -> Dict: + """从命令行参数获取目标数据库配置""" + parser = argparse.ArgumentParser( + description='跨数据库同步模板、字段和关联关系', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password FknJYz3FA5WDYtsd --target-database finyx --target-tenant-id 1 + """ + ) + + parser.add_argument('--target-host', type=str, required=True, help='目标MySQL服务器地址') + parser.add_argument('--target-port', type=int, required=True, help='目标MySQL服务器端口') + parser.add_argument('--target-user', type=str, required=True, help='目标MySQL用户名') + parser.add_argument('--target-password', type=str, required=True, help='目标MySQL密码') + parser.add_argument('--target-database', type=str, required=True, help='目标数据库名称') + parser.add_argument('--target-tenant-id', type=int, required=True, help='目标租户ID') + parser.add_argument('--source-tenant-id', type=int, help='源租户ID(如果不指定,将使用数据库中的第一个tenant_id)') + parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)') + + args = parser.parse_args() + + return { + 'host': args.target_host, + 'port': args.target_port, + 'user': args.target_user, + 'password': args.target_password, + 'database': args.target_database, + 'charset': 'utf8mb4', + 'tenant_id': args.target_tenant_id, + 'source_tenant_id': args.source_tenant_id, + 'dry_run': args.dry_run + } + + +def test_db_connection(config: Dict, label: str) -> Optional[pymysql.Connection]: + """测试数据库连接""" + try: + conn = pymysql.connect( + host=config['host'], + port=config['port'], + user=config['user'], + password=config['password'], + database=config['database'], + charset=config['charset'] + ) + return conn + except Exception as e: + print_result(False, f"{label}数据库连接失败: {str(e)}") + return None + + +def get_source_tenant_id(conn) -> int: + """获取源数据库中的tenant_id""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1") + result = cursor.fetchone() + if result: + return result['tenant_id'] + return 1 + finally: + cursor.close() + + +def read_source_fields(conn, tenant_id: int) -> Tuple[Dict[str, Dict], Dict[str, Dict]]: + """ + 从源数据库读取字段数据 + + Returns: + (input_fields_dict, output_fields_dict) + key: filed_code, value: 字段信息 + """ + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, tenant_id, name, filed_code, field_type, state + FROM f_polic_field + WHERE tenant_id = %s + AND state = 1 + ORDER BY field_type, filed_code + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + input_fields = {} + output_fields = {} + + for field in fields: + field_info = { + 'id': field['id'], + 'tenant_id': field['tenant_id'], + 'name': field['name'], + 'filed_code': field['filed_code'], + 'field_type': field['field_type'], + 'state': field['state'] + } + + if field['field_type'] == 1: + input_fields[field['filed_code']] = field_info + elif field['field_type'] == 2: + output_fields[field['filed_code']] = field_info + + return input_fields, output_fields + finally: + cursor.close() + + +def read_source_templates(conn, tenant_id: int) -> Dict[str, Dict]: + """ + 从源数据库读取模板数据 + + Returns: + key: file_path (如果为空则使用name), value: 模板信息 + """ + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, tenant_id, parent_id, name, file_path, state + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + ORDER BY file_path, name + """ + cursor.execute(sql, (tenant_id,)) + templates = cursor.fetchall() + + result = {} + for template in templates: + # 使用file_path作为key,如果没有file_path则使用name + key = template['file_path'] if template['file_path'] else f"DIR:{template['name']}" + result[key] = { + 'id': template['id'], + 'tenant_id': template['tenant_id'], + 'parent_id': template['parent_id'], + 'name': template['name'], + 'file_path': template['file_path'], + 'state': template['state'] + } + + return result + finally: + cursor.close() + + +def read_source_relations(conn, tenant_id: int) -> Dict[int, List[int]]: + """ + 从源数据库读取字段关联关系 + + Returns: + key: file_id, value: [filed_id列表] + """ + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT file_id, filed_id + FROM f_polic_file_field + WHERE tenant_id = %s + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + relations = cursor.fetchall() + + result = {} + for rel in relations: + file_id = rel['file_id'] + filed_id = rel['filed_id'] + if file_id not in result: + result[file_id] = [] + result[file_id].append(filed_id) + + return result + finally: + cursor.close() + + +def sync_fields_to_target(conn, tenant_id: int, source_input_fields: Dict, source_output_fields: Dict, + dry_run: bool = False) -> Tuple[Dict[int, int], Dict[int, int]]: + """ + 同步字段到目标数据库 + + Returns: + (input_field_id_map, output_field_id_map) + key: 源字段ID, value: 目标字段ID + """ + print_section("同步字段到目标数据库") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 获取目标数据库中的现有字段 + cursor.execute(""" + SELECT id, filed_code, field_type + FROM f_polic_field + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + existing_fields = cursor.fetchall() + + existing_by_code = {} + for field in existing_fields: + key = (field['filed_code'], field['field_type']) + existing_by_code[key] = field['id'] + + print(f" 目标数据库现有字段: {len(existing_fields)} 个") + + # 2. 同步输入字段 + print("\n 同步输入字段...") + input_field_id_map = {} + input_created = 0 + input_matched = 0 + + for code, source_field in source_input_fields.items(): + key = (code, 1) + if key in existing_by_code: + # 字段已存在,使用现有ID + target_id = existing_by_code[key] + input_field_id_map[source_field['id']] = target_id + input_matched += 1 + else: + # 创建新字段 + target_id = generate_id() + input_field_id_map[source_field['id']] = target_id + + if not dry_run: + insert_cursor = conn.cursor() + try: + insert_cursor.execute(""" + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, ( + target_id, + tenant_id, + source_field['name'], + source_field['filed_code'], + 1, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + input_created += 1 + finally: + insert_cursor.close() + else: + input_created += 1 + + print(f" 匹配: {input_matched} 个,创建: {input_created} 个") + + # 3. 同步输出字段 + print("\n 同步输出字段...") + output_field_id_map = {} + output_created = 0 + output_matched = 0 + + for code, source_field in source_output_fields.items(): + key = (code, 2) + if key in existing_by_code: + # 字段已存在,使用现有ID + target_id = existing_by_code[key] + output_field_id_map[source_field['id']] = target_id + output_matched += 1 + else: + # 创建新字段 + target_id = generate_id() + output_field_id_map[source_field['id']] = target_id + + if not dry_run: + insert_cursor = conn.cursor() + try: + insert_cursor.execute(""" + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, ( + target_id, + tenant_id, + source_field['name'], + source_field['filed_code'], + 2, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + output_created += 1 + finally: + insert_cursor.close() + else: + output_created += 1 + + print(f" 匹配: {output_matched} 个,创建: {output_created} 个") + + return input_field_id_map, output_field_id_map + + finally: + cursor.close() + + +def sync_templates_to_target(conn, tenant_id: int, source_templates: Dict, + dry_run: bool = False) -> Dict[int, int]: + """ + 同步模板到目标数据库 + + Returns: + template_id_map: key: 源模板ID, value: 目标模板ID + """ + print_section("同步模板到目标数据库") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 获取目标数据库中的现有模板 + cursor.execute(""" + SELECT id, name, file_path, parent_id + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + existing_templates = cursor.fetchall() + + existing_by_path = {} + existing_by_name = {} + for template in existing_templates: + if template['file_path']: + existing_by_path[template['file_path']] = template + else: + # 目录节点 + name = template['name'] + if name not in existing_by_name: + existing_by_name[name] = [] + existing_by_name[name].append(template) + + print(f" 目标数据库现有模板: {len(existing_templates)} 个") + + # 2. 先处理目录节点(按层级顺序) + print("\n 同步目录节点...") + template_id_map = {} + dir_created = 0 + dir_matched = 0 + + # 分离目录和文件 + dir_templates = {} + file_templates = {} + for key, source_template in source_templates.items(): + if source_template['file_path']: + file_templates[key] = source_template + else: + dir_templates[key] = source_template + + # 构建目录层级关系(需要先处理父目录) + # 按parent_id分组,先处理没有parent_id的,再处理有parent_id的 + dirs_by_level = {} + for key, source_template in dir_templates.items(): + level = 0 + current = source_template + while current.get('parent_id'): + level += 1 + # 查找父目录 + parent_found = False + for t in dir_templates.values(): + if t['id'] == current['parent_id']: + current = t + parent_found = True + break + if not parent_found: + break + + if level not in dirs_by_level: + dirs_by_level[level] = [] + dirs_by_level[level].append((key, source_template)) + + # 按层级顺序处理目录 + for level in sorted(dirs_by_level.keys()): + for key, source_template in dirs_by_level[level]: + source_id = source_template['id'] + name = source_template['name'] + + # 查找匹配的目录(通过名称和parent_id) + matched = None + target_parent_id = None + if source_template['parent_id']: + target_parent_id = template_id_map.get(source_template['parent_id']) + + for existing in existing_by_name.get(name, []): + if not existing['file_path']: # 确保是目录节点 + # 检查parent_id是否匹配 + if existing['parent_id'] == target_parent_id: + matched = existing + break + + if matched: + target_id = matched['id'] + template_id_map[source_id] = target_id + dir_matched += 1 + else: + target_id = generate_id() + template_id_map[source_id] = target_id + + if not dry_run: + insert_cursor = conn.cursor() + try: + insert_cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1) + """, ( + target_id, + tenant_id, + target_parent_id, + name, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + dir_created += 1 + finally: + insert_cursor.close() + else: + dir_created += 1 + + print(f" 匹配: {dir_matched} 个,创建: {dir_created} 个") + + # 3. 处理文件节点 + print("\n 同步文件节点...") + file_created = 0 + file_matched = 0 + file_updated = 0 + + for key, source_template in file_templates.items(): + source_id = source_template['id'] + file_path = source_template['file_path'] + name = source_template['name'] + + # 通过file_path匹配 + matched = existing_by_path.get(file_path) + + if matched: + target_id = matched['id'] + template_id_map[source_id] = target_id + file_matched += 1 + + # 检查是否需要更新 + target_parent_id = None + if source_template['parent_id']: + target_parent_id = template_id_map.get(source_template['parent_id']) + + if matched['parent_id'] != target_parent_id or matched['name'] != name: + file_updated += 1 + if not dry_run: + update_cursor = conn.cursor() + try: + update_cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (target_parent_id, name, UPDATED_BY, target_id, tenant_id)) + conn.commit() + finally: + update_cursor.close() + else: + target_id = generate_id() + template_id_map[source_id] = target_id + + if not dry_run: + insert_cursor = conn.cursor() + try: + # 处理parent_id映射 + target_parent_id = None + if source_template['parent_id']: + target_parent_id = template_id_map.get(source_template['parent_id']) + + insert_cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, ( + target_id, + tenant_id, + target_parent_id, + name, + file_path, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + file_created += 1 + finally: + insert_cursor.close() + else: + file_created += 1 + + print(f" 匹配: {file_matched} 个,创建: {file_created} 个,更新: {file_updated} 个") + + return template_id_map + + finally: + cursor.close() + + +def sync_relations_to_target(conn, tenant_id: int, source_relations: Dict[int, List[int]], + template_id_map: Dict[int, int], + input_field_id_map: Dict[int, int], + output_field_id_map: Dict[int, int], + dry_run: bool = False): + """同步字段关联关系到目标数据库""" + print_section("同步字段关联关系到目标数据库") + + # 1. 清理现有关联关系 + print("1. 清理现有关联关系...") + if not dry_run: + cursor = conn.cursor() + try: + cursor.execute(""" + DELETE FROM f_polic_file_field + WHERE tenant_id = %s + """, (tenant_id,)) + deleted_count = cursor.rowcount + conn.commit() + print_result(True, f"删除了 {deleted_count} 条旧关联关系") + finally: + cursor.close() + else: + print(" [预览模式] 将清理所有现有关联关系") + + # 2. 创建新的关联关系 + print("\n2. 创建新的关联关系...") + all_field_id_map = {**input_field_id_map, **output_field_id_map} + + relations_created = 0 + relations_skipped = 0 + + for source_file_id, source_field_ids in source_relations.items(): + # 获取目标file_id + target_file_id = template_id_map.get(source_file_id) + if not target_file_id: + relations_skipped += 1 + continue + + # 转换field_id + target_field_ids = [] + for source_field_id in source_field_ids: + target_field_id = all_field_id_map.get(source_field_id) + if target_field_id: + target_field_ids.append(target_field_id) + + if not target_field_ids: + continue + + # 创建关联关系 + if not dry_run: + cursor = conn.cursor() + try: + for target_field_id in target_field_ids: + relation_id = generate_id() + cursor.execute(""" + INSERT INTO f_polic_file_field + (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, ( + relation_id, + tenant_id, + target_file_id, + target_field_id, + CREATED_BY, + UPDATED_BY + )) + conn.commit() + relations_created += len(target_field_ids) + except Exception as e: + conn.rollback() + print(f" [错误] 创建关联关系失败: {str(e)}") + finally: + cursor.close() + else: + relations_created += len(target_field_ids) + + print_result(True, f"创建了 {relations_created} 条关联关系,跳过 {relations_skipped} 个模板") + + return { + 'created': relations_created, + 'skipped': relations_skipped + } + + +def main(): + """主函数""" + print_section("跨数据库同步模板、字段和关联关系") + + # 1. 获取源数据库配置(从.env) + print_section("读取源数据库配置") + try: + source_config = get_source_db_config() + print_result(True, f"源数据库: {source_config['host']}:{source_config['port']}/{source_config['database']}") + except Exception as e: + print_result(False, str(e)) + return + + # 2. 获取目标数据库配置(从命令行参数) + print_section("读取目标数据库配置") + target_config = get_target_db_config_from_args() + print_result(True, f"目标数据库: {target_config['host']}:{target_config['port']}/{target_config['database']}") + print(f" 目标租户ID: {target_config['tenant_id']}") + + if target_config['dry_run']: + print("\n[注意] 当前为预览模式,不会实际更新数据库") + + # 3. 连接数据库 + print_section("连接数据库") + source_conn = test_db_connection(source_config, "源") + if not source_conn: + return + + target_conn = test_db_connection(target_config, "目标") + if not target_conn: + source_conn.close() + return + + print_result(True, "数据库连接成功") + + try: + # 4. 获取源租户ID + source_tenant_id = target_config.get('source_tenant_id') + if not source_tenant_id: + source_tenant_id = get_source_tenant_id(source_conn) + print(f"\n源租户ID: {source_tenant_id}") + + # 5. 读取源数据 + print_section("读取源数据库数据") + + print(" 读取字段...") + source_input_fields, source_output_fields = read_source_fields(source_conn, source_tenant_id) + print_result(True, f"输入字段: {len(source_input_fields)} 个,输出字段: {len(source_output_fields)} 个") + + print("\n 读取模板...") + source_templates = read_source_templates(source_conn, source_tenant_id) + print_result(True, f"模板总数: {len(source_templates)} 个") + + print("\n 读取关联关系...") + source_relations = read_source_relations(source_conn, source_tenant_id) + print_result(True, f"关联关系: {len(source_relations)} 个模板有字段关联") + + # 6. 同步到目标数据库 + target_tenant_id = target_config['tenant_id'] + dry_run = target_config['dry_run'] + + # 6.1 同步字段 + input_field_id_map, output_field_id_map = sync_fields_to_target( + target_conn, target_tenant_id, + source_input_fields, source_output_fields, + dry_run + ) + + # 6.2 同步模板 + template_id_map = sync_templates_to_target( + target_conn, target_tenant_id, + source_templates, + dry_run + ) + + # 6.3 同步关联关系 + relations_result = sync_relations_to_target( + target_conn, target_tenant_id, + source_relations, + template_id_map, + input_field_id_map, + output_field_id_map, + dry_run + ) + + # 7. 总结 + print_section("同步完成") + if dry_run: + print(" 本次为预览模式,未实际更新数据库") + else: + print(" 数据库已更新") + + print(f"\n 同步统计:") + print(f" - 输入字段: {len(input_field_id_map)} 个") + print(f" - 输出字段: {len(output_field_id_map)} 个") + print(f" - 模板: {len(template_id_map)} 个") + print(f" - 关联关系: {relations_result['created']} 条") + + finally: + source_conn.close() + target_conn.close() + print_result(True, "数据库连接已关闭") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n[中断] 用户取消操作") + sys.exit(0) + except Exception as e: + print(f"\n[错误] 发生异常: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/update_all_template_field_relations.py b/update_all_template_field_relations.py new file mode 100644 index 0000000..76234df --- /dev/null +++ b/update_all_template_field_relations.py @@ -0,0 +1,562 @@ +""" +更新所有模板的字段关联关系 +1. 输入字段:所有模板都关联 clue_info 和 target_basic_info_clue +2. 输出字段:根据模板中的占位符自动关联对应的输出字段 +""" +import os +import pymysql +from pathlib import Path +from typing import Dict, List, Set, Optional +from dotenv import load_dotenv +import re +from docx import Document + +# 加载环境变量 +load_dotenv() + +# 数据库配置 +DB_CONFIG = { + 'host': os.getenv('DB_HOST', '152.136.177.240'), + 'port': int(os.getenv('DB_PORT', 5012)), + 'user': os.getenv('DB_USER', 'finyx'), + 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), + 'database': os.getenv('DB_NAME', 'finyx'), + 'charset': 'utf8mb4' +} + +CREATED_BY = 655162080928945152 +UPDATED_BY = 655162080928945152 + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def generate_id(): + """生成ID""" + import time + return int(time.time() * 1000000) + + +def get_actual_tenant_id(conn) -> int: + """获取数据库中的实际tenant_id""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1") + result = cursor.fetchone() + if result: + return result['tenant_id'] + return 1 + finally: + cursor.close() + + +def get_input_fields(conn, tenant_id: int) -> Dict[str, int]: + """ + 获取输入字段(clue_info 和 target_basic_info_clue) + + Returns: + 字典,key为field_code,value为field_id + """ + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 1 + AND filed_code IN ('clue_info', 'target_basic_info_clue') + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + print(f" 输入字段: {field['name']} ({field['filed_code']}) - ID: {field['id']}") + + return result + finally: + cursor.close() + + +def get_output_fields(conn, tenant_id: int) -> Dict[str, int]: + """ + 获取所有输出字段 + + Returns: + 字典,key为filed_code,value为field_id + """ + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 2 + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + + return result + finally: + cursor.close() + + +def extract_placeholders_from_docx(file_path: Path) -> Set[str]: + """从docx文件中提取所有占位符""" + placeholders = set() + placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') + + try: + doc = Document(file_path) + + # 从段落中提取 + for paragraph in doc.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + + # 从表格中提取 + for table in doc.tables: + try: + for row in table.rows: + for cell in row.cells: + for paragraph in cell.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + except: + continue + except Exception as e: + print(f" [错误] 读取文件失败: {str(e)}") + + return placeholders + + +def get_all_templates(conn, tenant_id: int) -> List[Dict]: + """获取所有模板(只获取文件节点,不包括目录节点)""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND file_path IS NOT NULL + AND file_path != '' + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + templates = cursor.fetchall() + return templates + finally: + cursor.close() + + +def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]: + """获取模板的现有关联关系""" + cursor = conn.cursor() + try: + sql = """ + SELECT filed_id + FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id = %s + AND state = 1 + """ + cursor.execute(sql, (tenant_id, file_id)) + results = cursor.fetchall() + return {row[0] for row in results} + finally: + cursor.close() + + +def update_template_field_relations(conn, tenant_id: int, file_id: int, file_name: str, + input_field_ids: List[int], output_field_ids: List[int], + dry_run: bool = False): + """ + 更新模板的字段关联关系 + + Args: + conn: 数据库连接 + tenant_id: 租户ID + file_id: 模板ID + file_name: 模板名称 + input_field_ids: 输入字段ID列表 + output_field_ids: 输出字段ID列表 + dry_run: 是否只是预览(不实际更新) + """ + cursor = conn.cursor() + + try: + all_field_ids = set(input_field_ids + output_field_ids) + + if not all_field_ids: + print(f" [跳过] {file_name}: 没有字段需要关联") + return + + # 获取现有关联 + existing_field_ids = get_existing_relations(conn, tenant_id, file_id) + + # 需要添加的字段 + to_add = all_field_ids - existing_field_ids + # 需要删除的字段(如果某个字段不在新列表中,但存在于现有关联中,且不是必需的输入字段,则删除) + # 注意:我们保留所有现有关联,只添加新的 + to_remove = existing_field_ids - all_field_ids + + if not to_add and not to_remove: + print(f" [保持] {file_name}: 关联关系已是最新") + return + + if dry_run: + print(f" [预览] {file_name}:") + if to_add: + print(f" 将添加: {len(to_add)} 个字段") + if to_remove: + print(f" 将删除: {len(to_remove)} 个字段") + return + + # 删除需要移除的关联 + if to_remove: + placeholders = ','.join(['%s'] * len(to_remove)) + delete_sql = f""" + DELETE FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id = %s + AND filed_id IN ({placeholders}) + """ + cursor.execute(delete_sql, [tenant_id, file_id] + list(to_remove)) + deleted_count = cursor.rowcount + + # 添加新的关联 + added_count = 0 + for field_id in to_add: + # 检查是否已存在(防止重复) + check_sql = """ + SELECT id FROM f_polic_file_field + WHERE tenant_id = %s AND file_id = %s AND filed_id = %s + """ + cursor.execute(check_sql, (tenant_id, file_id, field_id)) + if cursor.fetchone(): + continue + + relation_id = generate_id() + insert_sql = """ + INSERT INTO f_polic_file_field + (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + relation_id, + tenant_id, + file_id, + field_id, + CREATED_BY, + UPDATED_BY + )) + added_count += 1 + + conn.commit() + + action_parts = [] + if added_count > 0: + action_parts.append(f"添加 {added_count} 个") + if to_remove and deleted_count > 0: + action_parts.append(f"删除 {deleted_count} 个") + + if action_parts: + print(f" [更新] {file_name}: {', '.join(action_parts)}") + + except Exception as e: + conn.rollback() + print(f" [错误] {file_name}: 更新失败 - {str(e)}") + finally: + cursor.close() + + +def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """ + 如果输出字段不存在,创建它 + + Returns: + 字段ID,如果创建失败则返回None + """ + cursor = conn.cursor() + + try: + # 先检查是否已存在 + check_cursor = conn.cursor(pymysql.cursors.DictCursor) + check_cursor.execute(""" + SELECT id FROM f_polic_field + WHERE tenant_id = %s AND filed_code = %s + """, (tenant_id, field_code)) + existing = check_cursor.fetchone() + check_cursor.close() + + if existing: + return existing['id'] + + # 创建新字段 + field_id = generate_id() + field_name = field_code.replace('_', ' ') # 简单的名称生成 + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 2, # field_type=2 表示输出字段 + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + print(f" [创建字段] {field_code} (ID: {field_id})") + return field_id + + except Exception as e: + conn.rollback() + print(f" [错误] 创建字段失败 {field_code}: {str(e)}") + return None + finally: + cursor.close() + + +def main(): + """主函数""" + print_section("更新所有模板的字段关联关系") + + # 1. 连接数据库 + print_section("1. 连接数据库") + try: + conn = pymysql.connect(**DB_CONFIG) + print_result(True, "数据库连接成功") + except Exception as e: + print_result(False, f"数据库连接失败: {str(e)}") + return + + try: + # 2. 获取实际的tenant_id + print_section("2. 获取实际的tenant_id") + tenant_id = get_actual_tenant_id(conn) + print_result(True, f"实际tenant_id: {tenant_id}") + + # 3. 获取输入字段 + print_section("3. 获取输入字段") + input_fields = get_input_fields(conn, tenant_id) + + if not input_fields: + print_result(False, "未找到输入字段 clue_info 和 target_basic_info_clue") + print(" 将尝试创建这些字段...") + # 创建缺失的输入字段 + for field_code in ['clue_info', 'target_basic_info_clue']: + field_id = create_missing_input_field(conn, tenant_id, field_code) + if field_id: + input_fields[field_code] = field_id + + if not input_fields: + print_result(False, "无法获取或创建输入字段,终止操作") + return + + input_field_ids = list(input_fields.values()) + print_result(True, f"找到 {len(input_field_ids)} 个输入字段") + + # 4. 获取输出字段 + print_section("4. 获取输出字段") + output_fields = get_output_fields(conn, tenant_id) + print_result(True, f"找到 {len(output_fields)} 个输出字段") + + # 5. 获取所有模板 + print_section("5. 获取所有模板") + templates = get_all_templates(conn, tenant_id) + print_result(True, f"找到 {len(templates)} 个模板") + + if not templates: + print_result(False, "未找到模板") + return + + # 6. 扫描模板占位符并更新关联关系 + print_section("6. 扫描模板占位符并更新关联关系") + + total_updated = 0 + total_kept = 0 + total_errors = 0 + all_placeholders_found = set() + missing_fields = set() + + for i, template in enumerate(templates, 1): + template_id = template['id'] + template_name = template['name'] + file_path = template['file_path'] + + if i % 10 == 0: + print(f" 处理进度: {i}/{len(templates)}") + + # 检查本地文件是否存在 + local_file = PROJECT_ROOT / file_path + if not local_file.exists(): + print(f" [跳过] {template_name}: 文件不存在 - {file_path}") + total_errors += 1 + continue + + # 提取占位符 + placeholders = extract_placeholders_from_docx(local_file) + all_placeholders_found.update(placeholders) + + # 根据占位符找到对应的输出字段ID + output_field_ids = [] + for placeholder in placeholders: + if placeholder in output_fields: + output_field_ids.append(output_fields[placeholder]) + else: + # 字段不存在,尝试创建 + missing_fields.add(placeholder) + field_id = create_missing_output_field(conn, tenant_id, placeholder) + if field_id: + output_fields[placeholder] = field_id + output_field_ids.append(field_id) + + # 更新关联关系 + try: + update_template_field_relations( + conn, tenant_id, template_id, template_name, + input_field_ids, output_field_ids, dry_run=False + ) + total_updated += 1 + except Exception as e: + print(f" [错误] {template_name}: {str(e)}") + total_errors += 1 + + # 7. 统计结果 + print_section("7. 更新结果统计") + print(f" 总模板数: {len(templates)}") + print(f" 已更新: {total_updated} 个") + print(f" 错误: {total_errors} 个") + print(f" 发现的占位符总数: {len(all_placeholders_found)} 个") + print(f" 缺失的字段(已创建): {len(missing_fields)} 个") + + if missing_fields: + print(f"\n 创建的字段列表:") + for field_code in sorted(missing_fields): + print(f" - {field_code}") + + # 8. 验证关联关系 + print_section("8. 验证关联关系") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + # 统计有输入字段关联的模板 + cursor.execute(""" + SELECT COUNT(DISTINCT fff.file_id) as count + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id + WHERE fff.tenant_id = %s + AND f.field_type = 1 + AND fff.state = 1 + """, (tenant_id,)) + templates_with_input = cursor.fetchone()['count'] + print(f" 有输入字段关联的模板: {templates_with_input} 个") + + # 统计有输出字段关联的模板 + cursor.execute(""" + SELECT COUNT(DISTINCT fff.file_id) as count + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id + WHERE fff.tenant_id = %s + AND f.field_type = 2 + AND fff.state = 1 + """, (tenant_id,)) + templates_with_output = cursor.fetchone()['count'] + print(f" 有输出字段关联的模板: {templates_with_output} 个") + + # 统计总关联数 + cursor.execute(""" + SELECT COUNT(*) as count + FROM f_polic_file_field + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + total_relations = cursor.fetchone()['count'] + print(f" 总关联关系数: {total_relations} 条") + + finally: + cursor.close() + + finally: + conn.close() + print_result(True, "数据库连接已关闭") + + print_section("完成") + + +def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """创建缺失的输入字段""" + cursor = conn.cursor() + + try: + field_id = generate_id() + field_name_map = { + 'clue_info': '线索信息', + 'target_basic_info_clue': '被核查人基本信息(线索)' + } + field_name = field_name_map.get(field_code, field_code.replace('_', ' ')) + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 1, # field_type=1 表示输入字段 + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + print(f" [创建输入字段] {field_code} ({field_name}) - ID: {field_id}") + return field_id + + except Exception as e: + conn.rollback() + print(f" [错误] 创建输入字段失败 {field_code}: {str(e)}") + return None + finally: + cursor.close() + + +if __name__ == "__main__": + main() diff --git a/update_templates_custom.py b/update_templates_custom.py new file mode 100644 index 0000000..af554d9 --- /dev/null +++ b/update_templates_custom.py @@ -0,0 +1,928 @@ +""" +模板更新脚本 - 支持自定义数据库连接和租户ID配置 + +功能: +1. 更新模板层级结构(根据template_finish/目录结构) +2. 更新模板字段关联关系(输入字段和输出字段) + +使用方法: +1. 命令行参数方式: + python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 + +2. 交互式输入方式: + python update_templates_custom.py +""" +import os +import sys +import pymysql +import argparse +from pathlib import Path +from typing import Dict, List, Set, Optional +import re +from docx import Document +import getpass + +# 设置输出编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + import io + sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') + sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8') + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + +CREATED_BY = 655162080928945152 +UPDATED_BY = 655162080928945152 + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def generate_id(): + """生成ID""" + import time + return int(time.time() * 1000000) + + +def get_db_config_from_args() -> Optional[Dict]: + """从命令行参数获取数据库配置""" + parser = argparse.ArgumentParser( + description='模板更新脚本 - 支持自定义数据库连接和租户ID', + formatter_class=argparse.RawDescriptionHelpFormatter, + epilog=""" +示例: + # 使用命令行参数 + python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 + + # 使用交互式输入 + python update_templates_custom.py + """ + ) + + parser.add_argument('--host', type=str, help='MySQL服务器地址') + parser.add_argument('--port', type=int, help='MySQL服务器端口') + parser.add_argument('--user', type=str, help='MySQL用户名') + parser.add_argument('--password', type=str, help='MySQL密码') + parser.add_argument('--database', type=str, help='数据库名称') + parser.add_argument('--tenant-id', type=int, help='租户ID') + parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)') + parser.add_argument('--update-hierarchy', action='store_true', default=True, help='更新模板层级结构(默认启用)') + parser.add_argument('--update-fields', action='store_true', default=True, help='更新字段关联关系(默认启用)') + + args = parser.parse_args() + + # 如果所有参数都提供了,返回配置 + if all([args.host, args.port, args.user, args.password, args.database, args.tenant_id]): + return { + 'host': args.host, + 'port': args.port, + 'user': args.user, + 'password': args.password, + 'database': args.database, + 'charset': 'utf8mb4', + 'tenant_id': args.tenant_id, + 'dry_run': args.dry_run, + 'update_hierarchy': args.update_hierarchy, + 'update_fields': args.update_fields + } + + return None + + +def get_db_config_interactive() -> Dict: + """交互式获取数据库配置""" + print_section("数据库连接配置") + print("请输入数据库连接信息(直接回车使用默认值):") + + host = input("MySQL服务器地址 [152.136.177.240]: ").strip() or "152.136.177.240" + port_str = input("MySQL服务器端口 [5012]: ").strip() or "5012" + port = int(port_str) if port_str.isdigit() else 5012 + user = input("MySQL用户名 [finyx]: ").strip() or "finyx" + password = getpass.getpass("MySQL密码 [留空使用默认]: ").strip() + if not password: + password = "6QsGK6MpePZDE57Z" + database = input("数据库名称 [finyx]: ").strip() or "finyx" + + print("\n租户配置:") + tenant_id_str = input("租户ID (tenant_id) [必填]: ").strip() + if not tenant_id_str: + print("[错误] 租户ID不能为空") + sys.exit(1) + try: + tenant_id = int(tenant_id_str) + except ValueError: + print("[错误] 租户ID必须是数字") + sys.exit(1) + + print("\n更新选项:") + update_hierarchy = input("更新模板层级结构?[Y/n]: ").strip().lower() != 'n' + update_fields = input("更新字段关联关系?[Y/n]: ").strip().lower() != 'n' + dry_run = input("预览模式(不实际更新)?[y/N]: ").strip().lower() == 'y' + + return { + 'host': host, + 'port': port, + 'user': user, + 'password': password, + 'database': database, + 'charset': 'utf8mb4', + 'tenant_id': tenant_id, + 'dry_run': dry_run, + 'update_hierarchy': update_hierarchy, + 'update_fields': update_fields + } + + +def test_db_connection(config: Dict) -> Optional[pymysql.Connection]: + """测试数据库连接""" + try: + conn = pymysql.connect( + host=config['host'], + port=config['port'], + user=config['user'], + password=config['password'], + database=config['database'], + charset=config['charset'] + ) + return conn + except Exception as e: + print_result(False, f"数据库连接失败: {str(e)}") + return None + + +# ==================== 模板层级结构更新 ==================== + +def scan_directory_structure(base_dir: Path) -> Dict: + """扫描目录结构""" + directories = [] + files = [] + + def scan_recursive(current_path: Path, parent_path: Optional[str] = None): + """递归扫描目录""" + if not current_path.exists() or not current_path.is_dir(): + return + + # 获取相对路径 + rel_path = current_path.relative_to(base_dir) + rel_path_str = str(rel_path).replace('\\', '/') + + # 添加目录节点 + if rel_path_str != '.': + directories.append({ + 'name': current_path.name, + 'path': rel_path_str, + 'parent_path': parent_path + }) + + # 扫描子项 + for item in sorted(current_path.iterdir()): + if item.is_dir(): + scan_recursive(item, rel_path_str) + elif item.is_file() and item.suffix.lower() in ['.docx', '.doc']: + file_rel_path = item.relative_to(base_dir) + file_rel_path_str = str(file_rel_path).replace('\\', '/') + files.append({ + 'name': item.name, + 'path': file_rel_path_str, + 'parent_path': rel_path_str if rel_path_str != '.' else None + }) + + scan_recursive(base_dir) + + return { + 'directories': directories, + 'files': files + } + + +def get_existing_templates(conn, tenant_id: int) -> Dict: + """获取现有模板""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + # 获取所有模板(包括目录和文件) + cursor.execute(""" + SELECT id, name, file_path, parent_id + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + """, (tenant_id,)) + templates = cursor.fetchall() + + result = { + 'by_path': {}, # file_path -> template + 'by_name': {}, # name -> [templates] + 'by_id': {} # id -> template + } + + for t in templates: + result['by_id'][t['id']] = t + if t['file_path']: + result['by_path'][t['file_path']] = t + else: + # 目录节点 + name = t['name'] + if name not in result['by_name']: + result['by_name'][name] = [] + result['by_name'][name].append(t) + + return result + finally: + cursor.close() + + +def create_or_update_directory(conn, tenant_id: int, name: str, parent_id: Optional[int], + existing_templates: Dict, dry_run: bool = False) -> int: + """创建或更新目录节点""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 查找现有目录(通过名称和parent_id匹配) + candidates = existing_templates['by_name'].get(name, []) + existing = None + for candidate in candidates: + if candidate.get('parent_id') == parent_id and not candidate.get('file_path'): + existing = candidate + break + + if existing: + # 更新现有目录 + if not dry_run: + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, UPDATED_BY, existing['id'], tenant_id)) + conn.commit() + print(f" [更新目录] {name} (ID: {existing['id']})") + return existing['id'] + else: + # 创建新目录 + dir_id = generate_id() + if not dry_run: + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1) + """, (dir_id, tenant_id, parent_id, name, CREATED_BY, UPDATED_BY)) + conn.commit() + print(f" [创建目录] {name} (ID: {dir_id})") + return dir_id + finally: + cursor.close() + + +def create_or_update_file(conn, tenant_id: int, file_info: Dict, parent_id: Optional[int], + existing_templates: Dict, dry_run: bool = False) -> int: + """创建或更新文件节点""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + file_path = file_info['path'] + file_name = file_info['name'] + + # 查找现有文件(通过file_path匹配) + existing = existing_templates['by_path'].get(file_path) + + if existing: + # 更新现有文件 + if not dry_run: + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, file_name, UPDATED_BY, existing['id'], tenant_id)) + conn.commit() + print(f" [更新文件] {file_name} (ID: {existing['id']})") + return existing['id'] + else: + # 创建新文件 + file_id = generate_id() + if not dry_run: + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, (file_id, tenant_id, parent_id, file_name, file_path, CREATED_BY, UPDATED_BY)) + conn.commit() + print(f" [创建文件] {file_name} (ID: {file_id})") + return file_id + finally: + cursor.close() + + +def update_template_hierarchy(conn, tenant_id: int, dry_run: bool = False): + """更新模板层级结构""" + print_section("更新模板层级结构") + + # 1. 扫描目录结构 + print("1. 扫描目录结构...") + structure = scan_directory_structure(TEMPLATES_DIR) + print_result(True, f"找到 {len(structure['directories'])} 个目录,{len(structure['files'])} 个文件") + + if not structure['directories'] and not structure['files']: + print_result(False, "未找到任何目录或文件") + return + + # 2. 获取现有模板 + print("\n2. 获取现有模板...") + existing_templates = get_existing_templates(conn, tenant_id) + print_result(True, f"找到 {len(existing_templates['by_path'])} 个文件模板,{len(existing_templates['by_name'])} 个目录模板") + + # 3. 创建/更新目录节点 + print("\n3. 创建/更新目录节点...") + path_to_id = {} + dir_created = 0 + dir_updated = 0 + + for dir_info in structure['directories']: + parent_id = None + if dir_info['parent_path']: + parent_id = path_to_id.get(dir_info['parent_path']) + + existing = None + candidates = existing_templates['by_name'].get(dir_info['name'], []) + for candidate in candidates: + if candidate.get('parent_id') == parent_id and not candidate.get('file_path'): + existing = candidate + break + + if existing: + dir_id = existing['id'] + if existing.get('parent_id') != parent_id: + dir_updated += 1 + else: + dir_id = generate_id() + dir_created += 1 + + if not dry_run: + if existing and existing.get('parent_id') != parent_id: + cursor = conn.cursor() + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, UPDATED_BY, dir_id, tenant_id)) + conn.commit() + cursor.close() + elif not existing: + cursor = conn.cursor() + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1) + """, (dir_id, tenant_id, parent_id, dir_info['name'], CREATED_BY, UPDATED_BY)) + conn.commit() + cursor.close() + + path_to_id[dir_info['path']] = dir_id + + print_result(True, f"创建 {dir_created} 个目录,更新 {dir_updated} 个目录") + + # 4. 创建/更新文件节点 + print("\n4. 创建/更新文件节点...") + file_created = 0 + file_updated = 0 + + for file_info in structure['files']: + parent_id = None + if file_info['parent_path']: + parent_id = path_to_id.get(file_info['parent_path']) + + existing = existing_templates['by_path'].get(file_info['path']) + + if existing: + file_id = existing['id'] + if existing.get('parent_id') != parent_id or existing.get('name') != file_info['name']: + file_updated += 1 + else: + file_id = generate_id() + file_created += 1 + + if not dry_run: + if existing: + if existing.get('parent_id') != parent_id or existing.get('name') != file_info['name']: + cursor = conn.cursor() + cursor.execute(""" + UPDATE f_polic_file_config + SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s + WHERE id = %s AND tenant_id = %s + """, (parent_id, file_info['name'], UPDATED_BY, file_id, tenant_id)) + conn.commit() + cursor.close() + else: + cursor = conn.cursor() + cursor.execute(""" + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """, (file_id, tenant_id, parent_id, file_info['name'], file_info['path'], CREATED_BY, UPDATED_BY)) + conn.commit() + cursor.close() + + print_result(True, f"创建 {file_created} 个文件,更新 {file_updated} 个文件") + + return { + 'directories_created': dir_created, + 'directories_updated': dir_updated, + 'files_created': file_created, + 'files_updated': file_updated + } + + +# ==================== 字段关联关系更新 ==================== + +def get_input_fields(conn, tenant_id: int) -> Dict[str, int]: + """获取输入字段""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 1 + AND filed_code IN ('clue_info', 'target_basic_info_clue') + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + + return result + finally: + cursor.close() + + +def get_output_fields(conn, tenant_id: int) -> Dict[str, int]: + """获取所有输出字段""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, filed_code, name + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 2 + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + fields = cursor.fetchall() + + result = {} + for field in fields: + result[field['filed_code']] = field['id'] + + return result + finally: + cursor.close() + + +def extract_placeholders_from_docx(file_path: Path) -> Set[str]: + """从docx文件中提取所有占位符""" + placeholders = set() + placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') + + try: + doc = Document(file_path) + + # 从段落中提取 + for paragraph in doc.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + + # 从表格中提取 + for table in doc.tables: + try: + for row in table.rows: + for cell in row.cells: + for paragraph in cell.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + placeholders.add(field_code) + except: + continue + except Exception as e: + pass # 静默处理错误 + + return placeholders + + +def get_all_templates(conn, tenant_id: int) -> List[Dict]: + """获取所有模板(只获取文件节点)""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + sql = """ + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND file_path IS NOT NULL + AND file_path != '' + AND state = 1 + """ + cursor.execute(sql, (tenant_id,)) + templates = cursor.fetchall() + return templates + finally: + cursor.close() + + +def get_existing_relations(conn, tenant_id: int, file_id: int) -> Set[int]: + """获取模板的现有关联关系""" + cursor = conn.cursor() + try: + sql = """ + SELECT filed_id + FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id = %s + AND state = 1 + """ + cursor.execute(sql, (tenant_id, file_id)) + results = cursor.fetchall() + return {row[0] for row in results} + finally: + cursor.close() + + +def create_missing_input_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """创建缺失的输入字段""" + cursor = conn.cursor() + + try: + field_id = generate_id() + field_name_map = { + 'clue_info': '线索信息', + 'target_basic_info_clue': '被核查人基本信息(线索)' + } + field_name = field_name_map.get(field_code, field_code.replace('_', ' ')) + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 1, # field_type=1 表示输入字段 + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + return field_id + + except Exception as e: + conn.rollback() + return None + finally: + cursor.close() + + +def create_missing_output_field(conn, tenant_id: int, field_code: str) -> Optional[int]: + """创建缺失的输出字段""" + cursor = conn.cursor() + + try: + # 先检查是否已存在 + check_cursor = conn.cursor(pymysql.cursors.DictCursor) + check_cursor.execute(""" + SELECT id FROM f_polic_field + WHERE tenant_id = %s AND filed_code = %s + """, (tenant_id, field_code)) + existing = check_cursor.fetchone() + check_cursor.close() + + if existing: + return existing['id'] + + # 创建新字段 + field_id = generate_id() + field_name = field_code.replace('_', ' ') + + insert_sql = """ + INSERT INTO f_polic_field + (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + field_id, + tenant_id, + field_name, + field_code, + 2, # field_type=2 表示输出字段 + CREATED_BY, + UPDATED_BY + )) + conn.commit() + + return field_id + + except Exception as e: + conn.rollback() + return None + finally: + cursor.close() + + +def update_template_field_relations(conn, tenant_id: int, file_id: int, file_name: str, + input_field_ids: List[int], output_field_ids: List[int], + dry_run: bool = False): + """更新模板的字段关联关系""" + cursor = conn.cursor() + + try: + all_field_ids = set(input_field_ids + output_field_ids) + + if not all_field_ids: + return + + # 获取现有关联 + existing_field_ids = get_existing_relations(conn, tenant_id, file_id) + + # 需要添加的字段 + to_add = all_field_ids - existing_field_ids + # 需要删除的字段 + to_remove = existing_field_ids - all_field_ids + + if not to_add and not to_remove: + return + + if dry_run: + return + + # 删除需要移除的关联 + if to_remove: + placeholders = ','.join(['%s'] * len(to_remove)) + delete_sql = f""" + DELETE FROM f_polic_file_field + WHERE tenant_id = %s + AND file_id = %s + AND filed_id IN ({placeholders}) + """ + cursor.execute(delete_sql, [tenant_id, file_id] + list(to_remove)) + + # 添加新的关联 + for field_id in to_add: + # 检查是否已存在 + check_sql = """ + SELECT id FROM f_polic_file_field + WHERE tenant_id = %s AND file_id = %s AND filed_id = %s + """ + cursor.execute(check_sql, (tenant_id, file_id, field_id)) + if cursor.fetchone(): + continue + + relation_id = generate_id() + insert_sql = """ + INSERT INTO f_polic_file_field + (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) + """ + cursor.execute(insert_sql, ( + relation_id, + tenant_id, + file_id, + field_id, + CREATED_BY, + UPDATED_BY + )) + + conn.commit() + + except Exception as e: + conn.rollback() + raise + finally: + cursor.close() + + +def update_all_field_relations(conn, tenant_id: int, dry_run: bool = False): + """更新所有模板的字段关联关系""" + print_section("更新字段关联关系") + + # 1. 获取输入字段 + print("1. 获取输入字段...") + input_fields = get_input_fields(conn, tenant_id) + + if not input_fields: + print(" 创建缺失的输入字段...") + for field_code in ['clue_info', 'target_basic_info_clue']: + field_id = create_missing_input_field(conn, tenant_id, field_code) + if field_id: + input_fields[field_code] = field_id + + if not input_fields: + print_result(False, "无法获取或创建输入字段") + return + + input_field_ids = list(input_fields.values()) + print_result(True, f"找到 {len(input_field_ids)} 个输入字段") + + # 2. 获取输出字段 + print("\n2. 获取输出字段...") + output_fields = get_output_fields(conn, tenant_id) + print_result(True, f"找到 {len(output_fields)} 个输出字段") + + # 3. 获取所有模板 + print("\n3. 获取所有模板...") + templates = get_all_templates(conn, tenant_id) + print_result(True, f"找到 {len(templates)} 个模板") + + if not templates: + print_result(False, "未找到模板") + return + + # 4. 扫描模板占位符并更新关联关系 + print("\n4. 扫描模板占位符并更新关联关系...") + + total_updated = 0 + total_kept = 0 + total_errors = 0 + all_placeholders_found = set() + missing_fields = set() + + for i, template in enumerate(templates, 1): + template_id = template['id'] + template_name = template['name'] + file_path = template['file_path'] + + if i % 20 == 0: + print(f" 处理进度: {i}/{len(templates)}") + + # 检查本地文件是否存在 + local_file = PROJECT_ROOT / file_path + if not local_file.exists(): + total_errors += 1 + continue + + # 提取占位符 + placeholders = extract_placeholders_from_docx(local_file) + all_placeholders_found.update(placeholders) + + # 根据占位符找到对应的输出字段ID + output_field_ids = [] + for placeholder in placeholders: + if placeholder in output_fields: + output_field_ids.append(output_fields[placeholder]) + else: + # 字段不存在,尝试创建 + missing_fields.add(placeholder) + field_id = create_missing_output_field(conn, tenant_id, placeholder) + if field_id: + output_fields[placeholder] = field_id + output_field_ids.append(field_id) + + # 更新关联关系 + try: + existing = get_existing_relations(conn, tenant_id, template_id) + to_add = set(input_field_ids + output_field_ids) - existing + to_remove = existing - set(input_field_ids + output_field_ids) + + if to_add or to_remove: + update_template_field_relations( + conn, tenant_id, template_id, template_name, + input_field_ids, output_field_ids, dry_run + ) + total_updated += 1 + else: + total_kept += 1 + except Exception as e: + total_errors += 1 + + # 5. 统计结果 + print_section("字段关联更新结果") + print(f" 总模板数: {len(templates)}") + print(f" 已更新: {total_updated} 个") + print(f" 保持不变: {total_kept} 个") + print(f" 错误: {total_errors} 个") + print(f" 发现的占位符总数: {len(all_placeholders_found)} 个") + print(f" 创建的字段数: {len(missing_fields)} 个") + + return { + 'total_templates': len(templates), + 'updated': total_updated, + 'kept': total_kept, + 'errors': total_errors, + 'placeholders_found': len(all_placeholders_found), + 'fields_created': len(missing_fields) + } + + +# ==================== 主函数 ==================== + +def main(): + """主函数""" + print_section("模板更新脚本") + print("支持自定义数据库连接和租户ID配置") + + # 获取配置 + config = get_db_config_from_args() + if not config: + config = get_db_config_interactive() + + # 显示配置信息 + print_section("配置信息") + print(f" 数据库服务器: {config['host']}:{config['port']}") + print(f" 数据库名称: {config['database']}") + print(f" 用户名: {config['user']}") + print(f" 租户ID: {config['tenant_id']}") + print(f" 预览模式: {'是' if config['dry_run'] else '否'}") + print(f" 更新层级结构: {'是' if config['update_hierarchy'] else '否'}") + print(f" 更新字段关联: {'是' if config['update_fields'] else '否'}") + + if config['dry_run']: + print("\n[注意] 当前为预览模式,不会实际更新数据库") + + # 确认 + if not config.get('dry_run'): + confirm = input("\n确认执行更新?[y/N]: ").strip().lower() + if confirm != 'y': + print("已取消") + return + + # 连接数据库 + print_section("连接数据库") + conn = test_db_connection(config) + if not conn: + return + + print_result(True, "数据库连接成功") + + try: + tenant_id = config['tenant_id'] + dry_run = config['dry_run'] + + results = {} + + # 更新模板层级结构 + if config['update_hierarchy']: + hierarchy_result = update_template_hierarchy(conn, tenant_id, dry_run) + results['hierarchy'] = hierarchy_result + + # 更新字段关联关系 + if config['update_fields']: + fields_result = update_all_field_relations(conn, tenant_id, dry_run) + results['fields'] = fields_result + + # 总结 + print_section("更新完成") + if config['dry_run']: + print(" 本次为预览模式,未实际更新数据库") + else: + print(" 数据库已更新") + + if 'hierarchy' in results: + h = results['hierarchy'] + print(f"\n 层级结构:") + print(f" - 创建目录: {h['directories_created']} 个") + print(f" - 更新目录: {h['directories_updated']} 个") + print(f" - 创建文件: {h['files_created']} 个") + print(f" - 更新文件: {h['files_updated']} 个") + + if 'fields' in results: + f = results['fields'] + print(f"\n 字段关联:") + print(f" - 总模板数: {f['total_templates']} 个") + print(f" - 已更新: {f['updated']} 个") + print(f" - 保持不变: {f['kept']} 个") + print(f" - 发现的占位符: {f['placeholders_found']} 个") + print(f" - 创建的字段: {f['fields_created']} 个") + + finally: + conn.close() + print_result(True, "数据库连接已关闭") + + +if __name__ == "__main__": + try: + main() + except KeyboardInterrupt: + print("\n\n[中断] 用户取消操作") + sys.exit(0) + except Exception as e: + print(f"\n[错误] 发生异常: {str(e)}") + import traceback + traceback.print_exc() + sys.exit(1) diff --git a/verify_template_field_relations.py b/verify_template_field_relations.py new file mode 100644 index 0000000..fd8f3b0 --- /dev/null +++ b/verify_template_field_relations.py @@ -0,0 +1,250 @@ +""" +验证模板字段关联关系 +检查所有模板是否都正确关联了输入字段和输出字段 +""" +import os +import pymysql +from pathlib import Path +from typing import Dict, List, Set +from dotenv import load_dotenv + +# 加载环境变量 +load_dotenv() + +# 数据库配置 +DB_CONFIG = { + 'host': os.getenv('DB_HOST', '152.136.177.240'), + 'port': int(os.getenv('DB_PORT', 5012)), + 'user': os.getenv('DB_USER', 'finyx'), + 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), + 'database': os.getenv('DB_NAME', 'finyx'), + 'charset': 'utf8mb4' +} + + +def print_section(title): + """打印章节标题""" + print("\n" + "="*70) + print(f" {title}") + print("="*70) + + +def print_result(success, message): + """打印结果""" + status = "[OK]" if success else "[FAIL]" + print(f"{status} {message}") + + +def get_actual_tenant_id(conn) -> int: + """获取数据库中的实际tenant_id""" + cursor = conn.cursor(pymysql.cursors.DictCursor) + try: + cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1") + result = cursor.fetchone() + if result: + return result['tenant_id'] + return 1 + finally: + cursor.close() + + +def verify_template_relations(conn, tenant_id: int): + """验证模板字段关联关系""" + print_section("验证模板字段关联关系") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 获取所有模板 + cursor.execute(""" + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND file_path IS NOT NULL + AND file_path != '' + AND state = 1 + """, (tenant_id,)) + templates = cursor.fetchall() + + print(f" 总模板数: {len(templates)}") + + # 2. 获取输入字段ID + cursor.execute(""" + SELECT id, filed_code + FROM f_polic_field + WHERE tenant_id = %s + AND field_type = 1 + AND filed_code IN ('clue_info', 'target_basic_info_clue') + AND state = 1 + """, (tenant_id,)) + input_fields = {row['filed_code']: row['id'] for row in cursor.fetchall()} + input_field_ids = set(input_fields.values()) + + print(f" 输入字段: {len(input_field_ids)} 个") + for code, field_id in input_fields.items(): + print(f" - {code}: ID={field_id}") + + # 3. 检查每个模板的关联关系 + templates_with_input = 0 + templates_without_input = [] + templates_with_output = 0 + templates_without_output = [] + templates_with_both = 0 + + for template in templates: + template_id = template['id'] + template_name = template['name'] + + # 获取该模板关联的所有字段 + cursor.execute(""" + SELECT f.id, f.filed_code, f.field_type, f.name + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id + WHERE fff.tenant_id = %s + AND fff.file_id = %s + AND fff.state = 1 + AND f.state = 1 + """, (tenant_id, template_id)) + related_fields = cursor.fetchall() + + related_input_ids = {f['id'] for f in related_fields if f['field_type'] == 1} + related_output_ids = {f['id'] for f in related_fields if f['field_type'] == 2} + + # 检查输入字段 + has_all_input = input_field_ids.issubset(related_input_ids) + if has_all_input: + templates_with_input += 1 + else: + templates_without_input.append({ + 'id': template_id, + 'name': template_name, + 'missing': input_field_ids - related_input_ids + }) + + # 检查输出字段 + if related_output_ids: + templates_with_output += 1 + else: + templates_without_output.append({ + 'id': template_id, + 'name': template_name + }) + + # 同时有输入和输出字段 + if has_all_input and related_output_ids: + templates_with_both += 1 + + # 4. 输出统计结果 + print_section("验证结果统计") + print(f" 有输入字段关联: {templates_with_input}/{len(templates)} ({templates_with_input*100//len(templates)}%)") + print(f" 有输出字段关联: {templates_with_output}/{len(templates)} ({templates_with_output*100//len(templates)}%)") + print(f" 同时有输入和输出: {templates_with_both}/{len(templates)} ({templates_with_both*100//len(templates)}%)") + + if templates_without_input: + print(f"\n [警告] {len(templates_without_input)} 个模板缺少输入字段关联:") + for t in templates_without_input[:5]: + print(f" - {t['name']} (ID: {t['id']})") + print(f" 缺少字段ID: {t['missing']}") + + if templates_without_output: + print(f"\n [警告] {len(templates_without_output)} 个模板没有输出字段关联:") + for t in templates_without_output[:5]: + print(f" - {t['name']} (ID: {t['id']})") + + # 5. 显示示例模板的关联关系 + print_section("示例模板的关联关系") + + # 选择几个有输出字段的模板 + sample_templates = [] + for template in templates[:5]: + template_id = template['id'] + cursor.execute(""" + SELECT f.id, f.filed_code, f.field_type, f.name + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id + WHERE fff.tenant_id = %s + AND fff.file_id = %s + AND fff.state = 1 + AND f.state = 1 + ORDER BY f.field_type, f.filed_code + """, (tenant_id, template_id)) + related_fields = cursor.fetchall() + + if related_fields: + sample_templates.append({ + 'template': template, + 'fields': related_fields + }) + + for sample in sample_templates: + template = sample['template'] + fields = sample['fields'] + + input_fields_list = [f for f in fields if f['field_type'] == 1] + output_fields_list = [f for f in fields if f['field_type'] == 2] + + print(f"\n 模板: {template['name']} (ID: {template['id']})") + print(f" 输入字段 ({len(input_fields_list)} 个):") + for f in input_fields_list: + print(f" - {f['name']} ({f['filed_code']})") + print(f" 输出字段 ({len(output_fields_list)} 个):") + for f in output_fields_list[:10]: # 只显示前10个 + print(f" - {f['name']} ({f['filed_code']})") + if len(output_fields_list) > 10: + print(f" ... 还有 {len(output_fields_list) - 10} 个") + + # 6. 总体统计 + print_section("总体统计") + + cursor.execute(""" + SELECT + f.field_type, + CASE + WHEN f.field_type = 1 THEN '输入字段' + WHEN f.field_type = 2 THEN '输出字段' + ELSE '未知' + END as type_name, + COUNT(DISTINCT fff.file_id) as template_count, + COUNT(*) as relation_count + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id + WHERE fff.tenant_id = %s + AND fff.state = 1 + AND f.state = 1 + GROUP BY f.field_type + """, (tenant_id,)) + stats = cursor.fetchall() + + for stat in stats: + print(f" {stat['type_name']}:") + print(f" - 关联的模板数: {stat['template_count']} 个") + print(f" - 关联关系总数: {stat['relation_count']} 条") + + finally: + cursor.close() + + +def main(): + """主函数""" + print_section("验证模板字段关联关系") + + try: + conn = pymysql.connect(**DB_CONFIG) + print_result(True, "数据库连接成功") + except Exception as e: + print_result(False, f"数据库连接失败: {str(e)}") + return + + try: + tenant_id = get_actual_tenant_id(conn) + print(f"实际tenant_id: {tenant_id}") + + verify_template_relations(conn, tenant_id) + + finally: + conn.close() + print_result(True, "数据库连接已关闭") + + +if __name__ == "__main__": + main() diff --git a/技术文档/数据库设计说明.md b/技术文档/数据库设计说明.md new file mode 100644 index 0000000..35b39e4 --- /dev/null +++ b/技术文档/数据库设计说明.md @@ -0,0 +1,15 @@ +f_polic_field 表是字段表,存储有哪些数据字段,其中field_type值为2的是输出字段,值为1的是输入字段 + +f_polic_file_config 表是文件配置表,用于存储模板信息,已经做过初始化,保存了模板文件的地址以及父级关系 + +f_polic_file_field 表是文件配置字段关联表,用于存储f_polic_field表和f_polic_file_config 表的关联关系 + +关联关系如下: + +f_polic_file_field的 filed_id(字段id)对应 f_polic_field的id + +f_polic_file_field的 file_id(文件id)对应 f_polic_file_config的id + +通过这样的关联关系,其他研发人员开发的功能模块,可以通过查询f_polic_file_config 表获得不同模板关联了哪些输入和输出字段,然后前端对应展示 + +之前虽然已经创建了关联关系,但是大都是通过“input_data”和“template_code”实现的,这个并不符合整体设计,这两个字段现在抛弃不使用。 \ No newline at end of file diff --git a/技术文档/模板更新脚本使用说明.md b/技术文档/模板更新脚本使用说明.md new file mode 100644 index 0000000..01f28a2 --- /dev/null +++ b/技术文档/模板更新脚本使用说明.md @@ -0,0 +1,376 @@ +# 模板更新脚本使用说明 + +## 一、脚本概述 + +`update_templates_custom.py` 是一个灵活的模板更新脚本,支持: + +1. **自定义数据库连接配置**(不从.env文件读取) +2. **自定义租户ID(tenant_id)配置** +3. **更新模板层级结构**(根据`template_finish/`目录结构) +4. **更新字段关联关系**(输入字段和输出字段) + +## 二、功能说明 + +### 2.1 更新模板层级结构 + +根据本地 `template_finish/` 目录结构更新数据库中的模板层级关系: + +- 扫描目录结构,识别所有目录和文件 +- 创建或更新目录节点(`f_polic_file_config`表中`file_path=NULL`的记录) +- 创建或更新文件节点(`f_polic_file_config`表中`file_path`不为空的记录) +- 建立正确的`parent_id`层级关系 + +### 2.2 更新字段关联关系 + +自动为所有模板建立字段关联关系: + +- **输入字段**:所有模板都关联 `clue_info` 和 `target_basic_info_clue` +- **输出字段**:根据模板文件中的占位符(`{{field_code}}`)自动关联对应的输出字段 +- **自动创建字段**:如果占位符对应的字段不存在,自动创建该字段 + +## 三、使用方法 + +### 3.1 命令行参数方式 + +```bash +python update_templates_custom.py --host <主机> --port <端口> --user <用户名> --password <密码> --database <数据库名> --tenant-id <租户ID> +``` + +**完整示例**: + +```bash +# 基本使用 +python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 + +# 预览模式(不实际更新数据库) +python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --dry-run + +# 只更新层级结构,不更新字段关联 +python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --update-hierarchy --no-update-fields + +# 只更新字段关联,不更新层级结构 +python update_templates_custom.py --host 192.168.1.100 --port 3306 --user root --password 123456 --database finyx --tenant-id 1 --update-fields --no-update-hierarchy +``` + +**参数说明**: + +| 参数 | 说明 | 必填 | +|------|------|------| +| `--host` | MySQL服务器地址 | 是 | +| `--port` | MySQL服务器端口 | 是 | +| `--user` | MySQL用户名 | 是 | +| `--password` | MySQL密码 | 是 | +| `--database` | 数据库名称 | 是 | +| `--tenant-id` | 租户ID | 是 | +| `--dry-run` | 预览模式(不实际更新) | 否 | +| `--update-hierarchy` | 更新模板层级结构(默认启用) | 否 | +| `--update-fields` | 更新字段关联关系(默认启用) | 否 | + +### 3.2 交互式输入方式 + +如果命令行参数不完整,脚本会自动进入交互式输入模式: + +```bash +python update_templates_custom.py +``` + +交互式输入示例: + +``` +====================================================================== + 数据库连接配置 +====================================================================== +请输入数据库连接信息(直接回车使用默认值): + +MySQL服务器地址 [152.136.177.240]: 192.168.1.100 +MySQL服务器端口 [5012]: 3306 +MySQL用户名 [finyx]: root +MySQL密码 [留空使用默认]: ******** +数据库名称 [finyx]: finyx + +租户配置: +租户ID (tenant_id) [必填]: 1 + +更新选项: +更新模板层级结构?[Y/n]: y +更新字段关联关系?[Y/n]: y +预览模式(不实际更新)?[y/N]: n + +确认执行更新?[y/N]: y +``` + +### 3.3 查看帮助信息 + +```bash +python update_templates_custom.py --help +``` + +## 四、使用场景 + +### 4.1 首次部署 + +首次部署时,需要将本地模板同步到数据库: + +```bash +python update_templates_custom.py \ + --host 192.168.1.100 \ + --port 3306 \ + --user root \ + --password your_password \ + --database finyx \ + --tenant-id 1 +``` + +### 4.2 多租户环境 + +在不同租户下更新模板: + +```bash +# 租户1 +python update_templates_custom.py --host ... --tenant-id 1 + +# 租户2 +python update_templates_custom.py --host ... --tenant-id 2 +``` + +### 4.3 预览更新内容 + +在正式更新前,先预览会执行哪些操作: + +```bash +python update_templates_custom.py --host ... --tenant-id 1 --dry-run +``` + +### 4.4 增量更新 + +只更新层级结构或只更新字段关联: + +```bash +# 只更新层级结构 +python update_templates_custom.py --host ... --tenant-id 1 --update-hierarchy --no-update-fields + +# 只更新字段关联 +python update_templates_custom.py --host ... --tenant-id 1 --update-fields --no-update-hierarchy +``` + +## 五、输出说明 + +### 5.1 正常输出示例 + +``` +====================================================================== + 模板更新脚本 +====================================================================== +支持自定义数据库连接和租户ID配置 + +====================================================================== + 配置信息 +====================================================================== + 数据库服务器: 192.168.1.100:3306 + 数据库名称: finyx + 用户名: root + 租户ID: 1 + 预览模式: 否 + 更新层级结构: 是 + 更新字段关联: 是 + +确认执行更新?[y/N]: y + +====================================================================== + 连接数据库 +====================================================================== +[OK] 数据库连接成功 + +====================================================================== + 更新模板层级结构 +====================================================================== +1. 扫描目录结构... +[OK] 找到 33 个目录,122 个文件 + +2. 获取现有模板... +[OK] 找到 122 个文件模板,28 个目录模板 + +3. 创建/更新目录节点... + [创建目录] 1-谈话函询模板 (ID: 1766711031977435) + [更新目录] 2-初核模版 (ID: 1766711031977436) + ... +[OK] 创建 5 个目录,更新 28 个目录 + +4. 创建/更新文件节点... + [创建文件] 请示报告卡(XXX).docx (ID: 1766711031977437) + [更新文件] 谈话通知书第一联.docx (ID: 1766711031977438) + ... +[OK] 创建 2 个文件,更新 120 个文件 + +====================================================================== + 更新字段关联关系 +====================================================================== +1. 获取输入字段... +[OK] 找到 2 个输入字段 + +2. 获取输出字段... +[OK] 找到 72 个输出字段 + +3. 获取所有模板... +[OK] 找到 122 个模板 + +4. 扫描模板占位符并更新关联关系... + 处理进度: 20/122 + 处理进度: 40/122 + ... + +====================================================================== + 字段关联更新结果 +====================================================================== + 总模板数: 122 + 已更新: 93 个 + 保持不变: 29 个 + 错误: 0 个 + 发现的占位符总数: 35 个 + 创建的字段数: 0 个 + +====================================================================== + 更新完成 +====================================================================== + 数据库已更新 + + 层级结构: + - 创建目录: 5 个 + - 更新目录: 28 个 + - 创建文件: 2 个 + - 更新文件: 120 个 + + 字段关联: + - 总模板数: 122 个 + - 已更新: 93 个 + - 保持不变: 29 个 + - 发现的占位符: 35 个 + - 创建的字段: 0 个 + +[OK] 数据库连接已关闭 +``` + +### 5.2 预览模式输出 + +预览模式下,会显示将要执行的操作,但不会实际更新数据库: + +``` +[注意] 当前为预览模式,不会实际更新数据库 + +... + +[OK] 预览模式,未实际更新数据库 +``` + +## 六、注意事项 + +### 6.1 数据库连接 + +- 确保数据库服务器可访问 +- 确保用户有足够的权限(SELECT, INSERT, UPDATE, DELETE) +- 确保数据库名称正确 + +### 6.2 租户ID + +- **tenant_id是必填项**,不能为空 +- 确保tenant_id在数据库中存在或需要创建 +- 不同租户的数据是隔离的,更新时请确认tenant_id正确 + +### 6.3 模板文件 + +- 脚本会扫描 `template_finish/` 目录下的所有 `.docx` 和 `.doc` 文件 +- 确保模板文件路径与数据库中的`file_path`字段匹配 +- 如果模板文件不存在,会跳过该模板的字段关联更新 + +### 6.4 字段创建 + +- 如果模板中的占位符对应的字段不存在,脚本会自动创建该字段 +- 创建的字段类型为输出字段(`field_type=2`) +- 字段名称会根据`field_code`自动生成(将下划线替换为空格) + +### 6.5 数据安全 + +- **建议先使用预览模式(--dry-run)**查看将要执行的操作 +- 在生产环境执行前,建议先备份数据库 +- 确保有足够的磁盘空间和数据库连接数 + +## 七、常见问题 + +### 7.1 连接失败 + +**问题**:数据库连接失败 + +**解决方案**: +- 检查数据库服务器地址和端口是否正确 +- 检查用户名和密码是否正确 +- 检查网络连接是否正常 +- 检查防火墙设置 + +### 7.2 权限不足 + +**问题**:执行更新时提示权限不足 + +**解决方案**: +- 确保数据库用户有足够的权限 +- 需要以下权限:SELECT, INSERT, UPDATE, DELETE + +### 7.3 模板文件不存在 + +**问题**:部分模板的字段关联更新失败,提示文件不存在 + +**解决方案**: +- 检查数据库中的`file_path`是否正确 +- 确保模板文件存在于`template_finish/`目录下 +- 检查文件路径中的斜杠方向(Windows使用反斜杠,脚本会自动处理) + +### 7.4 占位符识别错误 + +**问题**:模板中的占位符没有被正确识别 + +**解决方案**: +- 确保占位符格式正确:`{{field_code}}` +- 占位符中的`field_code`不能包含空格 +- 检查模板文件是否损坏 + +## 八、技术细节 + +### 8.1 占位符提取 + +脚本使用正则表达式 `\{\{([^}]+)\}\}` 从Word文档中提取占位符: + +- 扫描所有段落(paragraphs) +- 扫描所有表格单元格(table cells) +- 提取所有匹配的占位符 + +### 8.2 ID生成 + +使用时间戳生成唯一ID: + +```python +def generate_id(): + import time + return int(time.time() * 1000000) +``` + +### 8.3 层级结构处理 + +- 按目录层级顺序处理(先处理父目录,再处理子目录) +- 使用路径映射表(`path_to_id`)维护目录ID关系 +- 文件节点的`parent_id`指向其所在目录的ID + +### 8.4 字段关联处理 + +- 先获取现有关联关系 +- 计算需要添加和删除的关联 +- 批量更新关联关系 +- 自动创建缺失的字段 + +## 九、更新历史 + +- **2025-12-16**: 创建脚本,支持自定义数据库连接和租户ID配置 + +--- + +**脚本路径**: `update_templates_custom.py` +**文档版本**: 1.0 +**最后更新**: 2025-12-16 diff --git a/模板字段关联关系更新报告.md b/模板字段关联关系更新报告.md new file mode 100644 index 0000000..5ddaaed --- /dev/null +++ b/模板字段关联关系更新报告.md @@ -0,0 +1,251 @@ +# 模板字段关联关系更新报告 + +## 更新时间 +2025-12-16 + +## 一、更新概述 + +根据数据库设计说明,更新了所有模板的字段关联关系: +- **输入字段**:所有模板都关联了 `clue_info` 和 `target_basic_info_clue` +- **输出字段**:根据模板中的占位符自动关联对应的输出字段 + +## 二、更新规则 + +### 2.1 输入字段关联规则 + +所有模板(文件节点)都自动关联以下输入字段: +1. **clue_info** - 线索信息 +2. **target_basic_info_clue** - 被核查人员工作基本情况线索 + +### 2.2 输出字段关联规则 + +根据模板文件中的占位符(格式:`{{field_code}}`)自动关联对应的输出字段: +- 扫描模板文件中的段落和表格 +- 提取所有占位符 +- 根据占位符的 `field_code` 查找对应的输出字段(`field_type=2`) +- 如果字段不存在,自动创建该字段 + +## 三、更新结果 + +### 3.1 输入字段关联 + +- **关联的模板数**: 122/122 (100%) +- **输入字段数**: 2个 + - `clue_info` (ID: 1764656917384058) + - `target_basic_info_clue` (ID: 1764656917996367) +- **关联关系总数**: 260条(122个模板 × 2个输入字段) + +### 3.2 输出字段关联 + +- **关联的模板数**: 93/122 (76%) +- **未关联的模板数**: 29个(这些模板没有占位符或占位符格式不正确) +- **发现的占位符总数**: 35个不同的占位符 +- **关联关系总数**: 325条 + +### 3.3 占位符列表(35个) + +所有发现的占位符及其对应的输出字段: + +1. `appointment_location` - 约谈地点 +2. `appointment_time` - 约谈时间 +3. `approval_time` - 审批时间 +4. `clue_source` - 线索来源 +5. `department_opinion` - 部门意见 +6. `handler_name` - 处理人姓名 +7. `handling_department` - 处理部门 +8. `investigation_location` - 调查地点 +9. `investigation_team_code` - 调查组编号 +10. `investigation_team_leader_name` - 调查组组长姓名 +11. `investigation_team_member_names` - 调查组成员姓名 +12. `investigation_unit_name` - 调查单位名称 +13. `target_address` - 被核查人地址 +14. `target_age` - 被核查人年龄 +15. `target_basic_info` - 被核查人基本信息 +16. `target_contact` - 被核查人联系方式 +17. `target_date_of_birth` - 被核查人出生日期 +18. `target_date_of_birth_full` - 被核查人出生日期(完整) +19. `target_education` - 被核查人学历 +20. `target_education_level` - 被核查人学历层次 +21. `target_ethnicity` - 被核查人民族 +22. `target_family_situation` - 被核查人家庭情况 +23. `target_gender` - 被核查人性别 +24. `target_id_number` - 被核查人身份证号 +25. `target_issue_description` - 被核查人问题描述 +26. `target_name` - 被核查人姓名 +27. `target_organization` - 被核查人单位 +28. `target_organization_and_position` - 被核查人单位及职务 +29. `target_place_of_origin` - 被核查人籍贯 +30. `target_political_status` - 被核查人政治面貌 +31. `target_position` - 被核查人职务 +32. `target_professional_rank` - 被核查人职级 +33. `target_registered_address` - 被核查人户籍地址 +34. `target_social_relations` - 被核查人社会关系 +35. `target_work_basic_info` - 被核查人工作基本信息 + +## 四、示例模板关联关系 + +### 4.1 示例1:8-1请示报告卡(初核报告结论) + +- **模板ID**: 1765273962631542 +- **输入字段** (2个): + - clue_info - 线索信息 + - target_basic_info_clue - 被核查人员工作基本情况线索 +- **输出字段** (3个): + - investigation_team_code - 调查组编号 + - target_name - 被核查人姓名 + - target_organization_and_position - 被核查人单位及职务 + +### 4.2 示例2:谈话通知书第三联 + +- **模板ID**: 1765273963038891 +- **输入字段** (2个): + - clue_info - 线索信息 + - target_basic_info_clue - 被核查人员工作基本情况线索 +- **输出字段** (3个): + - appointment_location - 约谈地点 + - appointment_time - 约谈时间 + - target_name - 被核查人姓名 + +### 4.3 示例3:谈话通知书第一联 + +- **模板ID**: 1765273963625524 +- **输入字段** (2个): + - clue_info - 线索信息 + - target_basic_info_clue - 被核查人员工作基本情况线索 +- **输出字段** (9个): + - appointment_location - 约谈地点 + - appointment_time - 约谈时间 + - approval_time - 审批时间 + - handler_name - 处理人姓名 + - handling_department - 处理部门 + - target_id_number - 被核查人身份证号 + - target_name - 被核查人姓名 + - target_organization - 被核查人单位 + - target_position - 被核查人职务 + +## 五、数据库表结构 + +### 5.1 f_polic_file_field 表 + +关联关系存储在 `f_polic_file_field` 表中: + +- `id`: 关联关系ID(主键) +- `tenant_id`: 租户ID +- `file_id`: 文件配置ID(关联 `f_polic_file_config.id`) +- `filed_id`: 字段ID(关联 `f_polic_field.id`) +- `state`: 状态(1=启用,0=禁用) + +### 5.2 查询示例 + +**查询某个模板关联的所有字段**: +```sql +SELECT + f.id AS field_id, + f.name AS field_name, + f.filed_code AS field_code, + f.field_type, + CASE + WHEN f.field_type = 1 THEN '输入字段' + WHEN f.field_type = 2 THEN '输出字段' + END AS field_type_name +FROM f_polic_file_field fff +INNER JOIN f_polic_field f ON fff.filed_id = f.id +WHERE fff.tenant_id = 1 +AND fff.file_id = <模板ID> +AND fff.state = 1 +AND f.state = 1 +ORDER BY f.field_type, f.filed_code; +``` + +**查询某个模板关联的输入字段**: +```sql +SELECT f.* +FROM f_polic_file_field fff +INNER JOIN f_polic_field f ON fff.filed_id = f.id +WHERE fff.tenant_id = 1 +AND fff.file_id = <模板ID> +AND f.field_type = 1 +AND fff.state = 1 +AND f.state = 1; +``` + +**查询某个模板关联的输出字段**: +```sql +SELECT f.* +FROM f_polic_file_field fff +INNER JOIN f_polic_field f ON fff.filed_id = f.id +WHERE fff.tenant_id = 1 +AND fff.file_id = <模板ID> +AND f.field_type = 2 +AND fff.state = 1 +AND f.state = 1; +``` + +## 六、验证结果 + +### 6.1 关联完整性 + +✅ **输入字段关联**: 100% 完成 +- 所有122个模板都关联了2个输入字段 +- 关联关系总数: 260条 + +✅ **输出字段关联**: 76% 完成 +- 93个模板关联了输出字段 +- 29个模板没有输出字段(这些模板没有占位符,属于正常情况) +- 关联关系总数: 325条 + +### 6.2 总体统计 + +- **总模板数**: 122个 +- **总关联关系数**: 585条 + - 输入字段关联: 260条 + - 输出字段关联: 325条 + +## 七、注意事项 + +1. **tenant_id**: 数据库中的实际tenant_id是 `1`,不是配置中的 `615873064429507639` + +2. **无占位符的模板**: 29个模板没有输出字段关联,这些模板可能是: + - 目录节点(但已过滤,只处理文件节点) + - 没有占位符的模板文件 + - 占位符格式不正确的模板 + +3. **字段自动创建**: 如果模板中的占位符对应的字段不存在,系统会自动创建该字段(`field_type=2`,输出字段) + +4. **关联关系维护**: + - 添加新模板时,会自动关联输入字段 + - 输出字段根据模板中的占位符自动关联 + - 如果模板占位符发生变化,需要重新运行更新脚本 + +## 八、更新脚本 + +本次更新使用的脚本: + +1. **`update_all_template_field_relations.py`**: + - 扫描所有模板的占位符 + - 为所有模板关联输入字段 + - 根据占位符关联输出字段 + - 自动创建缺失的字段 + +2. **`verify_template_field_relations.py`**: + - 验证关联关系的完整性 + - 检查是否有遗漏的关联 + +## 九、后续维护 + +1. **添加新模板**: + - 将模板文件放到对应目录 + - 运行 `update_all_template_field_relations.py` 更新关联关系 + +2. **修改模板占位符**: + - 更新模板文件中的占位符 + - 重新运行更新脚本同步关联关系 + +3. **验证关联关系**: + - 定期运行 `verify_template_field_relations.py` 检查关联关系 + +--- + +**更新人员**: 自动化脚本 +**更新日期**: 2025-12-16 +**更新状态**: ✅ 完成