ai-business-write/sync_templates_between_databases.py
2025-12-30 10:41:35 +08:00

780 lines
28 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
跨数据库同步模板、字段和关联关系
功能:
1. 从.env文件读取源数据库配置
2. 同步到目标数据库10.100.31.21
3. 处理ID映射关系两个数据库的ID不同
4. 根据业务逻辑name, filed_code, file_path匹配数据
使用方法:
python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password FknJYz3FA5WDYtsd --target-database finyx --target-tenant-id 1
"""
import os
import sys
import pymysql
import argparse
from pathlib import Path
from typing import Dict, List, Set, Optional, Tuple
from dotenv import load_dotenv
# On Windows, re-wrap the standard output streams so that everything this
# script prints is encoded as UTF-8.
if sys.platform == 'win32':
    import io

    for _stream_name in ('stdout', 'stderr'):
        _binary_stream = getattr(sys, _stream_name).buffer
        setattr(sys, _stream_name, io.TextIOWrapper(_binary_stream, encoding='utf-8'))
    del _stream_name, _binary_stream

# Load environment variables (the source database settings are read from them).
load_dotenv()

# Project root: the directory containing this script.
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"

# Values written to the created_by / updated_by columns of rows this script
# inserts or updates.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
def print_section(title):
    """Print a section heading: a blank line, a 70-character '=' rule, the title, and a closing rule."""
    rule = "=" * 70
    print(f"\n{rule}\n {title}\n{rule}")
def print_result(success, message):
    """Print *message* prefixed with "[OK]" when *success* is truthy, otherwise "[FAIL]"."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print("{} {}".format(tag, message))
def generate_id():
    """Return a new integer ID based on the current time in microseconds.

    The value is ``int(time.time() * 1000000)``, bumped when necessary so that
    every call within this process returns a value strictly greater than the
    previous one. Without the bump, two calls that land inside the same clock
    tick (easy in a tight loop, or on platforms with a coarse clock) would
    return the same number and produce duplicate primary keys.

    Uniqueness is only guaranteed within a single process; no locking is done,
    so concurrent calls from several threads are not protected.

    Returns:
        int: an ID that is unique and increasing for the lifetime of the process.
    """
    import time

    candidate = int(time.time() * 1000000)
    # The last issued ID is kept on the function object so no module-level
    # state has to be introduced.
    previous = getattr(generate_id, "_last_id", 0)
    if candidate <= previous:
        candidate = previous + 1
    generate_id._last_id = candidate
    return candidate
def get_source_db_config() -> Dict:
    """Build the source database connection settings from environment variables.

    Reads DB_HOST, DB_PORT, DB_USER, DB_PASSWORD and DB_NAME.

    Returns:
        A dict with keys host, port (converted to int), user, password,
        database and charset (always 'utf8mb4').

    Raises:
        ValueError: if any of the five variables is missing or empty, or if
            DB_PORT cannot be converted to an integer.
    """
    env_names = ('DB_HOST', 'DB_PORT', 'DB_USER', 'DB_PASSWORD', 'DB_NAME')
    host, port, user, password, database = (os.getenv(name) for name in env_names)

    for value in (host, port, user, password, database):
        if not value:
            raise ValueError(
                "源数据库配置不完整,请在.env文件中配置以下环境变量\n"
                "DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME"
            )

    config = {}
    config['host'] = host
    config['port'] = int(port)
    config['user'] = user
    config['password'] = password
    config['database'] = database
    config['charset'] = 'utf8mb4'
    return config
def get_target_db_config_from_args() -> Dict:
    """Parse the command line and return the target database settings.

    Required options: --target-host, --target-port, --target-user,
    --target-password, --target-database, --target-tenant-id.
    Optional: --source-tenant-id (int) and the --dry-run flag.

    Returns:
        A dict with keys host, port, user, password, database, charset
        (always 'utf8mb4'), tenant_id, source_tenant_id (None when the option
        is not given) and dry_run.

    argparse terminates the process (SystemExit) on --help or invalid arguments.
    """
    parser = argparse.ArgumentParser(
        description='跨数据库同步模板、字段和关联关系',
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=(
            "\n"
            "示例:\n"
            "python sync_templates_between_databases.py --target-host 10.100.31.21 --target-port 3306 --target-user finyx --target-password FknJYz3FA5WDYtsd --target-database finyx --target-tenant-id 1\n"
        ),
    )

    # (flag, value type, help text) for every mandatory option, in declaration order.
    required_options = (
        ('--target-host', str, '目标MySQL服务器地址'),
        ('--target-port', int, '目标MySQL服务器端口'),
        ('--target-user', str, '目标MySQL用户名'),
        ('--target-password', str, '目标MySQL密码'),
        ('--target-database', str, '目标数据库名称'),
        ('--target-tenant-id', int, '目标租户ID'),
    )
    for flag, value_type, help_text in required_options:
        parser.add_argument(flag, type=value_type, required=True, help=help_text)

    parser.add_argument('--source-tenant-id', type=int, help='源租户ID如果不指定将使用数据库中的第一个tenant_id')
    parser.add_argument('--dry-run', action='store_true', help='预览模式(不实际更新数据库)')

    parsed = parser.parse_args()
    return dict(
        host=parsed.target_host,
        port=parsed.target_port,
        user=parsed.target_user,
        password=parsed.target_password,
        database=parsed.target_database,
        charset='utf8mb4',
        tenant_id=parsed.target_tenant_id,
        source_tenant_id=parsed.source_tenant_id,
        dry_run=parsed.dry_run,
    )
def test_db_connection(config: Dict, label: str) -> Optional[pymysql.Connection]:
    """Try to open a pymysql connection using the settings in *config*.

    Args:
        config: dict providing host, port, user, password, database and charset.
        label: text placed in front of the failure message to identify
            which database could not be reached.

    Returns:
        The open connection, or None if anything raised while connecting
        (a "[FAIL]" line with the error text is printed in that case).
    """
    wanted_keys = ('host', 'port', 'user', 'password', 'database', 'charset')
    try:
        # The key lookups stay inside the try block so that a missing setting
        # is reported the same way as a failed connection.
        connect_kwargs = {key: config[key] for key in wanted_keys}
        connection = pymysql.connect(**connect_kwargs)
    except Exception as exc:
        print_result(False, f"{label}数据库连接失败: {str(exc)}")
        return None
    return connection
def get_source_tenant_id(conn) -> int:
    """Return one tenant_id found in f_polic_file_config, or 1 when the table yields no row.

    The query selects a single distinct tenant_id without an ORDER BY, so
    which tenant is returned when several exist is left to the database.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
        row = cursor.fetchone()
    finally:
        cursor.close()

    if not row:
        return 1
    return row['tenant_id']
def read_source_fields(conn, tenant_id: int) -> Tuple[Dict[str, Dict], Dict[str, Dict]]:
    """Read the tenant's fields with state = 1 from f_polic_field.

    Args:
        conn: open connection to the source database.
        tenant_id: tenant whose rows are read.

    Returns:
        (input_fields, output_fields). Each dict maps filed_code to a dict with
        the keys id, tenant_id, name, filed_code, field_type and state. Rows
        with field_type 1 go to the first dict, field_type 2 to the second;
        any other field_type is ignored. A repeated filed_code within one
        group keeps the row read last.
    """
    columns = ('id', 'tenant_id', 'name', 'filed_code', 'field_type', 'state')
    query = (
        "\n"
        "SELECT id, tenant_id, name, filed_code, field_type, state\n"
        "FROM f_polic_field\n"
        "WHERE tenant_id = %s\n"
        "AND state = 1\n"
        "ORDER BY field_type, filed_code\n"
    )

    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        rows = cursor.fetchall()

        inputs, outputs = {}, {}
        for row in rows:
            info = {column: row[column] for column in columns}
            kind = row['field_type']
            if kind == 1:
                inputs[row['filed_code']] = info
            elif kind == 2:
                outputs[row['filed_code']] = info
        return inputs, outputs
    finally:
        cursor.close()
def read_source_templates(conn, tenant_id: int) -> Dict[str, Dict]:
    """Read the tenant's templates with state = 1 from f_polic_file_config.

    Args:
        conn: open connection to the source database.
        tenant_id: tenant whose rows are read.

    Returns:
        A dict whose values hold id, tenant_id, parent_id, name, file_path and
        state for one row each. Keys are:

        * the row's file_path when it has one (a repeated file_path keeps the
          row read last);
        * ``"DIR:<name>"`` for a row without file_path (a directory node);
        * ``"DIR:<name>#<id>"`` for any further directory node with a name
          that is already taken. Directories are matched by name *and* parent
          during the sync, so same-named directories under different parents
          are legitimate and must not overwrite each other here.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # "id" is added as the last sort key so that the row order, and
        # therefore which same-named directory keeps the plain "DIR:<name>"
        # key, is the same on every run.
        sql = """
            SELECT id, tenant_id, parent_id, name, file_path, state
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
            ORDER BY file_path, name, id
        """
        cursor.execute(sql, (tenant_id,))
        templates = cursor.fetchall()

        result = {}
        for template in templates:
            if template['file_path']:
                key = template['file_path']
            else:
                key = f"DIR:{template['name']}"
                if key in result:
                    # Same directory name seen before: keep both rows by
                    # making the key unique with the row id.
                    key = f"{key}#{template['id']}"
            result[key] = {
                'id': template['id'],
                'tenant_id': template['tenant_id'],
                'parent_id': template['parent_id'],
                'name': template['name'],
                'file_path': template['file_path'],
                'state': template['state'],
            }
        return result
    finally:
        cursor.close()
def read_source_relations(conn, tenant_id: int) -> Dict[int, List[int]]:
    """Read the tenant's template-to-field links with state = 1 from f_polic_file_field.

    Args:
        conn: open connection to the source database.
        tenant_id: tenant whose rows are read.

    Returns:
        A dict mapping each file_id to the list of filed_id values linked to
        it, in the order the rows were returned.
    """
    query = (
        "\n"
        "SELECT file_id, filed_id\n"
        "FROM f_polic_file_field\n"
        "WHERE tenant_id = %s\n"
        "AND state = 1\n"
    )

    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (tenant_id,))
        grouped = {}
        for row in cursor.fetchall():
            grouped.setdefault(row['file_id'], []).append(row['filed_id'])
        return grouped
    finally:
        cursor.close()
def _sync_field_group(conn, tenant_id, source_fields, field_type, existing_by_code, next_id, dry_run):
    """Map one group of source fields (a single field_type) onto the target tenant.

    A source field whose (filed_code, field_type) already exists in the target
    reuses the existing ID; otherwise a new ID is taken from *next_id* and,
    unless *dry_run* is set, a row is inserted. All inserts of the group are
    committed together; on any error they are rolled back and the error is
    re-raised.

    Returns:
        (id_map, matched, created) where id_map maps source field ID to target
        field ID.
    """
    id_map = {}
    matched = 0
    created = 0
    writer = None if dry_run else conn.cursor()
    try:
        for code, source_field in source_fields.items():
            key = (code, field_type)
            if key in existing_by_code:
                id_map[source_field['id']] = existing_by_code[key]
                matched += 1
                continue

            target_id = next_id()
            id_map[source_field['id']] = target_id
            if writer is not None:
                writer.execute("""
                    INSERT INTO f_polic_field
                    (id, tenant_id, name, filed_code, field_type, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (
                    target_id,
                    tenant_id,
                    source_field['name'],
                    source_field['filed_code'],
                    field_type,
                    CREATED_BY,
                    UPDATED_BY,
                ))
            created += 1

        if writer is not None:
            conn.commit()
    except Exception:
        if writer is not None:
            conn.rollback()
        raise
    finally:
        if writer is not None:
            writer.close()
    return id_map, matched, created


def sync_fields_to_target(conn, tenant_id: int, source_input_fields: Dict, source_output_fields: Dict,
                          dry_run: bool = False) -> Tuple[Dict[int, int], Dict[int, int]]:
    """Synchronise fields into the target database.

    Target fields of the tenant with state = 1 are indexed by
    (filed_code, field_type). Input fields are handled as field_type 1 and
    output fields as field_type 2; each source field either reuses the
    matching target row or gets a newly inserted one.

    Args:
        conn: open connection to the target database.
        tenant_id: target tenant the rows belong to.
        source_input_fields: filed_code -> field info (needs id, name, filed_code).
        source_output_fields: same shape, for output fields.
        dry_run: when true nothing is written; IDs for would-be-created fields
            are still generated so the returned maps are complete.

    Returns:
        (input_field_id_map, output_field_id_map), each mapping source field ID
        to target field ID.

    Raises:
        Whatever the database driver raises; the inserts of the group that
        failed are rolled back first.
    """
    print_section("同步字段到目标数据库")

    # 1. Load the fields that already exist in the target.
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("""
            SELECT id, filed_code, field_type
            FROM f_polic_field
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        existing_fields = cursor.fetchall()
    finally:
        cursor.close()

    existing_by_code = {
        (field['filed_code'], field['field_type']): field['id']
        for field in existing_fields
    }
    print(f" 目标数据库现有字段: {len(existing_fields)}")

    # generate_id() derives IDs from the clock; two calls within one clock tick
    # could return the same value. Force the IDs handed out by this run to be
    # strictly increasing so that no two new rows share a primary key.
    last_id = 0

    def next_id():
        nonlocal last_id
        last_id = max(generate_id(), last_id + 1)
        return last_id

    # 2. Input fields (field_type 1).
    print("\n 同步输入字段...")
    input_field_id_map, input_matched, input_created = _sync_field_group(
        conn, tenant_id, source_input_fields, 1, existing_by_code, next_id, dry_run
    )
    print(f" 匹配: {input_matched} 个,创建: {input_created}")

    # 3. Output fields (field_type 2).
    print("\n 同步输出字段...")
    output_field_id_map, output_matched, output_created = _sync_field_group(
        conn, tenant_id, source_output_fields, 2, existing_by_code, next_id, dry_run
    )
    print(f" 匹配: {output_matched} 个,创建: {output_created}")

    return input_field_id_map, output_field_id_map
def _order_directories_parent_first(dir_templates):
    """Return the directory templates ordered by depth, shallowest first.

    The depth of a directory is the number of parent links followed from it
    (a link to a parent that is not among *dir_templates* still counts once).
    Directories of equal depth keep their incoming order. A cyclic parent
    chain is cut at the first repeated directory instead of being walked
    forever.
    """
    dirs_by_id = {template['id']: template for template in dir_templates}
    by_depth = {}
    for template in dir_templates:
        depth = 0
        visited = {template['id']}
        current = template
        while current.get('parent_id'):
            depth += 1
            parent = dirs_by_id.get(current['parent_id'])
            if parent is None or parent['id'] in visited:
                break
            visited.add(parent['id'])
            current = parent
        by_depth.setdefault(depth, []).append(template)
    return [template for depth in sorted(by_depth) for template in by_depth[depth]]


def sync_templates_to_target(conn, tenant_id: int, source_templates: Dict,
                             dry_run: bool = False) -> Dict[int, int]:
    """Synchronise templates (directory and file nodes) into the target database.

    Nodes without file_path are treated as directories and are processed
    first, parents before children, so that a child's parent ID can be
    translated. A directory matches an existing target directory with the same
    name and the same (translated) parent; a file matches an existing target
    row with the same file_path. Unmatched nodes are inserted; matched files
    whose parent or name differ are updated.

    All writes are committed together at the end; on any error they are rolled
    back and the error is re-raised.

    Args:
        conn: open connection to the target database.
        tenant_id: target tenant the rows belong to.
        source_templates: mapping whose values hold id, parent_id, name and
            file_path of each source template (the keys are not used).
        dry_run: when true nothing is written; IDs for would-be-created nodes
            are still generated so the returned map is complete.

    Returns:
        template_id_map: source template ID -> target template ID.
    """
    print_section("同步模板到目标数据库")

    # 1. Load the templates that already exist in the target.
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute("""
            SELECT id, name, file_path, parent_id
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
        """, (tenant_id,))
        existing_templates = cursor.fetchall()
    finally:
        cursor.close()

    existing_by_path = {}
    existing_dirs_by_name = {}
    for template in existing_templates:
        if template['file_path']:
            existing_by_path[template['file_path']] = template
        else:
            # Directory node: several may share a name, so keep them all.
            existing_dirs_by_name.setdefault(template['name'], []).append(template)
    print(f" 目标数据库现有模板: {len(existing_templates)}")

    dir_templates = [t for t in source_templates.values() if not t['file_path']]
    file_templates = [t for t in source_templates.values() if t['file_path']]

    template_id_map = {}

    # generate_id() derives IDs from the clock; two calls within one clock tick
    # could return the same value. Force the IDs handed out by this run to be
    # strictly increasing so that no two new rows share a primary key.
    last_id = 0

    def next_id():
        nonlocal last_id
        last_id = max(generate_id(), last_id + 1)
        return last_id

    def resolve_parent(source_template):
        """Translate the source parent ID; None when there is no parent or it was not synced."""
        parent_id = source_template['parent_id']
        return template_id_map.get(parent_id) if parent_id else None

    writer = None if dry_run else conn.cursor()
    try:
        # 2. Directory nodes, parents before children.
        print("\n 同步目录节点...")
        dir_created = 0
        dir_matched = 0
        for source_template in _order_directories_parent_first(dir_templates):
            source_id = source_template['id']
            name = source_template['name']
            target_parent_id = resolve_parent(source_template)

            matched = None
            for existing in existing_dirs_by_name.get(name, []):
                if existing['parent_id'] == target_parent_id:
                    matched = existing
                    break

            if matched:
                template_id_map[source_id] = matched['id']
                dir_matched += 1
                continue

            target_id = next_id()
            template_id_map[source_id] = target_id
            if writer is not None:
                writer.execute("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, NULL, NOW(), %s, NOW(), %s, 1)
                """, (
                    target_id,
                    tenant_id,
                    target_parent_id,
                    name,
                    CREATED_BY,
                    UPDATED_BY,
                ))
            dir_created += 1
        print(f" 匹配: {dir_matched} 个,创建: {dir_created}")

        # 3. File nodes, matched by file_path.
        print("\n 同步文件节点...")
        file_created = 0
        file_matched = 0
        file_updated = 0
        for source_template in file_templates:
            source_id = source_template['id']
            file_path = source_template['file_path']
            name = source_template['name']
            target_parent_id = resolve_parent(source_template)

            matched = existing_by_path.get(file_path)
            if matched:
                target_id = matched['id']
                template_id_map[source_id] = target_id
                file_matched += 1
                # Bring parent and name in line with the source when they differ.
                if matched['parent_id'] != target_parent_id or matched['name'] != name:
                    file_updated += 1
                    if writer is not None:
                        writer.execute("""
                            UPDATE f_polic_file_config
                            SET parent_id = %s, name = %s, updated_time = NOW(), updated_by = %s
                            WHERE id = %s AND tenant_id = %s
                        """, (target_parent_id, name, UPDATED_BY, target_id, tenant_id))
                continue

            target_id = next_id()
            template_id_map[source_id] = target_id
            if writer is not None:
                writer.execute("""
                    INSERT INTO f_polic_file_config
                    (id, tenant_id, parent_id, name, file_path, created_time, created_by, updated_time, updated_by, state)
                    VALUES (%s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                """, (
                    target_id,
                    tenant_id,
                    target_parent_id,
                    name,
                    file_path,
                    CREATED_BY,
                    UPDATED_BY,
                ))
            file_created += 1
        print(f" 匹配: {file_matched} 个,创建: {file_created} 个,更新: {file_updated}")

        if writer is not None:
            conn.commit()
    except Exception:
        if writer is not None:
            conn.rollback()
        raise
    finally:
        if writer is not None:
            writer.close()

    return template_id_map
def sync_relations_to_target(conn, tenant_id: int, source_relations: Dict[int, List[int]],
                             template_id_map: Dict[int, int],
                             input_field_id_map: Dict[int, int],
                             output_field_id_map: Dict[int, int],
                             dry_run: bool = False):
    """Replace the target tenant's template-to-field links with the source's.

    Step 1 deletes every f_polic_file_field row of the tenant and commits.
    Step 2 translates each source (file_id, filed_id) pair through the ID maps
    and inserts the result, committing once per template. A template whose
    inserts fail is rolled back, reported and counted; the remaining templates
    are still processed.

    NOTE(review): the delete is committed before any insert, so a failure in
    step 2 leaves the tenant with fewer links than before the run.

    Args:
        conn: open connection to the target database.
        tenant_id: target tenant the rows belong to.
        source_relations: source file_id -> list of source filed_id.
        template_id_map: source template ID -> target template ID.
        input_field_id_map: source field ID -> target field ID (input fields).
        output_field_id_map: source field ID -> target field ID (output fields).
        dry_run: when true nothing is deleted or inserted; counts are still
            computed.

    Returns:
        dict with
          'created': number of links inserted (or that would be inserted),
          'skipped': templates that have no target ID or none of whose fields
                     could be translated,
          'failed':  templates whose inserts raised and were rolled back.
    """
    print_section("同步字段关联关系到目标数据库")

    # 1. Remove the existing links.
    print("1. 清理现有关联关系...")
    if not dry_run:
        cursor = conn.cursor()
        try:
            cursor.execute("""
                DELETE FROM f_polic_file_field
                WHERE tenant_id = %s
            """, (tenant_id,))
            deleted_count = cursor.rowcount
            conn.commit()
            print_result(True, f"删除了 {deleted_count} 条旧关联关系")
        finally:
            cursor.close()
    else:
        print(" [预览模式] 将清理所有现有关联关系")

    # 2. Create the new links.
    print("\n2. 创建新的关联关系...")
    all_field_id_map = {**input_field_id_map, **output_field_id_map}

    # generate_id() derives IDs from the clock; two calls within one clock tick
    # could return the same value. Force the IDs handed out by this run to be
    # strictly increasing so that no two new rows share a primary key.
    last_id = 0

    def next_id():
        nonlocal last_id
        last_id = max(generate_id(), last_id + 1)
        return last_id

    relations_created = 0
    relations_skipped = 0
    relations_failed = 0

    writer = None if dry_run else conn.cursor()
    try:
        for source_file_id, source_field_ids in source_relations.items():
            target_file_id = template_id_map.get(source_file_id)
            if not target_file_id:
                relations_skipped += 1
                continue

            # Translate the field IDs, dropping the ones without a target and
            # repeated ones (order preserved) so a pair is inserted only once.
            translated = (all_field_id_map.get(field_id) for field_id in source_field_ids)
            target_field_ids = list(dict.fromkeys(t for t in translated if t))
            if not target_field_ids:
                relations_skipped += 1
                continue

            if writer is None:
                relations_created += len(target_field_ids)
                continue

            try:
                for target_field_id in target_field_ids:
                    writer.execute("""
                        INSERT INTO f_polic_file_field
                        (id, tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state)
                        VALUES (%s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
                    """, (
                        next_id(),
                        tenant_id,
                        target_file_id,
                        target_field_id,
                        CREATED_BY,
                        UPDATED_BY,
                    ))
                conn.commit()
                relations_created += len(target_field_ids)
            except Exception as e:
                # Best effort per template: undo this template's rows, record
                # the failure and carry on with the next template.
                conn.rollback()
                relations_failed += 1
                print(f" [错误] 创建关联关系失败: {str(e)}")
    finally:
        if writer is not None:
            writer.close()

    summary = f"创建了 {relations_created} 条关联关系,跳过 {relations_skipped} 个模板"
    if relations_failed:
        # Do not report success when some templates could not be written.
        print_result(False, f"{summary},失败 {relations_failed} 个模板")
    else:
        print_result(True, summary)

    return {
        'created': relations_created,
        'skipped': relations_skipped,
        'failed': relations_failed,
    }
def main():
    """Run the whole sync: read settings, connect, read the source, write the target.

    Steps: parse the command line, read the source settings from the
    environment, open both connections, read fields / templates / relations
    of the source tenant, then sync fields, templates and relations into the
    target tenant and print a summary. Both connections are closed at the end.

    Returns None. Configuration or connection problems are reported with a
    "[FAIL]" line and end the run early; errors raised during the sync itself
    propagate to the caller.
    """
    # Parse the command line before anything else so that --help and argument
    # errors work even when the .env settings are missing or incomplete.
    target_config = get_target_db_config_from_args()

    print_section("跨数据库同步模板、字段和关联关系")

    # 1. Source database settings (from .env).
    print_section("读取源数据库配置")
    try:
        source_config = get_source_db_config()
    except Exception as e:
        print_result(False, str(e))
        return
    print_result(True, f"源数据库: {source_config['host']}:{source_config['port']}/{source_config['database']}")

    # 2. Target database settings (from the command line).
    print_section("读取目标数据库配置")
    print_result(True, f"目标数据库: {target_config['host']}:{target_config['port']}/{target_config['database']}")
    print(f" 目标租户ID: {target_config['tenant_id']}")
    if target_config['dry_run']:
        print("\n[注意] 当前为预览模式,不会实际更新数据库")

    # 3. Connect to both databases.
    print_section("连接数据库")
    # Label the source connection so a failure message says which side failed,
    # matching the "目标" label used for the target.
    source_conn = test_db_connection(source_config, "源")
    if not source_conn:
        return
    target_conn = test_db_connection(target_config, "目标")
    if not target_conn:
        source_conn.close()
        return
    print_result(True, "数据库连接成功")

    try:
        # 4. Source tenant: the explicit option wins (including 0); otherwise
        #    ask the source database.
        source_tenant_id = target_config.get('source_tenant_id')
        if source_tenant_id is None:
            source_tenant_id = get_source_tenant_id(source_conn)
        print(f"\n源租户ID: {source_tenant_id}")

        # 5. Read the source data.
        print_section("读取源数据库数据")
        print(" 读取字段...")
        source_input_fields, source_output_fields = read_source_fields(source_conn, source_tenant_id)
        print_result(True, f"输入字段: {len(source_input_fields)} 个,输出字段: {len(source_output_fields)}")

        print("\n 读取模板...")
        source_templates = read_source_templates(source_conn, source_tenant_id)
        print_result(True, f"模板总数: {len(source_templates)}")

        print("\n 读取关联关系...")
        source_relations = read_source_relations(source_conn, source_tenant_id)
        print_result(True, f"关联关系: {len(source_relations)} 个模板有字段关联")

        # 6. Write to the target. The order matters: relations need the ID
        #    maps produced by the field and template steps.
        target_tenant_id = target_config['tenant_id']
        dry_run = target_config['dry_run']

        # 6.1 Fields
        input_field_id_map, output_field_id_map = sync_fields_to_target(
            target_conn, target_tenant_id,
            source_input_fields, source_output_fields,
            dry_run
        )

        # 6.2 Templates
        template_id_map = sync_templates_to_target(
            target_conn, target_tenant_id,
            source_templates,
            dry_run
        )

        # 6.3 Relations
        relations_result = sync_relations_to_target(
            target_conn, target_tenant_id,
            source_relations,
            template_id_map,
            input_field_id_map,
            output_field_id_map,
            dry_run
        )

        # 7. Summary
        print_section("同步完成")
        if dry_run:
            print(" 本次为预览模式,未实际更新数据库")
        else:
            print(" 数据库已更新")
        print("\n 同步统计:")
        print(f" - 输入字段: {len(input_field_id_map)}")
        print(f" - 输出字段: {len(output_field_id_map)}")
        print(f" - 模板: {len(template_id_map)}")
        print(f" - 关联关系: {relations_result['created']}")
    finally:
        # Close the target connection even if closing the source one raises.
        try:
            source_conn.close()
        finally:
            target_conn.close()
        print_result(True, "数据库连接已关闭")
# Script entry point: Ctrl+C exits with status 0 after a short notice; any
# other exception is printed with its traceback and exits with status 1.
if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("\n\n[中断] 用户取消操作")
        sys.exit(0)
    except Exception as exc:
        import traceback

        print("\n[错误] 发生异常: {}".format(str(exc)))
        traceback.print_exc()
        sys.exit(1)