ai-business-write/fix_duplicate_fields.py
2025-12-11 15:10:46 +08:00

177 lines
5.6 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""修复 f_polic_field 表中的重复字段"""
import pymysql
import os
from dotenv import load_dotenv
from collections import defaultdict
load_dotenv()
TENANT_ID = 615873064429507639
conn = pymysql.connect(
host=os.getenv('DB_HOST', '152.136.177.240'),
port=int(os.getenv('DB_PORT', 5012)),
user=os.getenv('DB_USER', 'finyx'),
password=os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
database=os.getenv('DB_NAME', 'finyx'),
charset='utf8mb4'
)
cursor = conn.cursor(pymysql.cursors.DictCursor)
print("=" * 80)
print("修复重复字段")
print("=" * 80)
# 1. 查找所有重复的 filed_code
cursor.execute("""
SELECT filed_code, COUNT(*) as cnt, GROUP_CONCAT(id ORDER BY id) as field_ids
FROM f_polic_field
WHERE tenant_id = %s
GROUP BY filed_code
HAVING cnt > 1
""", (TENANT_ID,))
duplicate_codes = cursor.fetchall()
print(f"\n发现 {len(duplicate_codes)} 个重复的字段编码:\n")
# Step 2: resolve each duplicated filed_code -- pick one row to keep,
# repoint or drop the template associations of the others, then delete them.
for dup in duplicate_codes:
    code = dup['filed_code']
    # GROUP_CONCAT returns a comma-joined string of ids; split back to ints.
    field_ids = [int(x) for x in dup['field_ids'].split(',')]
    print(f"\n处理字段编码: {code}")
    print(f" 字段ID列表: {field_ids}")
    # Fetch the detail row for every field sharing this code.
    placeholders = ','.join(['%s'] * len(field_ids))
    cursor.execute(f"""
SELECT id, name, field_type, state
FROM f_polic_field
WHERE id IN ({placeholders})
ORDER BY id
""", field_ids)
    fields = cursor.fetchall()
    # Count how many template files each candidate field is linked to.
    field_associations = {}
    for field_id in field_ids:
        cursor.execute("""
SELECT COUNT(*) as cnt, GROUP_CONCAT(file_id) as file_ids
FROM f_polic_file_field
WHERE filed_id = %s
""", (field_id,))
        result = cursor.fetchone()
        field_associations[field_id] = {
            'count': result['cnt'] if result else 0,
            # GROUP_CONCAT yields NULL (-> None) when there are no rows.
            'file_ids': result['file_ids'].split(',') if result and result['file_ids'] else []
        }
    print(f"\n 字段详情和关联关系:")
    for field in fields:
        assoc = field_associations[field['id']]
        print(f" ID: {field['id']}, name: {field['name']}, "
              f"field_type: {field['field_type']}, state: {field['state']}, "
              f"关联模板数: {assoc['count']}")
    # Choose the survivor: most template associations first, then the
    # smaller (older) id as tie-break.
    fields_with_assoc = [(f, field_associations[f['id']]) for f in fields]
    fields_with_assoc.sort(key=lambda x: (-x[1]['count'], x[0]['id']))
    keep_field = fields_with_assoc[0][0]
    remove_fields = [f for f, _ in fields_with_assoc[1:]]
    print(f"\n 保留字段: ID={keep_field['id']}, name={keep_field['name']}, "
          f"关联模板数={field_associations[keep_field['id']]['count']}")
    print(f" 删除字段: {[f['id'] for f in remove_fields]}")
    # Migrate associations: repoint each doomed field's template links onto
    # the survivor, dropping links the survivor already has.
    for remove_field in remove_fields:
        remove_id = remove_field['id']
        keep_id = keep_field['id']
        # All template files currently linked to the field being removed.
        cursor.execute("""
SELECT file_id
FROM f_polic_file_field
WHERE filed_id = %s
""", (remove_id,))
        remove_assocs = cursor.fetchall()
        migrated_count = 0
        skipped_count = 0
        for assoc in remove_assocs:
            file_id = assoc['file_id']
            # Does the surviving field already link to this file?
            cursor.execute("""
SELECT COUNT(*) as cnt
FROM f_polic_file_field
WHERE filed_id = %s AND file_id = %s
""", (keep_id, file_id))
            exists = cursor.fetchone()['cnt'] > 0
            if not exists:
                # Repoint the association row to the surviving field.
                cursor.execute("""
UPDATE f_polic_file_field
SET filed_id = %s
WHERE filed_id = %s AND file_id = %s
""", (keep_id, remove_id, file_id))
                migrated_count += 1
            else:
                # Survivor already has this link: drop the redundant one.
                cursor.execute("""
DELETE FROM f_polic_file_field
WHERE filed_id = %s AND file_id = %s
""", (remove_id, file_id))
                skipped_count += 1
        print(f" 字段ID {remove_id} -> {keep_id}: 迁移 {migrated_count} 个关联, 跳过 {skipped_count} 个重复关联")
        # Safety net: delete any associations still pointing at the removed
        # field (everything should already be migrated or deleted above).
        cursor.execute("""
DELETE FROM f_polic_file_field
WHERE filed_id = %s
""", (remove_id,))
        # Finally delete the duplicate field row itself.
        cursor.execute("""
DELETE FROM f_polic_field
WHERE id = %s
""", (remove_id,))
        print(f" 已删除字段 ID {remove_id} 及其关联关系")
print("\n" + "=" * 80)
print("验证修复结果")
print("=" * 80)
# 再次检查是否还有重复
cursor.execute("""
SELECT filed_code, COUNT(*) as cnt
FROM f_polic_field
WHERE tenant_id = %s
GROUP BY filed_code
HAVING cnt > 1
""", (TENANT_ID,))
remaining_duplicates = cursor.fetchall()
if remaining_duplicates:
print(f"\n警告:仍有 {len(remaining_duplicates)} 个重复的字段编码:")
for dup in remaining_duplicates:
print(f" {dup['filed_code']}: {dup['cnt']}")
else:
print("\n[OK] 所有重复字段已修复filed_code 现在唯一")
# 提交事务
conn.commit()
print("\n[OK] 所有更改已提交到数据库")
cursor.close()
conn.close()