ai-business-write/analyze_duplicate_fields.py
2025-12-11 15:10:46 +08:00

149 lines
4.9 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""分析 f_polic_field 表中的重复字段"""
import pymysql
import os
from dotenv import load_dotenv
from collections import defaultdict
load_dotenv()
TENANT_ID = 615873064429507639
conn = pymysql.connect(
host=os.getenv('DB_HOST', '152.136.177.240'),
port=int(os.getenv('DB_PORT', 5012)),
user=os.getenv('DB_USER', 'finyx'),
password=os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
database=os.getenv('DB_NAME', 'finyx'),
charset='utf8mb4'
)
cursor = conn.cursor(pymysql.cursors.DictCursor)
print("=" * 80)
print("1. 分析按 name 字段的重复情况")
print("=" * 80)
# 查询所有字段
cursor.execute("""
SELECT id, name, filed_code, field_type, state
FROM f_polic_field
WHERE tenant_id = %s
ORDER BY name, id
""", (TENANT_ID,))
all_fields = cursor.fetchall()
# 按 name 分组
name_groups = defaultdict(list)
for field in all_fields:
name_groups[field['name']].append(field)
# 找出重复的 name
duplicate_names = {name: fields for name, fields in name_groups.items() if len(fields) > 1}
print(f"\n发现 {len(duplicate_names)} 个重复的字段名称:\n")
for name, fields in sorted(duplicate_names.items()):
print(f"字段名称: {name}")
for field in fields:
print(f" ID: {field['id']}, filed_code: {field['filed_code']}, field_type: {field['field_type']}, state: {field['state']}")
print()
print("\n" + "=" * 80)
print("2. 分析按 filed_code 字段的重复情况")
print("=" * 80)
# 按 filed_code 分组
code_groups = defaultdict(list)
for field in all_fields:
code_groups[field['filed_code']].append(field)
# 找出重复的 filed_code
duplicate_codes = {code: fields for code, fields in code_groups.items() if len(fields) > 1}
print(f"\n发现 {len(duplicate_codes)} 个重复的字段编码:\n")
for code, fields in sorted(duplicate_codes.items()):
print(f"字段编码: {code}")
for field in fields:
print(f" ID: {field['id']}, name: {field['name']}, field_type: {field['field_type']}, state: {field['state']}")
print()
print("\n" + "=" * 80)
print("3. 分析重复字段的关联关系f_polic_file_field")
print("=" * 80)
# 获取所有重复字段的ID
all_duplicate_field_ids = set()
for fields in duplicate_names.values():
for field in fields:
all_duplicate_field_ids.add(field['id'])
for fields in duplicate_codes.values():
for field in fields:
all_duplicate_field_ids.add(field['id'])
if all_duplicate_field_ids:
placeholders = ','.join(['%s'] * len(all_duplicate_field_ids))
cursor.execute(f"""
SELECT ff.file_id, ff.filed_id, f.name, f.filed_code, fc.name as file_name, fc.state as file_state
FROM f_polic_file_field ff
INNER JOIN f_polic_field f ON ff.filed_id = f.id
INNER JOIN f_polic_file_config fc ON ff.file_id = fc.id
WHERE ff.filed_id IN ({placeholders})
AND f.tenant_id = %s
ORDER BY f.filed_code, ff.file_id
""", list(all_duplicate_field_ids) + [TENANT_ID])
associations = cursor.fetchall()
# 按 filed_code 分组关联关系
code_associations = defaultdict(list)
for assoc in associations:
code_associations[assoc['filed_code']].append(assoc)
print(f"\n重复字段的关联关系:\n")
for code, assocs in sorted(code_associations.items()):
print(f"字段编码: {code} ({assocs[0]['name']})")
for assoc in assocs:
print(f" 字段ID: {assoc['filed_id']}, 文件ID: {assoc['file_id']}, 文件名: {assoc['file_name']}, 文件状态: {assoc['file_state']}")
print()
else:
print("\n没有发现重复字段的关联关系")
print("\n" + "=" * 80)
print("4. 统计每个 filed_code 关联的模板数量")
print("=" * 80)
cursor.execute("""
SELECT f.filed_code, f.name, COUNT(DISTINCT ff.file_id) as template_count,
GROUP_CONCAT(DISTINCT ff.filed_id ORDER BY ff.filed_id) as field_ids,
GROUP_CONCAT(DISTINCT fc.name ORDER BY fc.name SEPARATOR ' | ') as template_names
FROM f_polic_field f
LEFT JOIN f_polic_file_field ff ON f.id = ff.filed_id
LEFT JOIN f_polic_file_config fc ON ff.file_id = fc.id AND fc.state = 1
WHERE f.tenant_id = %s
GROUP BY f.filed_code, f.name
HAVING COUNT(DISTINCT ff.filed_id) > 0 OR f.filed_code IN (
SELECT filed_code FROM (
SELECT filed_code, COUNT(*) as cnt
FROM f_polic_field
WHERE tenant_id = %s
GROUP BY filed_code
HAVING cnt > 1
) AS dup
)
ORDER BY template_count DESC, f.filed_code
""", (TENANT_ID, TENANT_ID))
stats = cursor.fetchall()
print(f"\n字段关联统计(包含重复字段):\n")
for stat in stats:
print(f"字段编码: {stat['filed_code']}")
print(f" 字段名称: {stat['name']}")
print(f" 关联模板数: {stat['template_count']}")
print(f" 字段ID列表: {stat['field_ids']}")
if stat['template_names']:
print(f" 关联模板: {stat['template_names']}")
print()
cursor.close()
conn.close()