"""分析 f_polic_field 表中的重复字段""" import pymysql import os from dotenv import load_dotenv from collections import defaultdict load_dotenv() TENANT_ID = 615873064429507639 conn = pymysql.connect( host=os.getenv('DB_HOST', '152.136.177.240'), port=int(os.getenv('DB_PORT', 5012)), user=os.getenv('DB_USER', 'finyx'), password=os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), database=os.getenv('DB_NAME', 'finyx'), charset='utf8mb4' ) cursor = conn.cursor(pymysql.cursors.DictCursor) print("=" * 80) print("1. 分析按 name 字段的重复情况") print("=" * 80) # 查询所有字段 cursor.execute(""" SELECT id, name, filed_code, field_type, state FROM f_polic_field WHERE tenant_id = %s ORDER BY name, id """, (TENANT_ID,)) all_fields = cursor.fetchall() # 按 name 分组 name_groups = defaultdict(list) for field in all_fields: name_groups[field['name']].append(field) # 找出重复的 name duplicate_names = {name: fields for name, fields in name_groups.items() if len(fields) > 1} print(f"\n发现 {len(duplicate_names)} 个重复的字段名称:\n") for name, fields in sorted(duplicate_names.items()): print(f"字段名称: {name}") for field in fields: print(f" ID: {field['id']}, filed_code: {field['filed_code']}, field_type: {field['field_type']}, state: {field['state']}") print() print("\n" + "=" * 80) print("2. 分析按 filed_code 字段的重复情况") print("=" * 80) # 按 filed_code 分组 code_groups = defaultdict(list) for field in all_fields: code_groups[field['filed_code']].append(field) # 找出重复的 filed_code duplicate_codes = {code: fields for code, fields in code_groups.items() if len(fields) > 1} print(f"\n发现 {len(duplicate_codes)} 个重复的字段编码:\n") for code, fields in sorted(duplicate_codes.items()): print(f"字段编码: {code}") for field in fields: print(f" ID: {field['id']}, name: {field['name']}, field_type: {field['field_type']}, state: {field['state']}") print() print("\n" + "=" * 80) print("3. 分析重复字段的关联关系(f_polic_file_field)") print("=" * 80) # 获取所有重复字段的ID all_duplicate_field_ids = set() for fields in duplicate_names.values(): for field in fields: all_duplicate_field_ids.add(field['id']) for fields in duplicate_codes.values(): for field in fields: all_duplicate_field_ids.add(field['id']) if all_duplicate_field_ids: placeholders = ','.join(['%s'] * len(all_duplicate_field_ids)) cursor.execute(f""" SELECT ff.file_id, ff.filed_id, f.name, f.filed_code, fc.name as file_name, fc.state as file_state FROM f_polic_file_field ff INNER JOIN f_polic_field f ON ff.filed_id = f.id INNER JOIN f_polic_file_config fc ON ff.file_id = fc.id WHERE ff.filed_id IN ({placeholders}) AND f.tenant_id = %s ORDER BY f.filed_code, ff.file_id """, list(all_duplicate_field_ids) + [TENANT_ID]) associations = cursor.fetchall() # 按 filed_code 分组关联关系 code_associations = defaultdict(list) for assoc in associations: code_associations[assoc['filed_code']].append(assoc) print(f"\n重复字段的关联关系:\n") for code, assocs in sorted(code_associations.items()): print(f"字段编码: {code} ({assocs[0]['name']})") for assoc in assocs: print(f" 字段ID: {assoc['filed_id']}, 文件ID: {assoc['file_id']}, 文件名: {assoc['file_name']}, 文件状态: {assoc['file_state']}") print() else: print("\n没有发现重复字段的关联关系") print("\n" + "=" * 80) print("4. 统计每个 filed_code 关联的模板数量") print("=" * 80) cursor.execute(""" SELECT f.filed_code, f.name, COUNT(DISTINCT ff.file_id) as template_count, GROUP_CONCAT(DISTINCT ff.filed_id ORDER BY ff.filed_id) as field_ids, GROUP_CONCAT(DISTINCT fc.name ORDER BY fc.name SEPARATOR ' | ') as template_names FROM f_polic_field f LEFT JOIN f_polic_file_field ff ON f.id = ff.filed_id LEFT JOIN f_polic_file_config fc ON ff.file_id = fc.id AND fc.state = 1 WHERE f.tenant_id = %s GROUP BY f.filed_code, f.name HAVING COUNT(DISTINCT ff.filed_id) > 0 OR f.filed_code IN ( SELECT filed_code FROM ( SELECT filed_code, COUNT(*) as cnt FROM f_polic_field WHERE tenant_id = %s GROUP BY filed_code HAVING cnt > 1 ) AS dup ) ORDER BY template_count DESC, f.filed_code """, (TENANT_ID, TENANT_ID)) stats = cursor.fetchall() print(f"\n字段关联统计(包含重复字段):\n") for stat in stats: print(f"字段编码: {stat['filed_code']}") print(f" 字段名称: {stat['name']}") print(f" 关联模板数: {stat['template_count']}") print(f" 字段ID列表: {stat['field_ids']}") if stat['template_names']: print(f" 关联模板: {stat['template_names']}") print() cursor.close() conn.close()