ai-business-write/fix_only_chinese_field_codes.py

"""
只修复真正包含中文的field_code字段
"""
import os
import pymysql
import re
from typing import Dict

# 数据库连接配置
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}

TENANT_ID = 615873064429507639
UPDATED_BY = 655162080928945152

# 字段名称到field_code的映射（针对剩余的中文字段）
FIELD_MAPPING = {
    # 谈话相关字段
    '拟谈话地点': 'proposed_interview_location',
    '拟谈话时间': 'proposed_interview_time',
    '谈话事由': 'interview_reason',
    '谈话人': 'interviewer',
    '谈话人员-安全员': 'interview_personnel_safety_officer',
    '谈话人员-组长': 'interview_personnel_leader',
    '谈话人员-谈话人员': 'interview_personnel',
    '谈话前安全风险评估结果': 'pre_interview_risk_assessment_result',
    '谈话地点': 'interview_location',
    '谈话次数': 'interview_count',

    # 被核查人员相关字段
    '被核查人单位及职务': 'target_organization_and_position',  # 注意：这个和"被核查人员单位及职务"应该是同一个
    '被核查人员交代问题程度': 'target_confession_level',
    '被核查人员减压后的表现': 'target_behavior_after_relief',
    '被核查人员学历': 'target_education',  # 注意：这个和"被核查人员文化程度"可能不同
    '被核查人员工作履历': 'target_work_history',
    '被核查人员思想负担程度': 'target_mental_burden_level',
    '被核查人员职业': 'target_occupation',
    '被核查人员谈话中的表现': 'target_behavior_during_interview',
    '被核查人员问题严重程度': 'target_issue_severity_level',
    '被核查人员风险等级': 'target_risk_level',
    '被核查人基本情况': 'target_basic_info',

    # 其他字段
    '补空人员': 'backup_personnel',
    '记录人': 'recorder',
    '评估意见': 'assessment_opinion',
}

def is_chinese(text: str) -> bool:
    """判断字符串是否完全或主要包含中文字符"""
    if not text:
        return False
    # 如果包含中文字符，且中文字符占比超过50%，认为是中文
    chinese_chars = len(re.findall(r'[\u4e00-\u9fff]', text))
    total_chars = len(text)
    if total_chars == 0:
        return False
    return chinese_chars / total_chars > 0.3  # 如果中文字符占比超过30%，认为是中文


def fix_chinese_fields(dry_run: bool = True):
    """修复包含中文的field_code字段"""
    conn = pymysql.connect(**DB_CONFIG)
    cursor = conn.cursor(pymysql.cursors.DictCursor)

    print("="*80)
    print("修复包含中文的field_code字段")
    print("="*80)

    if dry_run:
        print("\n[DRY RUN模式 - 不会实际修改数据库]")

    # 查询所有字段
    cursor.execute("""
        SELECT id, name, filed_code, field_type, state
        FROM f_polic_field
        WHERE tenant_id = %s
        ORDER BY name
    """, (TENANT_ID,))

    all_fields = cursor.fetchall()

    # 找出field_code包含中文的字段
    chinese_fields = []
    for field in all_fields:
        if field['filed_code'] and is_chinese(field['filed_code']):
            chinese_fields.append(field)

    print(f"\n找到 {len(chinese_fields)} 个field_code包含中文的字段:\n")

    updates = []
    for field in chinese_fields:
        field_name = field['name']
        new_code = FIELD_MAPPING.get(field_name)

        if not new_code:
            # 如果没有映射，生成一个基于名称的code
            new_code = field_name.lower()
            new_code = new_code.replace('被核查人员', 'target_').replace('被核查人', 'target_')
            new_code = new_code.replace('谈话', 'interview_')
            new_code = new_code.replace('人员', '')
            new_code = new_code.replace('时间', '_time')
            new_code = new_code.replace('地点', '_location')
            new_code = new_code.replace('问题', '_issue')
            new_code = new_code.replace('情况', '_situation')
            new_code = new_code.replace('程度', '_level')
            new_code = new_code.replace('表现', '_behavior')
            new_code = new_code.replace('等级', '_level')
            new_code = new_code.replace('履历', '_history')
            new_code = new_code.replace('学历', '_education')
            new_code = new_code.replace('职业', '_occupation')
            new_code = new_code.replace('事由', '_reason')
            new_code = new_code.replace('次数', '_count')
            new_code = new_code.replace('结果', '_result')
            new_code = new_code.replace('意见', '_opinion')
            new_code = re.sub(r'[^\w]', '_', new_code)
            new_code = re.sub(r'_+', '_', new_code).strip('_')
            new_code = new_code.replace('__', '_')

        updates.append({
            'id': field['id'],
            'name': field_name,
            'old_code': field['filed_code'],
            'new_code': new_code,
            'field_type': field['field_type']
        })

        print(f"  ID: {field['id']}")
        print(f"  名称: {field_name}")
        print(f"  当前field_code: {field['filed_code']}")
        print(f"  新field_code: {new_code}")
        print()

    # 检查是否有重复的new_code
    code_to_fields = {}
    for update in updates:
        code = update['new_code']
        if code not in code_to_fields:
            code_to_fields[code] = []
        code_to_fields[code].append(update)

    duplicate_codes = {code: fields_list for code, fields_list in code_to_fields.items()
                      if len(fields_list) > 1}

    if duplicate_codes:
        print("\n⚠ 警告：以下field_code会重复:")
        for code, fields_list in duplicate_codes.items():
            print(f"  field_code: {code}")
            for field in fields_list:
                print(f"    - ID: {field['id']}, 名称: {field['name']}")
        print()

    # 执行更新
    if not dry_run:
        print("开始执行更新...\n")
        for update in updates:
            cursor.execute("""
                UPDATE f_polic_field
                SET filed_code = %s, updated_time = NOW(), updated_by = %s
                WHERE id = %s
            """, (update['new_code'], UPDATED_BY, update['id']))
            print(f"  ✓ 更新字段 ID {update['id']}: {update['name']}")
            print(f"    {update['old_code']} -> {update['new_code']}")

        conn.commit()
        print("\n✓ 更新完成")
    else:
        print("[DRY RUN] 以上操作不会实际执行")

    cursor.close()
    conn.close()

    return updates


if __name__ == '__main__':
    print("是否执行修复？")
    print("1. DRY RUN（不实际修改数据库）")
    print("2. 直接执行修复（会修改数据库）")

    choice = input("\n请选择 (1/2，默认1): ").strip() or "1"

    if choice == "2":
        print("\n执行实际修复...")
        fix_chinese_fields(dry_run=False)
    else:
        print("\n执行DRY RUN...")
        updates = fix_chinese_fields(dry_run=True)

        if updates:
            confirm = input("\nDRY RUN完成。是否执行实际修复？(y/n，默认n): ").strip().lower()
            if confirm == 'y':
                print("\n执行实际修复...")
                fix_chinese_fields(dry_run=False)