ai-business-write/fix_remaining_chinese_fields.py

192 lines
7.0 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
修复剩余的中文field_code字段
为这些字段生成合适的英文field_code
"""
import os
import pymysql
import re
from typing import Dict
# 数据库连接配置
DB_CONFIG = {
'host': os.getenv('DB_HOST', '152.136.177.240'),
'port': int(os.getenv('DB_PORT', 5012)),
'user': os.getenv('DB_USER', 'finyx'),
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
'database': os.getenv('DB_NAME', 'finyx'),
'charset': 'utf8mb4'
}
TENANT_ID = 615873064429507639
UPDATED_BY = 655162080928945152
# 字段名称到field_code的映射针对剩余的中文字段
FIELD_MAPPING = {
# 谈话相关字段
'拟谈话地点': 'proposed_interview_location',
'拟谈话时间': 'proposed_interview_time',
'谈话事由': 'interview_reason',
'谈话人': 'interviewer',
'谈话人员-安全员': 'interview_personnel_safety_officer',
'谈话人员-组长': 'interview_personnel_leader',
'谈话人员-谈话人员': 'interview_personnel',
'谈话前安全风险评估结果': 'pre_interview_risk_assessment_result',
'谈话地点': 'interview_location',
'谈话次数': 'interview_count',
# 被核查人员相关字段
'被核查人单位及职务': 'target_organization_and_position', # 注意:这个和"被核查人员单位及职务"应该是同一个
'被核查人员交代问题程度': 'target_confession_level',
'被核查人员减压后的表现': 'target_behavior_after_relief',
'被核查人员学历': 'target_education', # 注意:这个和"被核查人员文化程度"可能不同
'被核查人员工作履历': 'target_work_history',
'被核查人员思想负担程度': 'target_mental_burden_level',
'被核查人员职业': 'target_occupation',
'被核查人员谈话中的表现': 'target_behavior_during_interview',
'被核查人员问题严重程度': 'target_issue_severity_level',
'被核查人员风险等级': 'target_risk_level',
'被核查人基本情况': 'target_basic_info',
# 其他字段
'补空人员': 'backup_personnel',
'记录人': 'recorder',
'评估意见': 'assessment_opinion',
}
def is_chinese(text: str) -> bool:
"""判断字符串是否包含中文字符"""
if not text:
return False
return bool(re.search(r'[\u4e00-\u9fff]', text))
def fix_remaining_fields(dry_run: bool = True):
"""修复剩余的中文field_code字段"""
conn = pymysql.connect(**DB_CONFIG)
cursor = conn.cursor(pymysql.cursors.DictCursor)
print("="*80)
print("修复剩余的中文field_code字段")
print("="*80)
if dry_run:
print("\n[DRY RUN模式 - 不会实际修改数据库]")
# 查询所有包含中文field_code的字段
cursor.execute("""
SELECT id, name, filed_code, field_type, state
FROM f_polic_field
WHERE tenant_id = %s AND filed_code REGEXP '[\\u4e00-\\u9fff]'
ORDER BY name
""", (TENANT_ID,))
fields = cursor.fetchall()
print(f"\n找到 {len(fields)} 个需要修复的字段:\n")
updates = []
for field in fields:
field_name = field['name']
new_code = FIELD_MAPPING.get(field_name)
if not new_code:
# 如果没有映射生成一个基于名称的code
new_code = field_name.lower()
new_code = new_code.replace('被核查人员', 'target_').replace('被核查人', 'target_')
new_code = new_code.replace('谈话', 'interview_')
new_code = new_code.replace('人员', '')
new_code = new_code.replace('时间', '_time')
new_code = new_code.replace('地点', '_location')
new_code = new_code.replace('问题', '_issue')
new_code = new_code.replace('情况', '_situation')
new_code = new_code.replace('程度', '_level')
new_code = new_code.replace('表现', '_behavior')
new_code = new_code.replace('等级', '_level')
new_code = new_code.replace('履历', '_history')
new_code = new_code.replace('学历', '_education')
new_code = new_code.replace('职业', '_occupation')
new_code = new_code.replace('事由', '_reason')
new_code = new_code.replace('次数', '_count')
new_code = new_code.replace('结果', '_result')
new_code = new_code.replace('意见', '_opinion')
new_code = re.sub(r'[^\w]', '_', new_code)
new_code = re.sub(r'_+', '_', new_code).strip('_')
new_code = new_code.replace('__', '_')
updates.append({
'id': field['id'],
'name': field_name,
'old_code': field['filed_code'],
'new_code': new_code,
'field_type': field['field_type']
})
print(f" ID: {field['id']}")
print(f" 名称: {field_name}")
print(f" 当前field_code: {field['filed_code']}")
print(f" 新field_code: {new_code}")
print()
# 检查是否有重复的new_code
code_to_fields = {}
for update in updates:
code = update['new_code']
if code not in code_to_fields:
code_to_fields[code] = []
code_to_fields[code].append(update)
duplicate_codes = {code: fields_list for code, fields_list in code_to_fields.items()
if len(fields_list) > 1}
if duplicate_codes:
print("\n⚠ 警告以下field_code会重复:")
for code, fields_list in duplicate_codes.items():
print(f" field_code: {code}")
for field in fields_list:
print(f" - ID: {field['id']}, 名称: {field['name']}")
print()
# 执行更新
if not dry_run:
print("开始执行更新...\n")
for update in updates:
cursor.execute("""
UPDATE f_polic_field
SET filed_code = %s, updated_time = NOW(), updated_by = %s
WHERE id = %s
""", (update['new_code'], UPDATED_BY, update['id']))
print(f" ✓ 更新字段 ID {update['id']}: {update['name']}")
print(f" {update['old_code']} -> {update['new_code']}")
conn.commit()
print("\n✓ 更新完成")
else:
print("[DRY RUN] 以上操作不会实际执行")
cursor.close()
conn.close()
return updates
if __name__ == '__main__':
print("是否执行修复?")
print("1. DRY RUN不实际修改数据库")
print("2. 直接执行修复(会修改数据库)")
choice = input("\n请选择 (1/2默认1): ").strip() or "1"
if choice == "2":
print("\n执行实际修复...")
fix_remaining_fields(dry_run=False)
else:
print("\n执行DRY RUN...")
updates = fix_remaining_fields(dry_run=True)
if updates:
confirm = input("\nDRY RUN完成。是否执行实际修复(y/n默认n): ").strip().lower()
if confirm == 'y':
print("\n执行实际修复...")
fix_remaining_fields(dry_run=False)