ai-business-write/verify_template_file_id_relations.py

532 lines
21 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
检查模板的 file_id 和相关关联关系是否正确
重点检查:
1. f_polic_file_config 表中的模板记录file_id
2. f_polic_file_field 表中的关联关系file_id 和 filed_id 的对应关系)
"""
import sys
import pymysql
from pathlib import Path
from typing import Dict, List, Set, Tuple
from collections import defaultdict
# 设置控制台编码为UTF-8Windows兼容
if sys.platform == 'win32':
try:
sys.stdout.reconfigure(encoding='utf-8')
sys.stderr.reconfigure(encoding='utf-8')
except:
pass
# 数据库连接配置
DB_CONFIG = {
'host': '152.136.177.240',
'port': 5012,
'user': 'finyx',
'password': '6QsGK6MpePZDE57Z',
'database': 'finyx',
'charset': 'utf8mb4'
}
# 固定值
TENANT_ID = 615873064429507639
# 项目根目录
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# 文档类型映射(用于识别模板)
DOCUMENT_TYPE_MAPPING = {
"1.请示报告卡XXX": "REPORT_CARD",
"2.初步核实审批表XXX": "PRELIMINARY_VERIFICATION_APPROVAL",
"3.附件初核方案(XXX)": "INVESTIGATION_PLAN",
"谈话通知书第一联": "NOTIFICATION_LETTER_1",
"谈话通知书第二联": "NOTIFICATION_LETTER_2",
"谈话通知书第三联": "NOTIFICATION_LETTER_3",
"1.请示报告卡(初核谈话)": "REPORT_CARD_INTERVIEW",
"2谈话审批表": "INTERVIEW_APPROVAL_FORM",
"3.谈话前安全风险评估表": "PRE_INTERVIEW_RISK_ASSESSMENT",
"4.谈话方案": "INTERVIEW_PLAN",
"5.谈话后安全风险评估表": "POST_INTERVIEW_RISK_ASSESSMENT",
"1.谈话笔录": "INTERVIEW_RECORD",
"2.谈话询问对象情况摸底调查30问": "INVESTIGATION_30_QUESTIONS",
"3.被谈话人权利义务告知书": "RIGHTS_OBLIGATIONS_NOTICE",
"4.点对点交接单": "HANDOVER_FORM",
"4.点对点交接单2": "HANDOVER_FORM_2",
"5.陪送交接单(新)": "ESCORT_HANDOVER_FORM",
"6.1保密承诺书(谈话对象使用-非中共党员用)": "CONFIDENTIALITY_COMMITMENT_NON_PARTY",
"6.2保密承诺书(谈话对象使用-中共党员用)": "CONFIDENTIALITY_COMMITMENT_PARTY",
"7.办案人员-办案安全保密承诺书": "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT",
"8-1请示报告卡初核报告结论 ": "REPORT_CARD_CONCLUSION",
"8.XXX初核情况报告": "INVESTIGATION_REPORT"
}
def get_template_files() -> Dict[str, Path]:
"""获取所有模板文件"""
templates = {}
if not TEMPLATES_DIR.exists():
return templates
for root, dirs, files in os.walk(TEMPLATES_DIR):
for file in files:
if file.endswith('.docx') and not file.startswith('~$'):
file_path = Path(root) / file
base_name = Path(file).stem
if base_name in DOCUMENT_TYPE_MAPPING:
templates[base_name] = file_path
return templates
def check_file_configs(conn) -> Dict:
"""检查 f_polic_file_config 表中的模板记录"""
print("\n" + "="*80)
print("1. 检查 f_polic_file_config 表中的模板记录")
print("="*80)
cursor = conn.cursor(pymysql.cursors.DictCursor)
# 查询所有模板记录
cursor.execute("""
SELECT id, name, template_code, file_path, state, parent_id
FROM f_polic_file_config
WHERE tenant_id = %s
ORDER BY name
""", (TENANT_ID,))
all_configs = cursor.fetchall()
# 按 template_code 和 name 组织数据
configs_by_code = {}
configs_by_name = {}
for config in all_configs:
config_id = config['id']
name = config['name']
template_code = config.get('template_code')
if template_code:
if template_code not in configs_by_code:
configs_by_code[template_code] = []
configs_by_code[template_code].append(config)
if name:
if name not in configs_by_name:
configs_by_name[name] = []
configs_by_name[name].append(config)
print(f"\n总模板记录数: {len(all_configs)}")
print(f"按 template_code 分组: {len(configs_by_code)} 个不同的 template_code")
print(f"按 name 分组: {len(configs_by_name)} 个不同的 name")
# 检查重复的 template_code
duplicate_codes = {code: configs for code, configs in configs_by_code.items() if len(configs) > 1}
if duplicate_codes:
print(f"\n[WARN] 发现重复的 template_code ({len(duplicate_codes)} 个):")
for code, configs in duplicate_codes.items():
print(f" - {code}: {len(configs)} 条记录")
for cfg in configs:
print(f" ID: {cfg['id']}, 名称: {cfg['name']}, 路径: {cfg.get('file_path', 'N/A')}")
# 检查重复的 name
duplicate_names = {name: configs for name, configs in configs_by_name.items() if len(configs) > 1}
if duplicate_names:
print(f"\n[WARN] 发现重复的 name ({len(duplicate_names)} 个):")
for name, configs in duplicate_names.items():
print(f" - {name}: {len(configs)} 条记录")
for cfg in configs:
print(f" ID: {cfg['id']}, template_code: {cfg.get('template_code', 'N/A')}, 路径: {cfg.get('file_path', 'N/A')}")
# 检查未启用的记录
disabled_configs = [cfg for cfg in all_configs if cfg.get('state') != 1]
if disabled_configs:
print(f"\n[WARN] 发现未启用的模板记录 ({len(disabled_configs)} 个):")
for cfg in disabled_configs:
print(f" - ID: {cfg['id']}, 名称: {cfg['name']}, 状态: {cfg.get('state')}")
# 检查 file_path 为空的记录
empty_path_configs = [cfg for cfg in all_configs if not cfg.get('file_path')]
if empty_path_configs:
print(f"\n[WARN] 发现 file_path 为空的记录 ({len(empty_path_configs)} 个):")
for cfg in empty_path_configs:
print(f" - ID: {cfg['id']}, 名称: {cfg['name']}, template_code: {cfg.get('template_code', 'N/A')}")
cursor.close()
return {
'all_configs': all_configs,
'configs_by_code': configs_by_code,
'configs_by_name': configs_by_name,
'duplicate_codes': duplicate_codes,
'duplicate_names': duplicate_names,
'disabled_configs': disabled_configs,
'empty_path_configs': empty_path_configs
}
def check_file_field_relations(conn) -> Dict:
"""检查 f_polic_file_field 表中的关联关系"""
print("\n" + "="*80)
print("2. 检查 f_polic_file_field 表中的关联关系")
print("="*80)
cursor = conn.cursor(pymysql.cursors.DictCursor)
# 查询所有关联关系
cursor.execute("""
SELECT fff.id, fff.file_id, fff.filed_id, fff.state, fff.tenant_id
FROM f_polic_file_field fff
WHERE fff.tenant_id = %s
ORDER BY fff.file_id, fff.filed_id
""", (TENANT_ID,))
all_relations = cursor.fetchall()
print(f"\n总关联关系数: {len(all_relations)}")
# 检查无效的 file_id关联到不存在的文件配置
cursor.execute("""
SELECT fff.id, fff.file_id, fff.filed_id
FROM f_polic_file_field fff
LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
WHERE fff.tenant_id = %s AND fc.id IS NULL
""", (TENANT_ID,))
invalid_file_relations = cursor.fetchall()
# 检查无效的 filed_id关联到不存在的字段
cursor.execute("""
SELECT fff.id, fff.file_id, fff.filed_id
FROM f_polic_file_field fff
LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
WHERE fff.tenant_id = %s AND f.id IS NULL
""", (TENANT_ID,))
invalid_field_relations = cursor.fetchall()
# 检查重复的关联关系(相同的 file_id 和 filed_id
cursor.execute("""
SELECT file_id, filed_id, COUNT(*) as count, GROUP_CONCAT(id ORDER BY id) as ids
FROM f_polic_file_field
WHERE tenant_id = %s
GROUP BY file_id, filed_id
HAVING COUNT(*) > 1
""", (TENANT_ID,))
duplicate_relations = cursor.fetchall()
# 检查关联到未启用文件的记录
cursor.execute("""
SELECT fff.id, fff.file_id, fff.filed_id, fc.name as file_name, fc.state as file_state
FROM f_polic_file_field fff
INNER JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
WHERE fff.tenant_id = %s AND fc.state != 1
""", (TENANT_ID,))
disabled_file_relations = cursor.fetchall()
# 检查关联到未启用字段的记录
cursor.execute("""
SELECT fff.id, fff.file_id, fff.filed_id, f.name as field_name, f.filed_code, f.state as field_state
FROM f_polic_file_field fff
INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
WHERE fff.tenant_id = %s AND f.state != 1
""", (TENANT_ID,))
disabled_field_relations = cursor.fetchall()
# 统计每个文件关联的字段数量
file_field_counts = defaultdict(int)
for rel in all_relations:
file_field_counts[rel['file_id']] += 1
print(f"\n文件关联字段统计:")
print(f" 有关联关系的文件数: {len(file_field_counts)}")
if file_field_counts:
max_count = max(file_field_counts.values())
min_count = min(file_field_counts.values())
avg_count = sum(file_field_counts.values()) / len(file_field_counts)
print(f" 每个文件关联字段数: 最少 {min_count}, 最多 {max_count}, 平均 {avg_count:.1f}")
# 输出检查结果
if invalid_file_relations:
print(f"\n[ERROR] 发现无效的 file_id 关联 ({len(invalid_file_relations)} 条):")
for rel in invalid_file_relations[:10]: # 只显示前10条
print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}")
if len(invalid_file_relations) > 10:
print(f" ... 还有 {len(invalid_file_relations) - 10}")
else:
print(f"\n[OK] 所有 file_id 关联都有效")
if invalid_field_relations:
print(f"\n[ERROR] 发现无效的 filed_id 关联 ({len(invalid_field_relations)} 条):")
for rel in invalid_field_relations[:10]: # 只显示前10条
print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}")
if len(invalid_field_relations) > 10:
print(f" ... 还有 {len(invalid_field_relations) - 10}")
else:
print(f"\n[OK] 所有 filed_id 关联都有效")
if duplicate_relations:
print(f"\n[WARN] 发现重复的关联关系 ({len(duplicate_relations)} 组):")
for dup in duplicate_relations[:10]: # 只显示前10组
print(f" - file_id: {dup['file_id']}, filed_id: {dup['filed_id']}, 重复次数: {dup['count']}, 关联ID: {dup['ids']}")
if len(duplicate_relations) > 10:
print(f" ... 还有 {len(duplicate_relations) - 10}")
else:
print(f"\n[OK] 没有重复的关联关系")
if disabled_file_relations:
print(f"\n[WARN] 发现关联到未启用文件的记录 ({len(disabled_file_relations)} 条):")
for rel in disabled_file_relations[:10]:
print(f" - 文件: {rel['file_name']} (ID: {rel['file_id']}, 状态: {rel['file_state']})")
if len(disabled_file_relations) > 10:
print(f" ... 还有 {len(disabled_file_relations) - 10}")
if disabled_field_relations:
print(f"\n[WARN] 发现关联到未启用字段的记录 ({len(disabled_field_relations)} 条):")
for rel in disabled_field_relations[:10]:
print(f" - 字段: {rel['field_name']} ({rel['filed_code']}, ID: {rel['filed_id']}, 状态: {rel['field_state']})")
if len(disabled_field_relations) > 10:
print(f" ... 还有 {len(disabled_field_relations) - 10}")
cursor.close()
return {
'all_relations': all_relations,
'invalid_file_relations': invalid_file_relations,
'invalid_field_relations': invalid_field_relations,
'duplicate_relations': duplicate_relations,
'disabled_file_relations': disabled_file_relations,
'disabled_field_relations': disabled_field_relations,
'file_field_counts': dict(file_field_counts)
}
def check_template_file_mapping(conn, file_configs: Dict) -> Dict:
"""检查模板文件与数据库记录的映射关系"""
print("\n" + "="*80)
print("3. 检查模板文件与数据库记录的映射关系")
print("="*80)
import os
templates = get_template_files()
print(f"\n本地模板文件数: {len(templates)}")
cursor = conn.cursor(pymysql.cursors.DictCursor)
# 检查每个模板文件是否在数据库中有对应记录
missing_in_db = []
found_in_db = []
duplicate_mappings = []
for template_name, file_path in templates.items():
template_code = DOCUMENT_TYPE_MAPPING.get(template_name)
if not template_code:
continue
# 通过 name 和 template_code 查找对应的数据库记录
# 优先通过 name 精确匹配,然后通过 template_code 匹配
matching_configs = []
# 1. 通过 name 精确匹配
if template_name in file_configs['configs_by_name']:
for config in file_configs['configs_by_name'][template_name]:
if config.get('file_path'): # 有文件路径的记录
matching_configs.append(config)
# 2. 通过 template_code 匹配
if template_code in file_configs['configs_by_code']:
for config in file_configs['configs_by_code'][template_code]:
if config.get('file_path') and config not in matching_configs:
matching_configs.append(config)
if len(matching_configs) == 0:
missing_in_db.append({
'template_name': template_name,
'template_code': template_code,
'file_path': str(file_path)
})
elif len(matching_configs) == 1:
config = matching_configs[0]
found_in_db.append({
'template_name': template_name,
'template_code': template_code,
'file_id': config['id'],
'file_path': config.get('file_path'),
'name': config.get('name')
})
else:
# 多个匹配,选择 file_path 最新的(包含最新日期的)
duplicate_mappings.append({
'template_name': template_name,
'template_code': template_code,
'matching_configs': matching_configs
})
# 仍然记录第一个作为找到的记录
config = matching_configs[0]
found_in_db.append({
'template_name': template_name,
'template_code': template_code,
'file_id': config['id'],
'file_path': config.get('file_path'),
'name': config.get('name'),
'is_duplicate': True
})
print(f"\n找到数据库记录的模板: {len(found_in_db)}")
print(f"未找到数据库记录的模板: {len(missing_in_db)}")
print(f"有重复映射的模板: {len(duplicate_mappings)}")
if duplicate_mappings:
print(f"\n[WARN] 以下模板文件在数据库中有多个匹配记录:")
for item in duplicate_mappings:
print(f" - {item['template_name']} (template_code: {item['template_code']}):")
for cfg in item['matching_configs']:
print(f" * file_id: {cfg['id']}, name: {cfg.get('name')}, path: {cfg.get('file_path', 'N/A')}")
if missing_in_db:
print(f"\n[WARN] 以下模板文件在数据库中没有对应记录:")
for item in missing_in_db:
print(f" - {item['template_name']} (template_code: {item['template_code']})")
cursor.close()
return {
'found_in_db': found_in_db,
'missing_in_db': missing_in_db,
'duplicate_mappings': duplicate_mappings
}
def check_field_type_consistency(conn, relations: Dict) -> Dict:
"""检查关联关系的字段类型一致性"""
print("\n" + "="*80)
print("4. 检查关联关系的字段类型一致性")
print("="*80)
cursor = conn.cursor(pymysql.cursors.DictCursor)
# 查询所有关联关系及其字段类型
cursor.execute("""
SELECT
fff.id,
fff.file_id,
fff.filed_id,
fc.name as file_name,
f.name as field_name,
f.filed_code,
f.field_type,
CASE
WHEN f.field_type = 1 THEN '输入字段'
WHEN f.field_type = 2 THEN '输出字段'
ELSE '未知'
END as field_type_name
FROM f_polic_file_field fff
INNER JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id
INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
WHERE fff.tenant_id = %s
ORDER BY fff.file_id, f.field_type, f.name
""", (TENANT_ID,))
all_relations_with_type = cursor.fetchall()
# 统计字段类型分布
input_fields = [r for r in all_relations_with_type if r['field_type'] == 1]
output_fields = [r for r in all_relations_with_type if r['field_type'] == 2]
print(f"\n字段类型统计:")
print(f" 输入字段 (field_type=1): {len(input_fields)} 条关联")
print(f" 输出字段 (field_type=2): {len(output_fields)} 条关联")
# 按文件统计
file_type_counts = defaultdict(lambda: {'input': 0, 'output': 0})
for rel in all_relations_with_type:
file_id = rel['file_id']
if rel['field_type'] == 1:
file_type_counts[file_id]['input'] += 1
elif rel['field_type'] == 2:
file_type_counts[file_id]['output'] += 1
print(f"\n每个文件的字段类型分布:")
for file_id, counts in sorted(file_type_counts.items())[:10]: # 只显示前10个
print(f" 文件ID {file_id}: 输入字段 {counts['input']} 个, 输出字段 {counts['output']}")
if len(file_type_counts) > 10:
print(f" ... 还有 {len(file_type_counts) - 10} 个文件")
cursor.close()
return {
'input_fields': input_fields,
'output_fields': output_fields,
'file_type_counts': dict(file_type_counts)
}
def main():
"""主函数"""
print("="*80)
print("检查模板的 file_id 和相关关联关系")
print("="*80)
# 连接数据库
try:
conn = pymysql.connect(**DB_CONFIG)
print("\n[OK] 数据库连接成功")
except Exception as e:
print(f"\n[ERROR] 数据库连接失败: {e}")
return
try:
# 1. 检查文件配置表
file_configs = check_file_configs(conn)
# 2. 检查文件字段关联表
relations = check_file_field_relations(conn)
# 3. 检查模板文件与数据库记录的映射
template_mapping = check_template_file_mapping(conn, file_configs)
# 4. 检查字段类型一致性
field_type_info = check_field_type_consistency(conn, relations)
# 汇总报告
print("\n" + "="*80)
print("检查汇总")
print("="*80)
issues = []
if file_configs['duplicate_codes']:
issues.append(f"发现 {len(file_configs['duplicate_codes'])} 个重复的 template_code")
if file_configs['duplicate_names']:
issues.append(f"发现 {len(file_configs['duplicate_names'])} 个重复的 name")
if file_configs['empty_path_configs']:
issues.append(f"发现 {len(file_configs['empty_path_configs'])} 个 file_path 为空的记录")
if relations['invalid_file_relations']:
issues.append(f"发现 {len(relations['invalid_file_relations'])} 条无效的 file_id 关联")
if relations['invalid_field_relations']:
issues.append(f"发现 {len(relations['invalid_field_relations'])} 条无效的 filed_id 关联")
if relations['duplicate_relations']:
issues.append(f"发现 {len(relations['duplicate_relations'])} 组重复的关联关系")
if template_mapping['missing_in_db']:
issues.append(f"发现 {len(template_mapping['missing_in_db'])} 个模板文件在数据库中没有对应记录")
if issues:
print("\n[WARN] 发现以下问题:")
for issue in issues:
print(f" - {issue}")
else:
print("\n[OK] 未发现严重问题")
print(f"\n总模板记录数: {len(file_configs['all_configs'])}")
print(f"总关联关系数: {len(relations['all_relations'])}")
print(f"有关联关系的文件数: {len(relations['file_field_counts'])}")
finally:
conn.close()
print("\n数据库连接已关闭")
if __name__ == '__main__':
import os
main()