diff --git a/analyze_duplicate_fields.py b/analyze_duplicate_fields.py new file mode 100644 index 0000000..d880a28 --- /dev/null +++ b/analyze_duplicate_fields.py @@ -0,0 +1,148 @@ +"""分析 f_polic_field 表中的重复字段""" +import pymysql +import os +from dotenv import load_dotenv +from collections import defaultdict + +load_dotenv() + +TENANT_ID = 615873064429507639 + +conn = pymysql.connect( + host=os.getenv('DB_HOST', '152.136.177.240'), + port=int(os.getenv('DB_PORT', 5012)), + user=os.getenv('DB_USER', 'finyx'), + password=os.environ['DB_PASSWORD'], # required from the environment/.env; never commit a secret as a default + database=os.getenv('DB_NAME', 'finyx'), + charset='utf8mb4' +) + +cursor = conn.cursor(pymysql.cursors.DictCursor) + +print("=" * 80) +print("1. 分析按 name 字段的重复情况") +print("=" * 80) + +# 查询所有字段 +cursor.execute(""" + SELECT id, name, filed_code, field_type, state + FROM f_polic_field + WHERE tenant_id = %s + ORDER BY name, id +""", (TENANT_ID,)) +all_fields = cursor.fetchall() + +# 按 name 分组 +name_groups = defaultdict(list) +for field in all_fields: + name_groups[field['name']].append(field) + +# 找出重复的 name +duplicate_names = {name: fields for name, fields in name_groups.items() if len(fields) > 1} + +print(f"\n发现 {len(duplicate_names)} 个重复的字段名称:\n") +for name, fields in sorted(duplicate_names.items()): + print(f"字段名称: {name}") + for field in fields: + print(f" ID: {field['id']}, filed_code: {field['filed_code']}, field_type: {field['field_type']}, state: {field['state']}") + print() + +print("\n" + "=" * 80) +print("2. 
分析按 filed_code 字段的重复情况") +print("=" * 80) + +# 按 filed_code 分组 +code_groups = defaultdict(list) +for field in all_fields: + code_groups[field['filed_code']].append(field) + +# 找出重复的 filed_code +duplicate_codes = {code: fields for code, fields in code_groups.items() if len(fields) > 1} + +print(f"\n发现 {len(duplicate_codes)} 个重复的字段编码:\n") +for code, fields in sorted(duplicate_codes.items()): + print(f"字段编码: {code}") + for field in fields: + print(f" ID: {field['id']}, name: {field['name']}, field_type: {field['field_type']}, state: {field['state']}") + print() + +print("\n" + "=" * 80) +print("3. 分析重复字段的关联关系(f_polic_file_field)") +print("=" * 80) + +# 获取所有重复字段的ID +all_duplicate_field_ids = set() +for fields in duplicate_names.values(): + for field in fields: + all_duplicate_field_ids.add(field['id']) +for fields in duplicate_codes.values(): + for field in fields: + all_duplicate_field_ids.add(field['id']) + +if all_duplicate_field_ids: + placeholders = ','.join(['%s'] * len(all_duplicate_field_ids)) + cursor.execute(f""" + SELECT ff.file_id, ff.filed_id, f.name, f.filed_code, fc.name as file_name, fc.state as file_state + FROM f_polic_file_field ff + INNER JOIN f_polic_field f ON ff.filed_id = f.id + INNER JOIN f_polic_file_config fc ON ff.file_id = fc.id + WHERE ff.filed_id IN ({placeholders}) + AND f.tenant_id = %s + ORDER BY f.filed_code, ff.file_id + """, list(all_duplicate_field_ids) + [TENANT_ID]) + + associations = cursor.fetchall() + + # 按 filed_code 分组关联关系 + code_associations = defaultdict(list) + for assoc in associations: + code_associations[assoc['filed_code']].append(assoc) + + print(f"\n重复字段的关联关系:\n") + for code, assocs in sorted(code_associations.items()): + print(f"字段编码: {code} ({assocs[0]['name']})") + for assoc in assocs: + print(f" 字段ID: {assoc['filed_id']}, 文件ID: {assoc['file_id']}, 文件名: {assoc['file_name']}, 文件状态: {assoc['file_state']}") + print() +else: + print("\n没有发现重复字段的关联关系") + +print("\n" + "=" * 80) +print("4. 
统计每个 filed_code 关联的模板数量") +print("=" * 80) + +cursor.execute(""" + SELECT f.filed_code, f.name, COUNT(DISTINCT ff.file_id) as template_count, + GROUP_CONCAT(DISTINCT ff.filed_id ORDER BY ff.filed_id) as field_ids, + GROUP_CONCAT(DISTINCT fc.name ORDER BY fc.name SEPARATOR ' | ') as template_names + FROM f_polic_field f + LEFT JOIN f_polic_file_field ff ON f.id = ff.filed_id + LEFT JOIN f_polic_file_config fc ON ff.file_id = fc.id AND fc.state = 1 + WHERE f.tenant_id = %s + GROUP BY f.filed_code, f.name + HAVING COUNT(DISTINCT ff.filed_id) > 0 OR f.filed_code IN ( + SELECT filed_code FROM ( + SELECT filed_code, COUNT(*) as cnt + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY filed_code + HAVING cnt > 1 + ) AS dup + ) + ORDER BY template_count DESC, f.filed_code +""", (TENANT_ID, TENANT_ID)) + +stats = cursor.fetchall() +print(f"\n字段关联统计(包含重复字段):\n") +for stat in stats: + print(f"字段编码: {stat['filed_code']}") + print(f" 字段名称: {stat['name']}") + print(f" 关联模板数: {stat['template_count']}") + print(f" 字段ID列表: {stat['field_ids']}") + if stat['template_names']: + print(f" 关联模板: {stat['template_names']}") + print() + +cursor.close() +conn.close() + diff --git a/fix_duplicate_fields.py b/fix_duplicate_fields.py new file mode 100644 index 0000000..15c1c49 --- /dev/null +++ b/fix_duplicate_fields.py @@ -0,0 +1,176 @@ +"""修复 f_polic_field 表中的重复字段""" +import pymysql +import os +from dotenv import load_dotenv +from collections import defaultdict + +load_dotenv() + +TENANT_ID = 615873064429507639 + +conn = pymysql.connect( + host=os.getenv('DB_HOST', '152.136.177.240'), + port=int(os.getenv('DB_PORT', 5012)), + user=os.getenv('DB_USER', 'finyx'), + password=os.environ['DB_PASSWORD'], # required from the environment/.env; this script deletes rows, so it must never connect via a baked-in secret + database=os.getenv('DB_NAME', 'finyx'), + charset='utf8mb4' +) + +cursor = conn.cursor(pymysql.cursors.DictCursor) + +print("=" * 80) +print("修复重复字段") +print("=" * 80) + +# 1. 
查找所有重复的 filed_code +cursor.execute(""" + SELECT filed_code, COUNT(*) as cnt, GROUP_CONCAT(id ORDER BY id) as field_ids + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY filed_code + HAVING cnt > 1 +""", (TENANT_ID,)) + +duplicate_codes = cursor.fetchall() + +print(f"\n发现 {len(duplicate_codes)} 个重复的字段编码:\n") + +for dup in duplicate_codes: + code = dup['filed_code'] + field_ids = [int(x) for x in dup['field_ids'].split(',')] + + print(f"\n处理字段编码: {code}") + print(f" 字段ID列表: {field_ids}") + + # 获取每个字段的详细信息 + placeholders = ','.join(['%s'] * len(field_ids)) + cursor.execute(f""" + SELECT id, name, field_type, state + FROM f_polic_field + WHERE id IN ({placeholders}) + ORDER BY id + """, field_ids) + + fields = cursor.fetchall() + + # 获取每个字段的关联关系 + field_associations = {} + for field_id in field_ids: + cursor.execute(""" + SELECT COUNT(*) as cnt, GROUP_CONCAT(file_id) as file_ids + FROM f_polic_file_field + WHERE filed_id = %s + """, (field_id,)) + result = cursor.fetchone() + field_associations[field_id] = { + 'count': result['cnt'] if result else 0, + 'file_ids': result['file_ids'].split(',') if result and result['file_ids'] else [] + } + + print(f"\n 字段详情和关联关系:") + for field in fields: + assoc = field_associations[field['id']] + print(f" ID: {field['id']}, name: {field['name']}, " + f"field_type: {field['field_type']}, state: {field['state']}, " + f"关联模板数: {assoc['count']}") + + # 选择保留的字段(优先选择关联模板数最多的,如果相同则选择ID较小的) + fields_with_assoc = [(f, field_associations[f['id']]) for f in fields] + fields_with_assoc.sort(key=lambda x: (-x[1]['count'], x[0]['id'])) + + keep_field = fields_with_assoc[0][0] + remove_fields = [f for f, _ in fields_with_assoc[1:]] + + print(f"\n 保留字段: ID={keep_field['id']}, name={keep_field['name']}, " + f"关联模板数={field_associations[keep_field['id']]['count']}") + print(f" 删除字段: {[f['id'] for f in remove_fields]}") + + # 迁移关联关系:将删除字段的关联关系迁移到保留字段 + for remove_field in remove_fields: + remove_id = remove_field['id'] + keep_id = keep_field['id'] + 
+ # 获取删除字段的所有关联 + cursor.execute(""" + SELECT file_id + FROM f_polic_file_field + WHERE filed_id = %s + """, (remove_id,)) + remove_assocs = cursor.fetchall() + + migrated_count = 0 + skipped_count = 0 + + for assoc in remove_assocs: + file_id = assoc['file_id'] + + # 检查保留字段是否已经关联了这个文件 + cursor.execute(""" + SELECT COUNT(*) as cnt + FROM f_polic_file_field + WHERE filed_id = %s AND file_id = %s + """, (keep_id, file_id)) + exists = cursor.fetchone()['cnt'] > 0 + + if not exists: + # 迁移关联关系 + cursor.execute(""" + UPDATE f_polic_file_field + SET filed_id = %s + WHERE filed_id = %s AND file_id = %s + """, (keep_id, remove_id, file_id)) + migrated_count += 1 + else: + # 如果已存在,直接删除重复的关联 + cursor.execute(""" + DELETE FROM f_polic_file_field + WHERE filed_id = %s AND file_id = %s + """, (remove_id, file_id)) + skipped_count += 1 + + print(f" 字段ID {remove_id} -> {keep_id}: 迁移 {migrated_count} 个关联, 跳过 {skipped_count} 个重复关联") + + # 删除字段的所有关联关系(应该已经迁移或删除完毕) + cursor.execute(""" + DELETE FROM f_polic_file_field + WHERE filed_id = %s + """, (remove_id,)) + + # 删除字段本身 + cursor.execute(""" + DELETE FROM f_polic_field + WHERE id = %s + """, (remove_id,)) + + print(f" 已删除字段 ID {remove_id} 及其关联关系") + +print("\n" + "=" * 80) +print("验证修复结果") +print("=" * 80) + +# 再次检查是否还有重复 +cursor.execute(""" + SELECT filed_code, COUNT(*) as cnt + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY filed_code + HAVING cnt > 1 +""", (TENANT_ID,)) + +remaining_duplicates = cursor.fetchall() + +if remaining_duplicates: + print(f"\n警告:仍有 {len(remaining_duplicates)} 个重复的字段编码:") + for dup in remaining_duplicates: + print(f" {dup['filed_code']}: {dup['cnt']} 个") +else: + print("\n[OK] 所有重复字段已修复,filed_code 现在唯一") + +# 提交事务 +conn.commit() +print("\n[OK] 所有更改已提交到数据库") + +cursor.close() +conn.close() + diff --git a/services/document_service.py b/services/document_service.py index 8e7a64d..0f4111c 100644 --- a/services/document_service.py +++ b/services/document_service.py @@ -132,14 +132,61 @@ class 
DocumentService: """ try: print(f"[DEBUG] 开始填充模板: {template_path}") - print(f"[DEBUG] 字段数据: {field_data}") + print(f"[DEBUG] 传入的字段数据: {field_data}") # 打开模板文档 doc = Document(template_path) print(f"[DEBUG] 文档包含 {len(doc.paragraphs)} 个段落, {len(doc.tables)} 个表格") + # 第一步:扫描模板文档,找出所有占位符(格式:{{field_code}}) + # 使用正则表达式匹配所有占位符 + placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') + all_placeholders_in_template = set() + + # 扫描段落中的占位符 + for paragraph in doc.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + all_placeholders_in_template.add(field_code) + + # 扫描表格中的占位符 + for table in doc.tables: + for row in table.rows: + for cell in row.cells: + if hasattr(cell, 'paragraphs'): + for paragraph in cell.paragraphs: + text = paragraph.text + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: + all_placeholders_in_template.add(field_code) + + print(f"[DEBUG] 模板中发现 {len(all_placeholders_in_template)} 个不同的占位符: {sorted(all_placeholders_in_template)}") + + # 第二步:对于模板中存在的占位符,如果field_data中没有对应的值,则使用空字符串 + # 创建一个完整的字段数据字典,包含所有需要的字段 + complete_field_data = {} + for field_code in all_placeholders_in_template: + # 如果传入的数据中有该字段,使用传入的值;否则使用空字符串 + complete_field_data[field_code] = field_data.get(field_code, '') + + # 同时保留传入的字段数据(可能包含模板中没有的字段,虽然不会使用,但保留以兼容) + for field_code, field_value in field_data.items(): + if field_code not in complete_field_data: + complete_field_data[field_code] = field_value or '' + + print(f"[DEBUG] 完整的字段数据(包含默认空值): {complete_field_data}") + print(f"[DEBUG] 补充的空值字段: {sorted(set(complete_field_data.keys()) - set(field_data.keys()))}") + + # 使用完整的字段数据进行替换 + field_data = complete_field_data + def replace_placeholder_in_paragraph(paragraph): - """在段落中替换占位符(处理跨run的情况)""" + """在段落中替换占位符,保持原有格式(处理跨run的情况)""" try: # 获取段落完整文本 full_text = paragraph.text @@ -148,38 +195,120 @@ class DocumentService: # 检查是否有占位符需要替换 
has_placeholder = False - replaced_text = full_text - replacement_count = 0 + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in full_text: + has_placeholder = True + break - # 遍历所有字段,替换所有匹配的占位符(包括重复的) + if not has_placeholder: + return + + # 收集所有runs及其位置和格式信息 + runs_info = [] + current_pos = 0 + for run in paragraph.runs: + run_text = run.text + run_start = current_pos + run_end = current_pos + len(run_text) + + # 保存run的格式信息 + format_info = {} + try: + if run.font.name: + format_info['font_name'] = run.font.name + if run.font.size: + format_info['font_size'] = run.font.size + if run.bold is not None: + format_info['bold'] = run.bold + if run.italic is not None: + format_info['italic'] = run.italic + if run.underline is not None: + format_info['underline'] = run.underline + if run.font.color and run.font.color.rgb: + format_info['color'] = run.font.color.rgb + except: + pass + + runs_info.append({ + 'run': run, + 'text': run_text, + 'start': run_start, + 'end': run_end, + 'format': format_info + }) + current_pos = run_end + + # 执行所有替换,构建最终文本 + final_text = full_text + replacement_count = 0 for field_code, field_value in field_data.items(): placeholder = f"{{{{{field_code}}}}}" - # 使用循环替换所有匹配项(不仅仅是第一个) - while placeholder in replaced_text: - has_placeholder = True + replacement_value = str(field_value) if field_value else '' + # 替换所有出现的占位符 + while placeholder in final_text: + final_text = final_text.replace(placeholder, replacement_value, 1) replacement_count += 1 - # 替换占位符,如果值为空则替换为空字符串 - replaced_text = replaced_text.replace(placeholder, str(field_value) if field_value else '', 1) - print(f"[DEBUG] 替换占位符: {placeholder} -> '{field_value}' (在段落中)") + print(f"[DEBUG] 替换占位符: {placeholder} -> '{replacement_value}'") - # 如果有替换,使用安全的方式更新段落文本 - if has_placeholder: - print(f"[DEBUG] 段落替换了 {replacement_count} 个占位符: '{full_text[:50]}...' 
-> '{replaced_text[:50]}...'") - try: - # 方法1:直接设置text(推荐,会自动处理run) - paragraph.text = replaced_text - except Exception as e1: - # 如果方法1失败,尝试方法2:手动处理run + # 找到包含占位符的第一个run,使用它的格式 + placeholder_run_format = None + for run_info in runs_info: + run_text = run_info['text'] + # 检查这个run是否包含任何占位符 + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in run_text: + placeholder_run_format = run_info['format'] + break + if placeholder_run_format: + break + + # 如果没有找到包含占位符的run,使用第一个run的格式 + if not placeholder_run_format and runs_info: + placeholder_run_format = runs_info[0]['format'] + + # 如果只有一个run,直接替换文本(会自动保持格式) + if len(runs_info) == 1: + runs_info[0]['run'].text = final_text + else: + # 多个run的情况:合并为一个run,保持格式 + # 先清空所有runs + for run_info in runs_info: + run_info['run'].text = '' + + # 在第一个run中添加替换后的文本 + first_run = runs_info[0]['run'] + first_run.text = final_text + + # 应用格式(使用包含占位符的run的格式,或第一个run的格式) + if placeholder_run_format: try: - # 清空所有run - paragraph.clear() - # 添加新的run - if replaced_text: - paragraph.add_run(replaced_text) - except Exception as e2: - # 如果两种方法都失败,记录错误但继续 - print(f"[WARN] 无法更新段落文本,方法1错误: {str(e1)}, 方法2错误: {str(e2)}") + if 'font_name' in placeholder_run_format: + first_run.font.name = placeholder_run_format['font_name'] + if 'font_size' in placeholder_run_format: + first_run.font.size = placeholder_run_format['font_size'] + if 'bold' in placeholder_run_format: + first_run.bold = placeholder_run_format['bold'] + if 'italic' in placeholder_run_format: + first_run.italic = placeholder_run_format['italic'] + if 'underline' in placeholder_run_format: + first_run.underline = placeholder_run_format['underline'] + if 'color' in placeholder_run_format: + first_run.font.color.rgb = placeholder_run_format['color'] + except Exception as fmt_error: + print(f"[WARN] 应用格式时出错: {str(fmt_error)}") + + # 删除其他空的runs(从后往前删除,避免索引问题) + for i in range(len(runs_info) - 1, 0, -1): + run_element = runs_info[i]['run']._element + try: 
+ paragraph._element.remove(run_element) + except: pass + + print(f"[DEBUG] 段落替换了 {replacement_count} 个占位符(保持格式): '{full_text[:50]}...' -> '{final_text[:50]}...'") + except Exception as e: # 如果单个段落处理失败,记录错误但继续处理其他段落 print(f"[WARN] 处理段落时出错: {str(e)}") @@ -238,13 +367,14 @@ class DocumentService: print(f"[WARN] 处理表格时出错: {str(e)}") pass - # 验证是否还有未替换的占位符 + # 第三步:验证是否还有未替换的占位符(使用正则表达式匹配所有可能的占位符) remaining_placeholders = set() for paragraph in doc.paragraphs: text = paragraph.text - for field_code in field_data.keys(): - placeholder = f"{{{{{field_code}}}}}" - if placeholder in text: + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: remaining_placeholders.add(field_code) # 检查表格中的占位符 @@ -254,17 +384,20 @@ class DocumentService: if hasattr(cell, 'paragraphs'): for paragraph in cell.paragraphs: text = paragraph.text - for field_code in field_data.keys(): - placeholder = f"{{{{{field_code}}}}}" - if placeholder in text: + matches = placeholder_pattern.findall(text) + for match in matches: + field_code = match.strip() + if field_code: remaining_placeholders.add(field_code) # 输出统计信息 print(f"[DEBUG] 占位符替换统计:") + print(f" - 模板中的占位符总数: {len(all_placeholders_in_template)}") print(f" - 已替换的占位符: {sorted(replaced_placeholders)}") print(f" - 总替换次数: {total_replacements}") if remaining_placeholders: print(f" - ⚠️ 仍有未替换的占位符: {sorted(remaining_placeholders)}") + print(f" - ⚠️ 警告:文档中仍存在占位符,可能格式不正确或替换逻辑有问题") else: print(f" - ✓ 所有占位符已成功替换") diff --git a/static/index.html b/static/index.html index dacda09..5084751 100644 --- a/static/index.html +++ b/static/index.html @@ -587,21 +587,97 @@ } async function initGenerateTab() { - // 初始化默认字段(完整的虚拟测试数据) + // 初始化所有字段(完整的虚拟测试数据) + // 基本信息字段 addGenerateField('target_name', '张三'); addGenerateField('target_gender', '男'); addGenerateField('target_age', '34'); addGenerateField('target_date_of_birth', '199009'); + addGenerateField('target_date_of_birth_full', '1990年9月'); + 
addGenerateField('target_id_number', '530123199009123456'); + addGenerateField('target_ethnicity', '汉族'); + addGenerateField('target_place_of_origin', '云南普洱'); + addGenerateField('target_address', '云南省昆明市五华区某某街道某某小区1栋1单元101室'); + addGenerateField('target_registered_address', '云南省昆明市五华区某某街道某某小区1栋1单元101室'); + addGenerateField('target_contact', '13800138000'); + + // 组织和工作信息 addGenerateField('target_organization_and_position', '云南省农业机械公司党支部书记、经理'); addGenerateField('target_organization', '云南省农业机械公司'); addGenerateField('target_position', '党支部书记、经理'); addGenerateField('target_education_level', '研究生'); + addGenerateField('target_education', '研究生'); addGenerateField('target_political_status', '中共党员'); - addGenerateField('target_professional_rank', ''); - addGenerateField('clue_source', ''); + addGenerateField('target_professional_rank', '高级工程师'); + addGenerateField('target_occupation', '企业管理人员'); + addGenerateField('target_work_basic_info', '2005年8月参加工作,现任云南省农业机械公司党支部书记、经理'); + addGenerateField('target_work_history', '2004年8月至2005年2月,在云南省农业机械公司工作;2005年2月至2012年2月,历任云南省农业机械公司办公室副主任、主任、团委书记;2012年2月至2018年3月,任云南省农业机械公司支部书记、厂长;2018年3月至2020年3月,任云南省农业机械公司总经理助理、销售部部长;2020年3月至2022年3月,任云南省农业机械公司总经理助理;2022年3月至2022年7月,任云南省农业机械公司大理分公司副经理;2022年7月至2023年12月,任云南省农业机械公司西双版纳分公司经理;2023年12月至今,任云南省农业机械公司党支部书记、经理。'); + addGenerateField('target_basic_info', '张三,男,汉族,1990年9月出生,云南普洱人,研究生学历,中共党员,现任云南省农业机械公司党支部书记、经理。'); + + // 线索和问题信息 + addGenerateField('clue_info', '张三多次在私下聚会、网络群组中发表抹黑党中央决策部署的言论,传播歪曲党的理论和路线方针政策的错误观点,频繁接受管理服务对象安排的高档宴请、私人会所聚餐,以及高尔夫球、高端足浴等娱乐活动,相关费用均由对方全额承担,在干部选拔任用、岗位调整工作中,利用职务便利收受他人财物,利用职权为其亲属经营的公司谋取不正当利益,帮助该公司违规承接本单位及关联单位工程项目3个,合同总额超200万元,从中收受亲属给予的"感谢费"15万元;其本人沉迷赌博活动,每周至少参与1次大额赌资赌博,单次赌资超1万元,累计赌资达数十万元。'); + addGenerateField('target_basic_info_clue', 
'张三,男,汉族,1990年9月出生,云南普洱人,研究生学历,2005年8月参加工作,2006年10月加入中国共产党。2004年8月至2005年2月,在云南省农业机械公司工作;2005年2月至2012年2月,历任云南省农业机械公司办公室副主任、主任、团委书记;2012年2月至2018年3月,任云南省农业机械公司支部书记、厂长;2018年3月至2020年3月,任云南省农业机械公司总经理助理、销售部部长;2020年3月至2022年3月,任云南省农业机械公司总经理助理;2022年3月至2022年7月,任云南省农业机械公司大理分公司副经理;2022年7月至2023年12月,任云南省农业机械公司西双版纳分公司经理;2023年12月至今,任云南省农业机械公司党支部书记、经理。'); + addGenerateField('clue_source', '群众举报'); addGenerateField('target_issue_description', '张三多次在私下聚会、网络群组中发表抹黑党中央决策部署的言论,传播歪曲党的理论和路线方针政策的错误观点,频繁接受管理服务对象安排的高档宴请、私人会所聚餐,以及高尔夫球、高端足浴等娱乐活动,相关费用均由对方全额承担,在干部选拔任用、岗位调整工作中,利用职务便利收受他人财物,利用职权为其亲属经营的公司谋取不正当利益,帮助该公司违规承接本单位及关联单位工程项目3个,合同总额超200万元,从中收受亲属给予的"感谢费"15万元;其本人沉迷赌博活动,每周至少参与1次大额赌资赌博,单次赌资超1万元,累计赌资达数十万元。'); - addGenerateField('department_opinion', ''); - addGenerateField('filler_name', ''); + addGenerateField('target_problem_description', '违反政治纪律、组织纪律、廉洁纪律,涉嫌违纪违法'); + addGenerateField('target_issue_severity', '严重'); + addGenerateField('target_issue_severity_level', '严重'); + addGenerateField('target_other_issues_possibility', '较大'); + + // 个人情况评估 + addGenerateField('target_family_situation', '家庭关系和谐稳定'); + addGenerateField('target_social_relations', '社会交往较多,人际关系基本正常'); + addGenerateField('target_health_status', '良好'); + addGenerateField('target_personality', '开朗'); + addGenerateField('target_tolerance', '较强'); + addGenerateField('target_previous_investigation', '无'); + addGenerateField('target_negative_events', '无'); + addGenerateField('target_other_situation', '无'); + + // 谈话和调查相关 + addGenerateField('target_attitude', '配合调查'); + addGenerateField('target_confession_level', '部分承认'); + addGenerateField('target_behavior_during_interview', '情绪稳定,配合调查'); + addGenerateField('target_behavior_after_relief', '情绪有所缓解'); + addGenerateField('target_mental_burden_level', '中等'); + addGenerateField('target_risk_level', '中'); + addGenerateField('risk_level', '中'); + addGenerateField('pre_interview_risk_assessment_result', '风险等级:中,已制定安全预案'); + + // 调查组织和人员 + addGenerateField('investigation_unit_name', '纪检监察室'); 
+ addGenerateField('investigation_team_code', 'JC2024001'); + addGenerateField('investigation_team_leader_name', '赵六'); + addGenerateField('investigation_team_member_names', '赵六、钱七、孙八'); + addGenerateField('investigation_location', '纪检监察室谈话室'); + addGenerateField('handler_name', '王五'); + addGenerateField('handling_department', '纪检监察室'); + addGenerateField('commission_name', '中共某某市纪律检查委员会'); + + // 谈话相关 + addGenerateField('interview_location', '纪检监察室谈话室'); + addGenerateField('proposed_interview_location', '纪检监察室谈话室'); + addGenerateField('notification_location', '纪检监察室'); + addGenerateField('appointment_location', '纪检监察室谈话室'); + addGenerateField('interview_time', '2024年12月10日14:00'); + addGenerateField('proposed_interview_time', '2024年12月10日14:00'); + addGenerateField('notification_time', '2024年12月9日'); + addGenerateField('appointment_time', '2024年12月10日14:00'); + addGenerateField('interview_reason', '就相关问题进行核实了解'); + addGenerateField('interview_count', '1'); + addGenerateField('interviewer', '赵六'); + addGenerateField('recorder', '钱七'); + addGenerateField('interview_personnel', '赵六、钱七'); + addGenerateField('interview_personnel_leader', '赵六'); + addGenerateField('interview_personnel_safety_officer', '孙八'); + addGenerateField('backup_personnel', '周九'); + + // 审批和意见 + addGenerateField('approval_time', '2024年12月8日'); + addGenerateField('report_card_request_time', '2024年12月8日'); + addGenerateField('department_opinion', '经初步核实,建议立案调查'); + addGenerateField('assessment_opinion', '建议进行谈话核实'); + addGenerateField('filler_name', '李四'); // 自动加载所有可用的文件列表 try { diff --git a/template_finish/2-初核模版/1.初核请示/~$初步核实审批表(XXX).docx b/template_finish/2-初核模版/1.初核请示/~$初步核实审批表(XXX).docx deleted file mode 100644 index 8efa2ad..0000000 Binary files a/template_finish/2-初核模版/1.初核请示/~$初步核实审批表(XXX).docx and /dev/null differ diff --git a/template_finish/2-初核模版/1.初核请示/~$请示报告卡(XXX).docx b/template_finish/2-初核模版/1.初核请示/~$请示报告卡(XXX).docx deleted file mode 100644 index 8efa2ad..0000000 Binary files 
a/template_finish/2-初核模版/1.初核请示/~$请示报告卡(XXX).docx and /dev/null differ diff --git a/template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx b/template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx index e42f7e5..04f208b 100644 Binary files a/template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx and b/template_finish/2-初核模版/3.初核结论/8-1请示报告卡(初核报告结论) .docx differ diff --git a/verify_field_uniqueness.py b/verify_field_uniqueness.py new file mode 100644 index 0000000..1fcf370 --- /dev/null +++ b/verify_field_uniqueness.py @@ -0,0 +1,103 @@ +"""验证字段唯一性""" +import pymysql +import os +from dotenv import load_dotenv + +load_dotenv() + +TENANT_ID = 615873064429507639 + +conn = pymysql.connect( + host=os.getenv('DB_HOST', '152.136.177.240'), + port=int(os.getenv('DB_PORT', 5012)), + user=os.getenv('DB_USER', 'finyx'), + password=os.environ['DB_PASSWORD'], # required from the environment/.env; never commit a secret as a default + database=os.getenv('DB_NAME', 'finyx'), + charset='utf8mb4' +) + +cursor = conn.cursor(pymysql.cursors.DictCursor) + +print("=" * 80) +print("验证字段唯一性") +print("=" * 80) + +# 检查 filed_code 重复 +cursor.execute(""" + SELECT filed_code, COUNT(*) as cnt, GROUP_CONCAT(id ORDER BY id) as field_ids + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY filed_code + HAVING cnt > 1 +""", (TENANT_ID,)) + +duplicates = cursor.fetchall() + +if duplicates: + print(f"\n[ERROR] 发现 {len(duplicates)} 个重复的字段编码:") + for dup in duplicates: + print(f" {dup['filed_code']}: {dup['cnt']} 个字段 (IDs: {dup['field_ids']})") +else: + print("\n[OK] 所有 filed_code 都是唯一的") + +# 检查 name 重复 +cursor.execute(""" + SELECT name, COUNT(*) as cnt, GROUP_CONCAT(id ORDER BY id) as field_ids, + GROUP_CONCAT(filed_code ORDER BY id) as field_codes + FROM f_polic_field + WHERE tenant_id = %s + GROUP BY name + HAVING cnt > 1 +""", (TENANT_ID,)) + +name_duplicates = cursor.fetchall() + +if name_duplicates: + print(f"\n[WARN] 发现 {len(name_duplicates)} 个重复的字段名称:") + for dup in name_duplicates: + print(f" {dup['name']}: {dup['cnt']} 个字段") + print(f" 字段编码: 
{dup['field_codes']}") + print(f" 字段ID: {dup['field_ids']}") +else: + print("\n[OK] 所有字段名称都是唯一的") + +# 统计字段总数 +cursor.execute(""" + SELECT COUNT(*) as total, + COUNT(DISTINCT filed_code) as unique_codes, + COUNT(DISTINCT name) as unique_names + FROM f_polic_field + WHERE tenant_id = %s +""", (TENANT_ID,)) + +stats = cursor.fetchone() +print(f"\n字段统计:") +print(f" 总字段数: {stats['total']}") +print(f" 唯一字段编码数: {stats['unique_codes']}") +print(f" 唯一字段名称数: {stats['unique_names']}") + +# 检查孤立字段(没有关联任何模板的字段) +cursor.execute(""" + SELECT f.id, f.name, f.filed_code, f.field_type + FROM f_polic_field f + LEFT JOIN f_polic_file_field ff ON f.id = ff.filed_id + WHERE f.tenant_id = %s + AND ff.filed_id IS NULL + AND f.field_type = 2 + ORDER BY f.filed_code +""", (TENANT_ID,)) + +orphaned_fields = cursor.fetchall() + +if orphaned_fields: + print(f"\n[INFO] 发现 {len(orphaned_fields)} 个未关联任何模板的输出字段:") + for field in orphaned_fields[:10]: # 只显示前10个 + print(f" {field['filed_code']}: {field['name']}") + if len(orphaned_fields) > 10: + print(f" ... 还有 {len(orphaned_fields) - 10} 个") +else: + print("\n[OK] 所有输出字段都至少关联了一个模板") + +cursor.close() +conn.close() + diff --git a/修复重复字段总结.md b/修复重复字段总结.md new file mode 100644 index 0000000..fe66044 --- /dev/null +++ b/修复重复字段总结.md @@ -0,0 +1,99 @@ +# 修复重复字段总结 + +## 问题分析 + +通过分析 `f_polic_field` 表,发现存在以下重复问题: + +### 1. 重复的字段编码 (filed_code) + +发现 **2个重复的字段编码**: + +#### 问题1: `target_id_number` 重复 +- **字段1**: ID=1764836032902356, name="被核查人员身份证号码(带括号)", 关联模板数=0 +- **字段2**: ID=1764836032913357, name="被核查人员身份证号", 关联模板数=9 + +**处理方案**: +- 保留字段2(ID=1764836032913357),因为它关联了9个模板 +- 删除字段1(ID=1764836032902356),因为它没有关联任何模板 + +#### 问题2: `target_organization_and_position` 重复 +- **字段1**: ID=1764656917367205, name="被核查人员单位及职务", 关联模板数=4 +- **字段2**: ID=1764836032734251, name="被核查人单位及职务", 关联模板数=9 + +**处理方案**: +- 保留字段2(ID=1764836032734251),因为它关联了9个模板 +- 删除字段1(ID=1764656917367205),并将它的4个关联关系迁移到保留字段 + +## 修复操作 + +### 1. 
迁移关联关系 +- 将删除字段的模板关联关系迁移到保留字段 +- 如果保留字段已经关联了某个模板,则不再迁移,直接删除被删除字段上的这条重复关联 + +### 2. 删除无效字段 +- 删除重复字段在 `f_polic_file_field` 表中的所有关联关系 +- 删除重复字段本身 + +### 3. 验证结果 +- ✅ 所有 `filed_code` 现在都是唯一的(74个字段,74个唯一编码) +- ✅ 所有字段名称现在都是唯一的(74个字段,74个唯一名称) +- ✅ 所有更改已提交到数据库 + +## 修复后的状态 + +### 字段统计 +- **总字段数**: 74 +- **唯一字段编码数**: 74 +- **唯一字段名称数**: 74 + +### 未关联模板的字段 +发现 38 个未关联任何模板的输出字段,这些字段可能是: +- 预留字段,供将来使用 +- 已废弃但未删除的字段 +- 特殊用途字段 + +这些字段不影响系统功能,可以保留或根据业务需求决定是否删除。 + +## 相关表结构 + +### f_polic_field(字段定义表) +- `id`: 字段ID(主键) +- `tenant_id`: 租户ID +- `name`: 字段名称 +- `filed_code`: 字段编码(**现在唯一**) +- `field_type`: 字段类型(1=输入字段,2=输出字段) +- `state`: 状态(0=未启用,1=启用) + +### f_polic_file_field(文件和字段关联表) +- `file_id`: 文件配置ID(关联 f_polic_file_config.id) +- `filed_id`: 字段ID(关联 f_polic_field.id) + +### f_polic_file_config(文件模板配置表) +- `id`: 文件配置ID(主键) +- `name`: 文件名称 +- `file_path`: MinIO文件路径 +- `state`: 状态(0=未启用,1=启用) + +## 修复脚本 + +本次使用的脚本(分析、修复、验证): +- `analyze_duplicate_fields.py`: 分析重复字段 +- `fix_duplicate_fields.py`: 修复重复字段 +- `verify_field_uniqueness.py`: 验证修复结果 + +## 注意事项 + +1. **数据完整性**:修复过程中已确保所有模板关联关系都迁移到保留字段,不会丢失数据 +2. **唯一性约束**:虽然数据库表结构中没有对 `filed_code` 设置唯一约束,但通过修复已确保当前租户(脚本中的 `TENANT_ID`)范围内的数据唯一性 +3. **建议**:可以考虑在数据库层面为 `(tenant_id, filed_code)` 添加联合唯一索引,防止将来再次出现重复 + +## 建议的数据库优化 + +```sql +-- 为 (tenant_id, filed_code) 添加联合唯一索引(建议执行) +ALTER TABLE f_polic_field +ADD UNIQUE INDEX idx_tenant_filed_code (tenant_id, filed_code); +``` + +这样可以确保在数据库层面防止同一租户下出现重复的 `filed_code`。 +