diff --git a/__pycache__/app.cpython-312.pyc b/__pycache__/app.cpython-312.pyc index 9d2c16f..1521c3a 100644 Binary files a/__pycache__/app.cpython-312.pyc and b/__pycache__/app.cpython-312.pyc differ diff --git a/app.py b/app.py index 9d8da78..447f995 100644 --- a/app.py +++ b/app.py @@ -5,6 +5,7 @@ from flask import Flask, request, jsonify, send_from_directory from flask_cors import CORS from flasgger import Swagger import os +import pymysql from datetime import datetime from dotenv import load_dotenv @@ -284,6 +285,85 @@ def extract(): return error_response(2001, f"AI解析超时或发生错误: {str(e)}") +@app.route('/api/file-configs', methods=['GET']) +def get_file_configs(): + """ + 获取可用的文件配置列表 + 用于查询可用的fileId,供文档生成接口使用 + + --- + tags: + - 字段配置 + summary: 获取文件配置列表 + description: 返回所有启用的文件配置,包含fileId和文件名称 + responses: + 200: + description: 成功 + schema: + type: object + properties: + code: + type: integer + example: 0 + data: + type: object + properties: + fileConfigs: + type: array + items: + type: object + properties: + fileId: + type: integer + description: 文件配置ID + example: 1765273961563507 + fileName: + type: string + description: 文件名称 + example: 1.请示报告卡(XXX) + filePath: + type: string + description: MinIO文件路径 + example: /615873064429507639/TEMPLATE/2025/12/1.请示报告卡(XXX).docx + isSuccess: + type: boolean + example: true + """ + try: + conn = document_service.get_connection() + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + sql = """ + SELECT id, name, file_path + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + ORDER BY name + """ + cursor.execute(sql, (document_service.tenant_id,)) + configs = cursor.fetchall() + + file_configs = [] + for config in configs: + file_configs.append({ + 'fileId': config['id'], + 'fileName': config['name'], + 'filePath': config['file_path'] or '' + }) + + return success_response({ + 'fileConfigs': file_configs + }) + + finally: + cursor.close() + conn.close() + + except Exception as e: + return error_response(500, f"查询文件配置失败: {str(e)}") + + @app.route('/api/fields', methods=['GET']) def get_fields(): """ @@ -575,11 +655,12 @@ def generate_document(): first_document_name = None # 用于存储第一个生成的文档名 for file_info in file_list: - file_id = file_info.get('fileId') - file_name = file_info.get('fileName', '') + # 兼容 id 和 fileId 两种字段 + file_id = file_info.get('fileId') or file_info.get('id') + file_name = file_info.get('fileName') or file_info.get('name', '') if not file_id: - return error_response(1001, f"文件 {file_name} 缺少fileId参数") + return error_response(1001, f"文件 {file_name} 缺少fileId或id参数") try: # 生成文档(使用fileId而不是templateCode) @@ -625,6 +706,137 @@ def generate_document(): return error_response(3001, f"文档生成失败: {str(e)}") +<<<<<<< HEAD +@app.route('/fPolicTask/getDocument', methods=['POST']) +def get_document_by_task(): + """ + 通过taskId获取文档(兼容接口) + 支持通过taskId查询关联的文件列表,或直接使用提供的文件列表 + """ + try: + data = request.get_json() + + # 验证请求参数 + if not data: + return error_response(400, "请求参数不能为空") + + task_id = data.get('taskId') + input_data = data.get('inputData', []) + file_list = data.get('fpolicFieldParamFileList', []) + + # 如果没有提供file_list,尝试通过taskId查询 + if not file_list and task_id: + try: + conn = document_service.get_connection() + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 尝试从f_polic_task表查询关联的文件列表 + # 注意:这里需要根据实际表结构调整SQL + sql = """ + SELECT file_id, file_name + FROM f_polic_task_file + WHERE task_id = %s + AND tenant_id = %s + AND state = 1 + """ + cursor.execute(sql, (task_id, document_service.tenant_id)) + task_files = cursor.fetchall() + + if task_files: + file_list = [] + for tf in task_files: + file_list.append({ + 'fileId': tf['file_id'], + 'fileName': tf.get('file_name', '') + }) + except Exception as e: + # 如果表不存在或查询失败,记录日志但不报错 + print(f"[WARN] 无法通过taskId查询文件列表: {str(e)}") + finally: + cursor.close() + conn.close() + except Exception as e: + print(f"[WARN] 查询taskId关联文件时出错: {str(e)}") + + # 如果仍然没有file_list,返回错误 + if not file_list: + return error_response(400, "缺少fpolicFieldParamFileList参数,且无法通过taskId查询到关联文件。请提供fpolicFieldParamFileList参数,格式: [{'fileId': 文件ID, 'fileName': '文件名'}]") + + if not input_data or not isinstance(input_data, list): + return error_response(400, "inputData参数必须是非空数组") + + if not file_list or not isinstance(file_list, list): + return error_response(400, "fpolicFieldParamFileList参数必须是非空数组") + + # 将input_data转换为字典格式(用于生成文档名称) + field_data = {} + for item in input_data: + field_code = item.get('fieldCode', '') + field_value = item.get('fieldValue', '') + if field_code: + field_data[field_code] = field_value or '' + + # 生成文档ID + document_id = document_service.generate_document_id() + + # 处理每个文件 + result_file_list = [] + first_document_name = None # 用于存储第一个生成的文档名 + + for file_info in file_list: + # 兼容 id 和 fileId 两种字段 + file_id = file_info.get('fileId') or file_info.get('id') + file_name = file_info.get('fileName') or file_info.get('name', '') + + if not file_id: + return error_response(1001, f"文件 {file_name} 缺少fileId或id参数") + + try: + # 生成文档(使用fileId而不是templateCode) + result = document_service.generate_document( + file_id=file_id, + input_data=input_data, + file_info=file_info + ) + + # 使用生成的文档名称(.docx格式),而不是原始文件名 + generated_file_name = result.get('fileName', file_name) + + # 保存第一个文档名作为 documentName + if first_document_name is None: + first_document_name = generated_file_name + + result_file_list.append({ + 'fileId': file_id, + 'fileName': generated_file_name, # 使用生成的文档名 + 'filePath': result['filePath'] + }) + + except Exception as e: + error_msg = str(e) + if '不存在' in error_msg or '模板' in error_msg: + return error_response(1001, error_msg) + elif '生成' in error_msg or '填充' in error_msg: + return error_response(3001, error_msg) + elif '上传' in error_msg or '保存' in error_msg: + return error_response(3002, error_msg) + else: + return error_response(3001, f"文件生成失败: {error_msg}") + + # 构建返回数据(不包含inputData,只返回生成的文档信息) + return success_response({ + 'documentId': document_id, + 'documentName': first_document_name or 'generated.docx', # 使用第一个生成的文档名 + 'fpolicFieldParamFileList': result_file_list + }) + + except Exception as e: + return error_response(3001, f"文档生成失败: {str(e)}") + + +======= +>>>>>>> parent of 4897c96 (添加通过taskId获取文档的接口,支持文件列表查询和参数验证,增强错误处理能力。同时,优化文档生成逻辑,确保生成的文档名称和路径的准确性。) if __name__ == '__main__': # 确保static目录存在 os.makedirs('static', exist_ok=True) diff --git a/generate_download_urls.py b/generate_download_urls.py index df8b19e..c91226d 100644 --- a/generate_download_urls.py +++ b/generate_download_urls.py @@ -23,8 +23,13 @@ BUCKET_NAME = 'finyx' # 文件相对路径列表 FILE_PATHS = [ +<<<<<<< HEAD '/615873064429507639/20251211112544/初步核实审批表_张三.docx', '/615873064429507639/20251211112545/请示报告卡_张三.docx' +======= + '/615873064429507639/20251211101046/1_张三.docx', + '/615873064429507639/20251211101046/1_张三.docx' +>>>>>>> e3f4a394c1a4333db2fd3a9383be29fa9d9055e0 ] def generate_download_urls(): diff --git a/generate_template_file_id_report.py b/generate_template_file_id_report.py new file mode 100644 index 0000000..3098e5d --- /dev/null +++ b/generate_template_file_id_report.py @@ -0,0 +1,219 @@ +""" +生成模板 file_id 和关联关系的详细报告 +重点检查每个模板的 file_id 是否正确,以及 f_polic_file_field 表的关联关系 +""" +import sys +import pymysql +from pathlib import Path +from typing import Dict, List +from collections import defaultdict + +# 设置控制台编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + try: + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + except: + pass + +# 数据库连接配置 +DB_CONFIG = { + 'host': '152.136.177.240', + 'port': 5012, + 'user': 'finyx', + 'password': '6QsGK6MpePZDE57Z', + 'database': 'finyx', + 'charset': 'utf8mb4' +} + +TENANT_ID = 615873064429507639 + + +def generate_detailed_report(): + """生成详细的 file_id 和关联关系报告""" + print("="*80) + print("模板 file_id 和关联关系详细报告") + print("="*80) + + # 连接数据库 + try: + conn = pymysql.connect(**DB_CONFIG) + print("\n[OK] 数据库连接成功\n") + except Exception as e: + print(f"\n[ERROR] 数据库连接失败: {e}") + return + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + # 1. 查询所有有 file_path 的模板(实际模板文件,不是目录节点) + cursor.execute(""" + SELECT id, name, template_code, file_path, state, parent_id + FROM f_polic_file_config + WHERE tenant_id = %s AND file_path IS NOT NULL AND file_path != '' + ORDER BY name, id + """, (TENANT_ID,)) + + all_templates = cursor.fetchall() + + print(f"总模板数(有 file_path): {len(all_templates)}\n") + + # 2. 查询每个模板的关联字段 + template_field_map = defaultdict(list) + + cursor.execute(""" + SELECT + fff.file_id, + fff.filed_id, + fff.state as relation_state, + fc.name as template_name, + fc.template_code, + f.name as field_name, + f.filed_code, + f.field_type, + CASE + WHEN f.field_type = 1 THEN '输入字段' + WHEN f.field_type = 2 THEN '输出字段' + ELSE '未知' + END as field_type_name + FROM f_polic_file_field fff + INNER JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s + ORDER BY fff.file_id, f.field_type, f.name + """, (TENANT_ID,)) + + all_relations = cursor.fetchall() + + for rel in all_relations: + template_field_map[rel['file_id']].append(rel) + + # 3. 按模板分组显示 + print("="*80) + print("每个模板的 file_id 和关联字段详情") + print("="*80) + + # 按名称分组,显示重复的模板 + templates_by_name = defaultdict(list) + for template in all_templates: + templates_by_name[template['name']].append(template) + + duplicate_templates = {name: tmpls for name, tmpls in templates_by_name.items() if len(tmpls) > 1} + + if duplicate_templates: + print("\n[WARN] 发现重复名称的模板:\n") + for name, tmpls in duplicate_templates.items(): + print(f" 模板名称: {name}") + for tmpl in tmpls: + field_count = len(template_field_map.get(tmpl['id'], [])) + input_count = sum(1 for f in template_field_map.get(tmpl['id'], []) if f['field_type'] == 1) + output_count = sum(1 for f in template_field_map.get(tmpl['id'], []) if f['field_type'] == 2) + print(f" - file_id: {tmpl['id']}") + print(f" template_code: {tmpl.get('template_code', 'N/A')}") + print(f" file_path: {tmpl.get('file_path', 'N/A')}") + print(f" 关联字段: 总计 {field_count} 个 (输入 {input_count}, 输出 {output_count})") + print() + + # 4. 显示每个模板的详细信息 + print("\n" + "="*80) + print("所有模板的 file_id 和关联字段统计") + print("="*80) + + for template in all_templates: + file_id = template['id'] + name = template['name'] + template_code = template.get('template_code', 'N/A') + file_path = template.get('file_path', 'N/A') + + fields = template_field_map.get(file_id, []) + input_fields = [f for f in fields if f['field_type'] == 1] + output_fields = [f for f in fields if f['field_type'] == 2] + + print(f"\n模板: {name}") + print(f" file_id: {file_id}") + print(f" template_code: {template_code}") + print(f" file_path: {file_path}") + print(f" 关联字段: 总计 {len(fields)} 个") + print(f" - 输入字段 (field_type=1): {len(input_fields)} 个") + print(f" - 输出字段 (field_type=2): {len(output_fields)} 个") + + if len(fields) == 0: + print(f" [WARN] 该模板没有关联任何字段") + + # 5. 检查关联关系的完整性 + print("\n" + "="*80) + print("关联关系完整性检查") + print("="*80) + + # 检查是否有 file_id 在 f_polic_file_field 中但没有对应的文件配置 + cursor.execute(""" + SELECT DISTINCT fff.file_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + WHERE fff.tenant_id = %s AND fc.id IS NULL + """, (TENANT_ID,)) + orphan_file_ids = cursor.fetchall() + + if orphan_file_ids: + print(f"\n[ERROR] 发现孤立的 file_id(在 f_polic_file_field 中但不在 f_polic_file_config 中):") + for item in orphan_file_ids: + print(f" - file_id: {item['file_id']}") + else: + print("\n[OK] 所有关联关系的 file_id 都有效") + + # 检查是否有 filed_id 在 f_polic_file_field 中但没有对应的字段 + cursor.execute(""" + SELECT DISTINCT fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND f.id IS NULL + """, (TENANT_ID,)) + orphan_field_ids = cursor.fetchall() + + if orphan_field_ids: + print(f"\n[ERROR] 发现孤立的 filed_id(在 f_polic_file_field 中但不在 f_polic_field 中):") + for item in orphan_field_ids: + print(f" - filed_id: {item['filed_id']}") + else: + print("\n[OK] 所有关联关系的 filed_id 都有效") + + # 6. 统计汇总 + print("\n" + "="*80) + print("统计汇总") + print("="*80) + + total_templates = len(all_templates) + templates_with_fields = len([t for t in all_templates if len(template_field_map.get(t['id'], [])) > 0]) + templates_without_fields = total_templates - templates_with_fields + + total_relations = len(all_relations) + total_input_relations = sum(1 for r in all_relations if r['field_type'] == 1) + total_output_relations = sum(1 for r in all_relations if r['field_type'] == 2) + + print(f"\n模板统计:") + print(f" 总模板数: {total_templates}") + print(f" 有关联字段的模板: {templates_with_fields}") + print(f" 无关联字段的模板: {templates_without_fields}") + + print(f"\n关联关系统计:") + print(f" 总关联关系数: {total_relations}") + print(f" 输入字段关联: {total_input_relations}") + print(f" 输出字段关联: {total_output_relations}") + + if duplicate_templates: + print(f"\n[WARN] 发现 {len(duplicate_templates)} 个模板名称有重复记录") + print(" 建议: 确认每个模板应该使用哪个 file_id,并清理重复记录") + + if templates_without_fields: + print(f"\n[WARN] 发现 {templates_without_fields} 个模板没有关联任何字段") + print(" 建议: 检查这些模板是否需要关联字段") + + finally: + cursor.close() + conn.close() + print("\n数据库连接已关闭") + + +if __name__ == '__main__': + generate_detailed_report() + diff --git a/get_available_file_ids.py b/get_available_file_ids.py new file mode 100644 index 0000000..177074f --- /dev/null +++ b/get_available_file_ids.py @@ -0,0 +1,64 @@ +""" +获取所有可用的文件ID列表(用于测试) +""" +import pymysql +import os + +# 数据库连接配置 +DB_CONFIG = { + 'host': os.getenv('DB_HOST', '152.136.177.240'), + 'port': int(os.getenv('DB_PORT', 5012)), + 'user': os.getenv('DB_USER', 'finyx'), + 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), + 'database': os.getenv('DB_NAME', 'finyx'), + 'charset': 'utf8mb4' +} + +TENANT_ID = 615873064429507639 + +def get_available_file_configs(): + """获取所有可用的文件配置""" + conn = pymysql.connect(**DB_CONFIG) + cursor = conn.cursor(pymysql.cursors.DictCursor) + + try: + sql = """ + SELECT id, name, file_path, state + FROM f_polic_file_config + WHERE tenant_id = %s + AND state = 1 + ORDER BY name + """ + cursor.execute(sql, (TENANT_ID,)) + configs = cursor.fetchall() + + print("="*80) + print("可用的文件配置列表(state=1)") + print("="*80) + print(f"\n共找到 {len(configs)} 个启用的文件配置:\n") + + for i, config in enumerate(configs, 1): + print(f"{i}. ID: {config['id']}") + print(f" 名称: {config['name']}") + print(f" 文件路径: {config['file_path'] or '(空)'}") + print() + + # 输出JSON格式,方便复制 + print("\n" + "="*80) + print("JSON格式(可用于测试):") + print("="*80) + print("[") + for i, config in enumerate(configs): + comma = "," if i < len(configs) - 1 else "" + print(f' {{"fileId": {config["id"]}, "fileName": "{config["name"]}.doc"}}{comma}') + print("]") + + return configs + + finally: + cursor.close() + conn.close() + +if __name__ == '__main__': + get_available_file_configs() + diff --git a/services/document_service.py b/services/document_service.py index 794b45a..841e7db 100644 --- a/services/document_service.py +++ b/services/document_service.py @@ -131,9 +131,80 @@ class DocumentService: 填充后的文档路径 """ try: + print(f"[DEBUG] 开始填充模板: {template_path}") + print(f"[DEBUG] 字段数据: {field_data}") + # 打开模板文档 doc = Document(template_path) + print(f"[DEBUG] 文档包含 {len(doc.paragraphs)} 个段落, {len(doc.tables)} 个表格") +<<<<<<< HEAD + def replace_placeholder_in_paragraph(paragraph): + """在段落中替换占位符(处理跨run的情况)""" + try: + # 获取段落完整文本 + full_text = paragraph.text + if not full_text: + return + + # 检查是否有占位符需要替换 + has_placeholder = False + replaced_text = full_text + replacement_count = 0 + + # 遍历所有字段,替换所有匹配的占位符(包括重复的) + for field_code, field_value in field_data.items(): + placeholder = f"{{{{{field_code}}}}}" + # 使用循环替换所有匹配项(不仅仅是第一个) + while placeholder in replaced_text: + has_placeholder = True + replacement_count += 1 + # 替换占位符,如果值为空则替换为空字符串 + replaced_text = replaced_text.replace(placeholder, str(field_value) if field_value else '', 1) + print(f"[DEBUG] 替换占位符: {placeholder} -> '{field_value}' (在段落中)") + + # 如果有替换,使用安全的方式更新段落文本 + if has_placeholder: + print(f"[DEBUG] 段落替换了 {replacement_count} 个占位符: '{full_text[:50]}...' -> '{replaced_text[:50]}...'") + try: + # 方法1:直接设置text(推荐,会自动处理run) + paragraph.text = replaced_text + except Exception as e1: + # 如果方法1失败,尝试方法2:手动处理run + try: + # 清空所有run + paragraph.clear() + # 添加新的run + if replaced_text: + paragraph.add_run(replaced_text) + except Exception as e2: + # 如果两种方法都失败,记录错误但继续 + print(f"[WARN] 无法更新段落文本,方法1错误: {str(e1)}, 方法2错误: {str(e2)}") + pass + except Exception as e: + # 如果单个段落处理失败,记录错误但继续处理其他段落 + print(f"[WARN] 处理段落时出错: {str(e)}") + import traceback + print(traceback.format_exc()) + pass + + # 统计替换信息 + total_replacements = 0 + replaced_placeholders = set() + + # 替换段落中的占位符 + for para_idx, paragraph in enumerate(doc.paragraphs): + before_text = paragraph.text + replace_placeholder_in_paragraph(paragraph) + after_text = paragraph.text + if before_text != after_text: + # 检查哪些占位符被替换了 + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in before_text and placeholder not in after_text: + replaced_placeholders.add(field_code) + total_replacements += before_text.count(placeholder) +======= # 替换占位符 {{field_code}} 为实际值 for paragraph in doc.paragraphs: # 替换段落文本中的占位符 @@ -144,11 +215,73 @@ class DocumentService: for run in paragraph.runs: if placeholder in run.text: run.text = run.text.replace(placeholder, field_value or '') +>>>>>>> parent of 4897c96 (添加通过taskId获取文档的接口,支持文件列表查询和参数验证,增强错误处理能力。同时,优化文档生成逻辑,确保生成的文档名称和路径的准确性。) # 替换表格中的占位符 + try: + for table in doc.tables: + if not table.rows: + continue + for row in table.rows: + if not row.cells: + continue + for cell in row.cells: + try: + # 检查cell是否有paragraphs属性且不为空 + if hasattr(cell, 'paragraphs'): + # 安全地获取paragraphs列表 + paragraphs = list(cell.paragraphs) if cell.paragraphs else [] + for paragraph in paragraphs: + before_text = paragraph.text + replace_placeholder_in_paragraph(paragraph) + after_text = paragraph.text + if before_text != after_text: + # 检查哪些占位符被替换了 + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in before_text and placeholder not in after_text: + replaced_placeholders.add(field_code) + total_replacements += before_text.count(placeholder) + except Exception as e: + # 如果单个单元格处理失败,记录错误但继续处理其他单元格 + print(f"[WARN] 处理表格单元格时出错: {str(e)}") + pass + except Exception as e: + # 如果表格处理失败,记录错误但继续保存文档 + print(f"[WARN] 处理表格时出错: {str(e)}") + pass + + # 验证是否还有未替换的占位符 + remaining_placeholders = set() + for paragraph in doc.paragraphs: + text = paragraph.text + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in text: + remaining_placeholders.add(field_code) + + # 检查表格中的占位符 for table in doc.tables: for row in table.rows: for cell in row.cells: +<<<<<<< HEAD + if hasattr(cell, 'paragraphs'): + for paragraph in cell.paragraphs: + text = paragraph.text + for field_code in field_data.keys(): + placeholder = f"{{{{{field_code}}}}}" + if placeholder in text: + remaining_placeholders.add(field_code) + + # 输出统计信息 + print(f"[DEBUG] 占位符替换统计:") + print(f" - 已替换的占位符: {sorted(replaced_placeholders)}") + print(f" - 总替换次数: {total_replacements}") + if remaining_placeholders: + print(f" - ⚠️ 仍有未替换的占位符: {sorted(remaining_placeholders)}") + else: + print(f" - ✓ 所有占位符已成功替换") +======= for paragraph in cell.paragraphs: for field_code, field_value in field_data.items(): placeholder = f"{{{{{field_code}}}}}" @@ -156,16 +289,26 @@ class DocumentService: for run in paragraph.runs: if placeholder in run.text: run.text = run.text.replace(placeholder, field_value or '') +>>>>>>> parent of 4897c96 (添加通过taskId获取文档的接口,支持文件列表查询和参数验证,增强错误处理能力。同时,优化文档生成逻辑,确保生成的文档名称和路径的准确性。) # 保存到临时文件 temp_dir = tempfile.gettempdir() output_file = os.path.join(temp_dir, f"filled_{datetime.now().strftime('%Y%m%d%H%M%S')}.docx") doc.save(output_file) + print(f"[DEBUG] 文档已保存到: {output_file}") return output_file + except IndexError as e: + # 索引越界错误,提供更详细的错误信息 + import traceback + error_detail = traceback.format_exc() + raise Exception(f"填充模板失败: list index out of range. 详细信息: {str(e)}\n{error_detail}") except Exception as e: - raise Exception(f"填充模板失败: {str(e)}") + # 其他错误,提供详细的错误信息 + import traceback + error_detail = traceback.format_exc() + raise Exception(f"填充模板失败: {str(e)}\n{error_detail}") def upload_to_minio(self, file_path: str, file_name: str) -> str: """ @@ -183,8 +326,9 @@ class DocumentService: try: # 生成MinIO对象路径(相对路径) now = datetime.now() - # 使用日期路径组织文件 - object_name = f"{self.tenant_id}/{now.strftime('%Y%m%d%H%M%S')}/{file_name}" + # 使用日期路径组织文件,添加微秒确保唯一性 + timestamp = f"{now.strftime('%Y%m%d%H%M%S')}{now.microsecond:06d}" + object_name = f"{self.tenant_id}/{timestamp}/{file_name}" # 上传文件 client.fput_object( @@ -215,7 +359,12 @@ class DocumentService: # 获取文件配置 file_config = self.get_file_config_by_id(file_id) if not file_config: - raise Exception(f"文件ID {file_id} 对应的模板不存在或未启用") + # 提供更详细的错误信息 + raise Exception( + f"文件ID {file_id} 对应的模板不存在或未启用。" + f"请通过查询 f_polic_file_config 表获取有效的文件ID," + f"或访问 /api/file-configs 接口查看可用的文件配置列表。" + ) # 检查file_path是否存在 file_path = file_config.get('file_path') @@ -240,8 +389,15 @@ class DocumentService: filled_doc_path = self.fill_template(template_path, field_data) # 生成文档名称(.docx格式) - original_file_name = file_info.get('fileName', 'generated.doc') + # 优先使用file_info中的fileName,如果没有则使用数据库中的name + # 确保每个文件都使用自己的文件名 + original_file_name = file_info.get('fileName') or file_info.get('name') or file_config.get('name', 'generated.doc') + print(f"[DEBUG] 文件ID: {file_id}, 原始文件名: {original_file_name}") + print(f"[DEBUG] file_info内容: {file_info}") + print(f"[DEBUG] file_config内容: {file_config}") + print(f"[DEBUG] 字段数据用于生成文档名: {field_data}") generated_file_name = self.generate_document_name(original_file_name, field_data) + print(f"[DEBUG] 文件ID: {file_id}, 生成的文档名: {generated_file_name}") # 上传到MinIO(使用生成的文档名) file_path = self.upload_to_minio(filled_doc_path, generated_file_name) @@ -282,16 +438,62 @@ class DocumentService: field_data: 字段数据 Returns: - 生成的文档名称,如 "初步核实审批表_张三.docx" + 生成的文档名称,如 "请示报告卡_张三.docx" """ + import re + # 提取文件基础名称(不含扩展名) - base_name = Path(original_file_name).stem + # 处理可能包含路径的情况 + # 先移除路径,只保留文件名 + file_name_only = Path(original_file_name).name + + # 判断是否有扩展名(.doc, .docx等) + # 如果最后有常见的文档扩展名,则提取stem + if file_name_only.lower().endswith(('.doc', '.docx', '.txt', '.pdf')): + base_name = Path(file_name_only).stem + else: + # 如果没有扩展名,直接使用文件名 + base_name = file_name_only + + print(f"[DEBUG] 原始文件名: '{original_file_name}'") + print(f"[DEBUG] 提取的基础名称(清理前): '{base_name}'") + + # 清理文件名中的特殊标记 + # 1. 移除开头的数字和点(如 "1."、"2." 等),但保留后面的内容 + # 使用非贪婪匹配,只匹配开头的数字和点 + base_name = re.sub(r'^\d+\.\s*', '', base_name) + + # 2. 移除括号及其内容(如 "(XXX)"、"(初核谈话)" 等) + base_name = re.sub(r'[((].*?[))]', '', base_name) + + # 3. 清理首尾空白字符和多余的点 + base_name = base_name.strip().strip('.') + + # 4. 如果清理后为空或只有数字,使用原始文件名重新处理 + if not base_name or base_name.isdigit(): + print(f"[DEBUG] 清理后为空或只有数字,重新处理原始文件名") + # 从原始文件名中提取,但保留更多内容 + temp_name = file_name_only + # 只移除括号,保留数字前缀(但格式化为更友好的形式) + temp_name = re.sub(r'[((].*?[))]', '', temp_name) + # 移除扩展名(如果存在) + if temp_name.lower().endswith(('.doc', '.docx', '.txt', '.pdf')): + temp_name = Path(temp_name).stem + temp_name = temp_name.strip().strip('.') + if temp_name: + base_name = temp_name + else: + base_name = "文档" # 最后的备选方案 + + print(f"[DEBUG] 清理后的基础名称: '{base_name}'") # 尝试从字段数据中提取被核查人姓名作为后缀 suffix = '' - if 'target_name' in field_data and field_data['target_name']: - suffix = f"_{field_data['target_name']}" + target_name = field_data.get('target_name', '') + if target_name and target_name.strip(): + suffix = f"_{target_name.strip()}" +<<<<<<< HEAD # 生成新文件名 return f"{base_name}{suffix}.docx" @@ -328,4 +530,11 @@ class DocumentService: # 如果生成URL失败,记录错误但不影响主流程 print(f"生成预签名URL失败: {str(e)}") return None +======= + # 生成新文件名(确保是.docx格式) + generated_name = f"{base_name}{suffix}.docx" + print(f"[DEBUG] 文档名称生成: '{original_file_name}' -> '{generated_name}' (base_name='{base_name}', suffix='{suffix}')") + + return generated_name +>>>>>>> e3f4a394c1a4333db2fd3a9383be29fa9d9055e0 diff --git a/static/index.html b/static/index.html index aca06fc..3a4e600 100644 --- a/static/index.html +++ b/static/index.html @@ -327,10 +327,13 @@
+
+ + +
-
@@ -548,27 +551,81 @@ // ==================== 文档生成接口相关 ==================== - function initGenerateTab() { + async function loadAvailableFiles() { + try { + const response = await fetch('/api/file-configs'); + const result = await response.json(); + + if (result.isSuccess && result.data && result.data.fileConfigs) { + const container = document.getElementById('fileListContainer'); + container.innerHTML = ''; // 清空现有列表 + + // 只添加有filePath的文件(有模板文件的) + const filesWithPath = result.data.fileConfigs.filter(f => f.filePath); + + if (filesWithPath.length === 0) { + alert('没有找到可用的文件配置(需要有filePath)'); + return; + } + + // 添加前5个文件作为示例 + filesWithPath.slice(0, 5).forEach(file => { + addFileItem(file.fileId, file.fileName); + }); + + if (filesWithPath.length > 5) { + alert(`已加载前5个文件,共找到 ${filesWithPath.length} 个可用文件`); + } else { + alert(`已加载 ${filesWithPath.length} 个可用文件`); + } + } else { + alert('获取文件列表失败: ' + (result.errorMsg || '未知错误')); + } + } catch (error) { + alert('加载文件列表失败: ' + error.message); + } + } + + async function initGenerateTab() { // 初始化默认字段(完整的虚拟测试数据) addGenerateField('target_name', '张三'); addGenerateField('target_gender', '男'); - addGenerateField('target_age', '44'); - addGenerateField('target_date_of_birth', '198005'); - addGenerateField('target_organization_and_position', '某公司总经理'); - addGenerateField('target_organization', '某公司'); - addGenerateField('target_position', '总经理'); - addGenerateField('target_education_level', '本科'); + addGenerateField('target_age', '34'); + addGenerateField('target_date_of_birth', '199009'); + addGenerateField('target_organization_and_position', '云南省农业机械公司党支部书记、经理'); + addGenerateField('target_organization', '云南省农业机械公司'); + addGenerateField('target_position', '党支部书记、经理'); + addGenerateField('target_education_level', '研究生'); addGenerateField('target_political_status', '中共党员'); - addGenerateField('target_professional_rank', '正处级'); - addGenerateField('clue_source', '群众举报'); - addGenerateField('target_issue_description', '违反国家计划生育有关政策规定,于2010年10月生育二胎。'); - addGenerateField('department_opinion', '建议进行初步核实'); - addGenerateField('filler_name', '李四'); + addGenerateField('target_professional_rank', ''); + addGenerateField('clue_source', ''); + addGenerateField('target_issue_description', '张三多次在私下聚会、网络群组中发表抹黑党中央决策部署的言论,传播歪曲党的理论和路线方针政策的错误观点,频繁接受管理服务对象安排的高档宴请、私人会所聚餐,以及高尔夫球、高端足浴等娱乐活动,相关费用均由对方全额承担,在干部选拔任用、岗位调整工作中,利用职务便利收受他人财物,利用职权为其亲属经营的公司谋取不正当利益,帮助该公司违规承接本单位及关联单位工程项目3个,合同总额超200万元,从中收受亲属给予的"感谢费"15万元;其本人沉迷赌博活动,每周至少参与1次大额赌资赌博,单次赌资超1万元,累计赌资达数十万元。'); + addGenerateField('department_opinion', ''); + addGenerateField('filler_name', ''); - // 初始化默认文件(使用fileId,不再需要templateCode) - // fileId可以从f_polic_file_config表查询获取 - addFileItem(1765273961883544, '初步核实审批表.doc'); // 2.初步核实审批表(XXX) - addFileItem(1765273961563507, '请示报告卡.doc'); // 1.请示报告卡(XXX) + // 自动加载可用的文件列表(只加载前2个作为示例) + try { + const response = await fetch('/api/file-configs'); + const result = await response.json(); + + if (result.isSuccess && result.data && result.data.fileConfigs) { + // 只添加有filePath的文件(有模板文件的) + const filesWithPath = result.data.fileConfigs.filter(f => f.filePath); + + // 添加前2个文件作为示例 + filesWithPath.slice(0, 2).forEach(file => { + addFileItem(file.fileId, file.fileName); + }); + } else { + // 如果加载失败,使用默认的fileId + addFileItem(1765273961883544, '初步核实审批表.doc'); // 2.初步核实审批表(XXX) + addFileItem(1765273961563507, '请示报告卡.doc'); // 1.请示报告卡(XXX) + } + } catch (error) { + // 如果加载失败,使用默认的fileId + addFileItem(1765273961883544, '初步核实审批表.doc'); + addFileItem(1765273961563507, '请示报告卡.doc'); + } } function addGenerateField(fieldCode = '', fieldValue = '') { diff --git a/update_all_templates.py b/update_all_templates.py new file mode 100644 index 0000000..7024ee9 --- /dev/null +++ b/update_all_templates.py @@ -0,0 +1,467 @@ +""" +更新 template_finish 目录下所有模板文件 +重新上传到 MinIO 并更新数据库信息,确保模板文件是最新版本 +""" +import os +import sys +import json +import pymysql +from minio import Minio +from minio.error import S3Error +from datetime import datetime +from pathlib import Path +from typing import Dict, List, Optional + +# 设置控制台编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + try: + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + except: + pass + +# MinIO连接配置 +MINIO_CONFIG = { + 'endpoint': 'minio.datacubeworld.com:9000', + 'access_key': 'JOLXFXny3avFSzB0uRA5', + 'secret_key': 'G1BR8jStNfovkfH5ou39EmPl34E4l7dGrnd3Cz0I', + 'secure': True # 使用HTTPS +} + +# 数据库连接配置 +DB_CONFIG = { + 'host': '152.136.177.240', + 'port': 5012, + 'user': 'finyx', + 'password': '6QsGK6MpePZDE57Z', + 'database': 'finyx', + 'charset': 'utf8mb4' +} + +# 固定值 +TENANT_ID = 615873064429507639 +CREATED_BY = 655162080928945152 +UPDATED_BY = 655162080928945152 +BUCKET_NAME = 'finyx' + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + +# 文档类型映射(根据完整文件名识别,保持原文件名不变) +# 每个文件名都是独立的模板,使用完整文件名作为key +DOCUMENT_TYPE_MAPPING = { + "1.请示报告卡(XXX)": { + "template_code": "REPORT_CARD", + "name": "1.请示报告卡(XXX)", + "business_type": "INVESTIGATION" + }, + "2.初步核实审批表(XXX)": { + "template_code": "PRELIMINARY_VERIFICATION_APPROVAL", + "name": "2.初步核实审批表(XXX)", + "business_type": "INVESTIGATION" + }, + "3.附件初核方案(XXX)": { + "template_code": "INVESTIGATION_PLAN", + "name": "3.附件初核方案(XXX)", + "business_type": "INVESTIGATION" + }, + "谈话通知书第一联": { + "template_code": "NOTIFICATION_LETTER_1", + "name": "谈话通知书第一联", + "business_type": "INVESTIGATION" + }, + "谈话通知书第二联": { + "template_code": "NOTIFICATION_LETTER_2", + "name": "谈话通知书第二联", + "business_type": "INVESTIGATION" + }, + "谈话通知书第三联": { + "template_code": "NOTIFICATION_LETTER_3", + "name": "谈话通知书第三联", + "business_type": "INVESTIGATION" + }, + "1.请示报告卡(初核谈话)": { + "template_code": "REPORT_CARD_INTERVIEW", + "name": "1.请示报告卡(初核谈话)", + "business_type": "INVESTIGATION" + }, + "2谈话审批表": { + "template_code": "INTERVIEW_APPROVAL_FORM", + "name": "2谈话审批表", + "business_type": "INVESTIGATION" + }, + "3.谈话前安全风险评估表": { + "template_code": "PRE_INTERVIEW_RISK_ASSESSMENT", + "name": "3.谈话前安全风险评估表", + "business_type": "INVESTIGATION" + }, + "4.谈话方案": { + "template_code": "INTERVIEW_PLAN", + "name": "4.谈话方案", + "business_type": "INVESTIGATION" + }, + "5.谈话后安全风险评估表": { + "template_code": "POST_INTERVIEW_RISK_ASSESSMENT", + "name": "5.谈话后安全风险评估表", + "business_type": "INVESTIGATION" + }, + "1.谈话笔录": { + "template_code": "INTERVIEW_RECORD", + "name": "1.谈话笔录", + "business_type": "INVESTIGATION" + }, + "2.谈话询问对象情况摸底调查30问": { + "template_code": "INVESTIGATION_30_QUESTIONS", + "name": "2.谈话询问对象情况摸底调查30问", + "business_type": "INVESTIGATION" + }, + "3.被谈话人权利义务告知书": { + "template_code": "RIGHTS_OBLIGATIONS_NOTICE", + "name": "3.被谈话人权利义务告知书", + "business_type": "INVESTIGATION" + }, + "4.点对点交接单": { + "template_code": "HANDOVER_FORM", + "name": "4.点对点交接单", + "business_type": "INVESTIGATION" + }, + "4.点对点交接单2": { + "template_code": "HANDOVER_FORM_2", + "name": "4.点对点交接单2", + "business_type": "INVESTIGATION" + }, + "5.陪送交接单(新)": { + "template_code": "ESCORT_HANDOVER_FORM", + "name": "5.陪送交接单(新)", + "business_type": "INVESTIGATION" + }, + "6.1保密承诺书(谈话对象使用-非中共党员用)": { + "template_code": "CONFIDENTIALITY_COMMITMENT_NON_PARTY", + "name": "6.1保密承诺书(谈话对象使用-非中共党员用)", + "business_type": "INVESTIGATION" + }, + "6.2保密承诺书(谈话对象使用-中共党员用)": { + "template_code": "CONFIDENTIALITY_COMMITMENT_PARTY", + "name": "6.2保密承诺书(谈话对象使用-中共党员用)", + "business_type": "INVESTIGATION" + }, + "7.办案人员-办案安全保密承诺书": { + "template_code": "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT", + "name": "7.办案人员-办案安全保密承诺书", + "business_type": "INVESTIGATION" + }, + "8-1请示报告卡(初核报告结论) ": { + "template_code": "REPORT_CARD_CONCLUSION", + "name": "8-1请示报告卡(初核报告结论) ", + "business_type": "INVESTIGATION" + }, + "8.XXX初核情况报告": { + "template_code": "INVESTIGATION_REPORT", + "name": "8.XXX初核情况报告", + "business_type": "INVESTIGATION" + } +} + + +def identify_document_type(file_name: str) -> Optional[Dict]: + """ + 根据完整文件名识别文档类型(保持原文件名不变) + + Args: + file_name: 文件名(不含扩展名) + + Returns: + 文档类型配置,如果无法识别返回None + """ + # 获取文件名(不含扩展名),保持原样 + base_name = Path(file_name).stem + + # 直接使用完整文件名进行精确匹配 + if base_name in DOCUMENT_TYPE_MAPPING: + return DOCUMENT_TYPE_MAPPING[base_name] + + # 如果精确匹配失败,返回None(不进行任何修改或模糊匹配) + return None + + +def upload_to_minio(file_path: Path, minio_client: Minio) -> str: + """ + 上传文件到MinIO(覆盖已存在的文件) + + Args: + file_path: 本地文件路径 + minio_client: MinIO客户端实例 + + Returns: + MinIO中的相对路径 + """ + try: + # 检查存储桶是否存在 + found = minio_client.bucket_exists(BUCKET_NAME) + if not found: + raise Exception(f"存储桶 '{BUCKET_NAME}' 不存在,请先创建") + + # 生成MinIO对象路径(使用当前日期,确保是最新版本) + now = datetime.now() + object_name = f'{TENANT_ID}/TEMPLATE/{now.year}/{now.month:02d}/{file_path.name}' + + # 上传文件(fput_object 会自动覆盖已存在的文件) + minio_client.fput_object( + BUCKET_NAME, + object_name, + str(file_path), + content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document' + ) + + # 返回相对路径(以/开头) + return f"/{object_name}" + + except S3Error as e: + raise Exception(f"MinIO错误: {e}") + except Exception as e: + raise Exception(f"上传文件时发生错误: {e}") + + +def update_file_config(conn, doc_config: Dict, file_path: str) -> int: + """ + 更新或创建文件配置记录 + + Args: + conn: 数据库连接 + doc_config: 文档配置 + file_path: MinIO文件路径 + + Returns: + 文件配置ID + """ + cursor = conn.cursor() + current_time = datetime.now() + + try: + # 检查是否已存在(通过 template_code 查找) + select_sql = """ + SELECT id, name, file_path FROM f_polic_file_config + WHERE tenant_id = %s AND template_code = %s + """ + cursor.execute(select_sql, (TENANT_ID, doc_config['template_code'])) + existing = cursor.fetchone() + + # 构建 input_data + input_data = json.dumps({ + 'template_code': doc_config['template_code'], + 'business_type': doc_config['business_type'] + }, ensure_ascii=False) + + if existing: + file_config_id, old_name, old_path = existing + # 更新现有记录 + update_sql = """ + UPDATE f_polic_file_config + SET file_path = %s, + input_data = %s, + name = %s, + updated_time = %s, + updated_by = %s, + state = 1 + WHERE id = %s AND tenant_id = %s + """ + cursor.execute(update_sql, ( + file_path, + input_data, + doc_config['name'], + current_time, + UPDATED_BY, + file_config_id, + TENANT_ID + )) + conn.commit() + print(f" [OK] 更新数据库记录 (ID: {file_config_id})") + if old_path != file_path: + print(f" 旧路径: {old_path}") + print(f" 新路径: {file_path}") + return file_config_id + else: + # 创建新记录 + import time + import random + timestamp = int(time.time() * 1000) + random_part = random.randint(100000, 999999) + file_config_id = timestamp * 1000 + random_part + + insert_sql = """ + INSERT INTO f_polic_file_config + (id, tenant_id, parent_id, name, input_data, file_path, template_code, + created_time, created_by, updated_time, updated_by, state) + VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + """ + cursor.execute(insert_sql, ( + file_config_id, + TENANT_ID, + None, # parent_id + doc_config['name'], + input_data, + file_path, + doc_config['template_code'], + current_time, + CREATED_BY, + current_time, + CREATED_BY, + 1 # state: 1表示启用 + )) + conn.commit() + print(f" [OK] 创建新数据库记录 (ID: {file_config_id})") + return file_config_id + + except Exception as e: + conn.rollback() + raise Exception(f"更新数据库失败: {str(e)}") + finally: + cursor.close() + + +def update_all_templates(): + """ + 更新所有模板文件,重新上传到MinIO并更新数据库 + """ + print("="*80) + print("开始更新所有模板文件") + print("="*80) + print(f"模板目录: {TEMPLATES_DIR}") + print() + + if not TEMPLATES_DIR.exists(): + print(f"错误: 模板目录不存在: {TEMPLATES_DIR}") + return + + # 连接数据库和MinIO + try: + conn = pymysql.connect(**DB_CONFIG) + print("[OK] 数据库连接成功") + + minio_client = Minio( + MINIO_CONFIG['endpoint'], + access_key=MINIO_CONFIG['access_key'], + secret_key=MINIO_CONFIG['secret_key'], + secure=MINIO_CONFIG['secure'] + ) + + # 检查存储桶 + if not minio_client.bucket_exists(BUCKET_NAME): + raise Exception(f"存储桶 '{BUCKET_NAME}' 不存在,请先创建") + print("[OK] MinIO连接成功") + print() + + except Exception as e: + print(f"[ERROR] 连接失败: {e}") + return + + # 统计信息 + processed_count = 0 + updated_count = 0 + created_count = 0 + skipped_count = 0 + failed_count = 0 + failed_files = [] + + # 遍历所有.docx文件 + print("="*80) + print("开始处理模板文件...") + print("="*80) + print() + + for root, dirs, files in os.walk(TEMPLATES_DIR): + for file in files: + # 只处理.docx文件,跳过临时文件 + if not file.endswith('.docx') or file.startswith('~$'): + continue + + file_path = Path(root) / file + + # 识别文档类型 + doc_config = identify_document_type(file) + + if not doc_config: + print(f"\n[{processed_count + skipped_count + failed_count + 1}] [WARN] 跳过: {file}") + print(f" 原因: 无法识别文档类型") + print(f" 路径: {file_path}") + skipped_count += 1 + continue + + processed_count += 1 + print(f"\n[{processed_count}] 处理: {file}") + print(f" 类型: {doc_config.get('template_code', 'UNKNOWN')}") + print(f" 名称: {doc_config.get('name', 'UNKNOWN')}") + print(f" 路径: {file_path}") + + try: + # 检查文件是否存在 + if not file_path.exists(): + raise FileNotFoundError(f"文件不存在: {file_path}") + + # 获取文件信息 + file_size = file_path.stat().st_size + file_mtime = datetime.fromtimestamp(file_path.stat().st_mtime) + print(f" 大小: {file_size:,} 字节") + print(f" 修改时间: {file_mtime.strftime('%Y-%m-%d %H:%M:%S')}") + + # 上传到MinIO(覆盖旧版本) + print(f" 上传到MinIO...") + minio_path = upload_to_minio(file_path, minio_client) + print(f" [OK] MinIO路径: {minio_path}") + + # 更新数据库 + print(f" 更新数据库...") + file_config_id = update_file_config(conn, doc_config, minio_path) + + # 判断是更新还是创建 + cursor = conn.cursor() + check_sql = """ + SELECT created_time, updated_time FROM f_polic_file_config + WHERE id = %s + """ + cursor.execute(check_sql, (file_config_id,)) + result = cursor.fetchone() + cursor.close() + + if result: + created_time, updated_time = result + if created_time == updated_time: + created_count += 1 + else: + updated_count += 1 + + print(f" [OK] 处理成功 (配置ID: {file_config_id})") + + except Exception as e: + failed_count += 1 + failed_files.append((str(file_path), str(e))) + print(f" [ERROR] 处理失败: {e}") + import traceback + traceback.print_exc() + + # 关闭数据库连接 + conn.close() + + # 输出统计信息 + print("\n" + "="*80) + print("更新完成") + print("="*80) + print(f"总处理数: {processed_count}") + print(f" 成功更新: {updated_count}") + print(f" 成功创建: {created_count}") + print(f" 跳过: {skipped_count}") + print(f" 失败: {failed_count}") + + if failed_files: + print("\n失败的文件:") + for file_path, error in failed_files: + print(f" - {file_path}") + print(f" 错误: {error}") + + print("\n所有模板文件已更新到最新版本!") + + +if __name__ == '__main__': + update_all_templates() + diff --git a/verify_template_file_id_relations.py b/verify_template_file_id_relations.py new file mode 100644 index 0000000..1939178 --- /dev/null +++ b/verify_template_file_id_relations.py @@ -0,0 +1,531 @@ +""" +检查模板的 file_id 和相关关联关系是否正确 +重点检查: +1. f_polic_file_config 表中的模板记录(file_id) +2. f_polic_file_field 表中的关联关系(file_id 和 filed_id 的对应关系) +""" +import sys +import pymysql +from pathlib import Path +from typing import Dict, List, Set, Tuple +from collections import defaultdict + +# 设置控制台编码为UTF-8(Windows兼容) +if sys.platform == 'win32': + try: + sys.stdout.reconfigure(encoding='utf-8') + sys.stderr.reconfigure(encoding='utf-8') + except: + pass + +# 数据库连接配置 +DB_CONFIG = { + 'host': '152.136.177.240', + 'port': 5012, + 'user': 'finyx', + 'password': '6QsGK6MpePZDE57Z', + 'database': 'finyx', + 'charset': 'utf8mb4' +} + +# 固定值 +TENANT_ID = 615873064429507639 + +# 项目根目录 +PROJECT_ROOT = Path(__file__).parent +TEMPLATES_DIR = PROJECT_ROOT / "template_finish" + +# 文档类型映射(用于识别模板) +DOCUMENT_TYPE_MAPPING = { + "1.请示报告卡(XXX)": "REPORT_CARD", + "2.初步核实审批表(XXX)": "PRELIMINARY_VERIFICATION_APPROVAL", + "3.附件初核方案(XXX)": "INVESTIGATION_PLAN", + "谈话通知书第一联": "NOTIFICATION_LETTER_1", + "谈话通知书第二联": "NOTIFICATION_LETTER_2", + "谈话通知书第三联": "NOTIFICATION_LETTER_3", + "1.请示报告卡(初核谈话)": "REPORT_CARD_INTERVIEW", + "2谈话审批表": "INTERVIEW_APPROVAL_FORM", + "3.谈话前安全风险评估表": "PRE_INTERVIEW_RISK_ASSESSMENT", + "4.谈话方案": "INTERVIEW_PLAN", + "5.谈话后安全风险评估表": "POST_INTERVIEW_RISK_ASSESSMENT", + "1.谈话笔录": "INTERVIEW_RECORD", + "2.谈话询问对象情况摸底调查30问": "INVESTIGATION_30_QUESTIONS", + "3.被谈话人权利义务告知书": "RIGHTS_OBLIGATIONS_NOTICE", + "4.点对点交接单": "HANDOVER_FORM", + "4.点对点交接单2": "HANDOVER_FORM_2", + "5.陪送交接单(新)": "ESCORT_HANDOVER_FORM", + "6.1保密承诺书(谈话对象使用-非中共党员用)": "CONFIDENTIALITY_COMMITMENT_NON_PARTY", + "6.2保密承诺书(谈话对象使用-中共党员用)": "CONFIDENTIALITY_COMMITMENT_PARTY", + "7.办案人员-办案安全保密承诺书": "INVESTIGATOR_CONFIDENTIALITY_COMMITMENT", + "8-1请示报告卡(初核报告结论) ": "REPORT_CARD_CONCLUSION", + "8.XXX初核情况报告": "INVESTIGATION_REPORT" +} + + +def get_template_files() -> Dict[str, Path]: + """获取所有模板文件""" + templates = {} + if not TEMPLATES_DIR.exists(): + return templates + + for root, dirs, files in os.walk(TEMPLATES_DIR): + for file in files: + if file.endswith('.docx') and not file.startswith('~$'): + file_path = Path(root) / file + base_name = Path(file).stem + if base_name in DOCUMENT_TYPE_MAPPING: + templates[base_name] = file_path + + return templates + + +def check_file_configs(conn) -> Dict: + """检查 f_polic_file_config 表中的模板记录""" + print("\n" + "="*80) + print("1. 检查 f_polic_file_config 表中的模板记录") + print("="*80) + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + # 查询所有模板记录 + cursor.execute(""" + SELECT id, name, template_code, file_path, state, parent_id + FROM f_polic_file_config + WHERE tenant_id = %s + ORDER BY name + """, (TENANT_ID,)) + + all_configs = cursor.fetchall() + + # 按 template_code 和 name 组织数据 + configs_by_code = {} + configs_by_name = {} + + for config in all_configs: + config_id = config['id'] + name = config['name'] + template_code = config.get('template_code') + + if template_code: + if template_code not in configs_by_code: + configs_by_code[template_code] = [] + configs_by_code[template_code].append(config) + + if name: + if name not in configs_by_name: + configs_by_name[name] = [] + configs_by_name[name].append(config) + + print(f"\n总模板记录数: {len(all_configs)}") + print(f"按 template_code 分组: {len(configs_by_code)} 个不同的 template_code") + print(f"按 name 分组: {len(configs_by_name)} 个不同的 name") + + # 检查重复的 template_code + duplicate_codes = {code: configs for code, configs in configs_by_code.items() if len(configs) > 1} + if duplicate_codes: + print(f"\n[WARN] 发现重复的 template_code ({len(duplicate_codes)} 个):") + for code, configs in duplicate_codes.items(): + print(f" - {code}: {len(configs)} 条记录") + for cfg in configs: + print(f" ID: {cfg['id']}, 名称: {cfg['name']}, 路径: {cfg.get('file_path', 'N/A')}") + + # 检查重复的 name + duplicate_names = {name: configs for name, configs in configs_by_name.items() if len(configs) > 1} + if duplicate_names: + print(f"\n[WARN] 发现重复的 name ({len(duplicate_names)} 个):") + for name, configs in duplicate_names.items(): + print(f" - {name}: {len(configs)} 条记录") + for cfg in configs: + print(f" ID: {cfg['id']}, template_code: {cfg.get('template_code', 'N/A')}, 路径: {cfg.get('file_path', 'N/A')}") + + # 检查未启用的记录 + disabled_configs = [cfg for cfg in all_configs if cfg.get('state') != 1] + if disabled_configs: + print(f"\n[WARN] 发现未启用的模板记录 ({len(disabled_configs)} 个):") + for cfg in disabled_configs: + print(f" - ID: {cfg['id']}, 名称: {cfg['name']}, 状态: {cfg.get('state')}") + + # 检查 file_path 为空的记录 + empty_path_configs = [cfg for cfg in all_configs if not cfg.get('file_path')] + if empty_path_configs: + print(f"\n[WARN] 发现 file_path 为空的记录 ({len(empty_path_configs)} 个):") + for cfg in empty_path_configs: + print(f" - ID: {cfg['id']}, 名称: {cfg['name']}, template_code: {cfg.get('template_code', 'N/A')}") + + cursor.close() + + return { + 'all_configs': all_configs, + 'configs_by_code': configs_by_code, + 'configs_by_name': configs_by_name, + 'duplicate_codes': duplicate_codes, + 'duplicate_names': duplicate_names, + 'disabled_configs': disabled_configs, + 'empty_path_configs': empty_path_configs + } + + +def check_file_field_relations(conn) -> Dict: + """检查 f_polic_file_field 表中的关联关系""" + print("\n" + "="*80) + print("2. 检查 f_polic_file_field 表中的关联关系") + print("="*80) + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + # 查询所有关联关系 + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id, fff.state, fff.tenant_id + FROM f_polic_file_field fff + WHERE fff.tenant_id = %s + ORDER BY fff.file_id, fff.filed_id + """, (TENANT_ID,)) + + all_relations = cursor.fetchall() + + print(f"\n总关联关系数: {len(all_relations)}") + + # 检查无效的 file_id(关联到不存在的文件配置) + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + WHERE fff.tenant_id = %s AND fc.id IS NULL + """, (TENANT_ID,)) + invalid_file_relations = cursor.fetchall() + + # 检查无效的 filed_id(关联到不存在的字段) + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id + FROM f_polic_file_field fff + LEFT JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND f.id IS NULL + """, (TENANT_ID,)) + invalid_field_relations = cursor.fetchall() + + # 检查重复的关联关系(相同的 file_id 和 filed_id) + cursor.execute(""" + SELECT file_id, filed_id, COUNT(*) as count, GROUP_CONCAT(id ORDER BY id) as ids + FROM f_polic_file_field + WHERE tenant_id = %s + GROUP BY file_id, filed_id + HAVING COUNT(*) > 1 + """, (TENANT_ID,)) + duplicate_relations = cursor.fetchall() + + # 检查关联到未启用文件的记录 + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id, fc.name as file_name, fc.state as file_state + FROM f_polic_file_field fff + INNER JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + WHERE fff.tenant_id = %s AND fc.state != 1 + """, (TENANT_ID,)) + disabled_file_relations = cursor.fetchall() + + # 检查关联到未启用字段的记录 + cursor.execute(""" + SELECT fff.id, fff.file_id, fff.filed_id, f.name as field_name, f.filed_code, f.state as field_state + FROM f_polic_file_field fff + INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s AND f.state != 1 + """, (TENANT_ID,)) + disabled_field_relations = cursor.fetchall() + + # 统计每个文件关联的字段数量 + file_field_counts = defaultdict(int) + for rel in all_relations: + file_field_counts[rel['file_id']] += 1 + + print(f"\n文件关联字段统计:") + print(f" 有关联关系的文件数: {len(file_field_counts)}") + if file_field_counts: + max_count = max(file_field_counts.values()) + min_count = min(file_field_counts.values()) + avg_count = sum(file_field_counts.values()) / len(file_field_counts) + print(f" 每个文件关联字段数: 最少 {min_count}, 最多 {max_count}, 平均 {avg_count:.1f}") + + # 输出检查结果 + if invalid_file_relations: + print(f"\n[ERROR] 发现无效的 file_id 关联 ({len(invalid_file_relations)} 条):") + for rel in invalid_file_relations[:10]: # 只显示前10条 + print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}") + if len(invalid_file_relations) > 10: + print(f" ... 还有 {len(invalid_file_relations) - 10} 条") + else: + print(f"\n[OK] 所有 file_id 关联都有效") + + if invalid_field_relations: + print(f"\n[ERROR] 发现无效的 filed_id 关联 ({len(invalid_field_relations)} 条):") + for rel in invalid_field_relations[:10]: # 只显示前10条 + print(f" - 关联ID: {rel['id']}, file_id: {rel['file_id']}, filed_id: {rel['filed_id']}") + if len(invalid_field_relations) > 10: + print(f" ... 还有 {len(invalid_field_relations) - 10} 条") + else: + print(f"\n[OK] 所有 filed_id 关联都有效") + + if duplicate_relations: + print(f"\n[WARN] 发现重复的关联关系 ({len(duplicate_relations)} 组):") + for dup in duplicate_relations[:10]: # 只显示前10组 + print(f" - file_id: {dup['file_id']}, filed_id: {dup['filed_id']}, 重复次数: {dup['count']}, 关联ID: {dup['ids']}") + if len(duplicate_relations) > 10: + print(f" ... 还有 {len(duplicate_relations) - 10} 组") + else: + print(f"\n[OK] 没有重复的关联关系") + + if disabled_file_relations: + print(f"\n[WARN] 发现关联到未启用文件的记录 ({len(disabled_file_relations)} 条):") + for rel in disabled_file_relations[:10]: + print(f" - 文件: {rel['file_name']} (ID: {rel['file_id']}, 状态: {rel['file_state']})") + if len(disabled_file_relations) > 10: + print(f" ... 还有 {len(disabled_file_relations) - 10} 条") + + if disabled_field_relations: + print(f"\n[WARN] 发现关联到未启用字段的记录 ({len(disabled_field_relations)} 条):") + for rel in disabled_field_relations[:10]: + print(f" - 字段: {rel['field_name']} ({rel['filed_code']}, ID: {rel['filed_id']}, 状态: {rel['field_state']})") + if len(disabled_field_relations) > 10: + print(f" ... 还有 {len(disabled_field_relations) - 10} 条") + + cursor.close() + + return { + 'all_relations': all_relations, + 'invalid_file_relations': invalid_file_relations, + 'invalid_field_relations': invalid_field_relations, + 'duplicate_relations': duplicate_relations, + 'disabled_file_relations': disabled_file_relations, + 'disabled_field_relations': disabled_field_relations, + 'file_field_counts': dict(file_field_counts) + } + + +def check_template_file_mapping(conn, file_configs: Dict) -> Dict: + """检查模板文件与数据库记录的映射关系""" + print("\n" + "="*80) + print("3. 检查模板文件与数据库记录的映射关系") + print("="*80) + + import os + templates = get_template_files() + + print(f"\n本地模板文件数: {len(templates)}") + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + # 检查每个模板文件是否在数据库中有对应记录 + missing_in_db = [] + found_in_db = [] + duplicate_mappings = [] + + for template_name, file_path in templates.items(): + template_code = DOCUMENT_TYPE_MAPPING.get(template_name) + if not template_code: + continue + + # 通过 name 和 template_code 查找对应的数据库记录 + # 优先通过 name 精确匹配,然后通过 template_code 匹配 + matching_configs = [] + + # 1. 通过 name 精确匹配 + if template_name in file_configs['configs_by_name']: + for config in file_configs['configs_by_name'][template_name]: + if config.get('file_path'): # 有文件路径的记录 + matching_configs.append(config) + + # 2. 通过 template_code 匹配 + if template_code in file_configs['configs_by_code']: + for config in file_configs['configs_by_code'][template_code]: + if config.get('file_path') and config not in matching_configs: + matching_configs.append(config) + + if len(matching_configs) == 0: + missing_in_db.append({ + 'template_name': template_name, + 'template_code': template_code, + 'file_path': str(file_path) + }) + elif len(matching_configs) == 1: + config = matching_configs[0] + found_in_db.append({ + 'template_name': template_name, + 'template_code': template_code, + 'file_id': config['id'], + 'file_path': config.get('file_path'), + 'name': config.get('name') + }) + else: + # 多个匹配,选择 file_path 最新的(包含最新日期的) + duplicate_mappings.append({ + 'template_name': template_name, + 'template_code': template_code, + 'matching_configs': matching_configs + }) + # 仍然记录第一个作为找到的记录 + config = matching_configs[0] + found_in_db.append({ + 'template_name': template_name, + 'template_code': template_code, + 'file_id': config['id'], + 'file_path': config.get('file_path'), + 'name': config.get('name'), + 'is_duplicate': True + }) + + print(f"\n找到数据库记录的模板: {len(found_in_db)}") + print(f"未找到数据库记录的模板: {len(missing_in_db)}") + print(f"有重复映射的模板: {len(duplicate_mappings)}") + + if duplicate_mappings: + print(f"\n[WARN] 以下模板文件在数据库中有多个匹配记录:") + for item in duplicate_mappings: + print(f" - {item['template_name']} (template_code: {item['template_code']}):") + for cfg in item['matching_configs']: + print(f" * file_id: {cfg['id']}, name: {cfg.get('name')}, path: {cfg.get('file_path', 'N/A')}") + + if missing_in_db: + print(f"\n[WARN] 以下模板文件在数据库中没有对应记录:") + for item in missing_in_db: + print(f" - {item['template_name']} (template_code: {item['template_code']})") + + cursor.close() + + return { + 'found_in_db': found_in_db, + 'missing_in_db': missing_in_db, + 'duplicate_mappings': duplicate_mappings + } + + +def check_field_type_consistency(conn, relations: Dict) -> Dict: + """检查关联关系的字段类型一致性""" + print("\n" + "="*80) + print("4. 检查关联关系的字段类型一致性") + print("="*80) + + cursor = conn.cursor(pymysql.cursors.DictCursor) + + # 查询所有关联关系及其字段类型 + cursor.execute(""" + SELECT + fff.id, + fff.file_id, + fff.filed_id, + fc.name as file_name, + f.name as field_name, + f.filed_code, + f.field_type, + CASE + WHEN f.field_type = 1 THEN '输入字段' + WHEN f.field_type = 2 THEN '输出字段' + ELSE '未知' + END as field_type_name + FROM f_polic_file_field fff + INNER JOIN f_polic_file_config fc ON fff.file_id = fc.id AND fff.tenant_id = fc.tenant_id + INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id + WHERE fff.tenant_id = %s + ORDER BY fff.file_id, f.field_type, f.name + """, (TENANT_ID,)) + + all_relations_with_type = cursor.fetchall() + + # 统计字段类型分布 + input_fields = [r for r in all_relations_with_type if r['field_type'] == 1] + output_fields = [r for r in all_relations_with_type if r['field_type'] == 2] + + print(f"\n字段类型统计:") + print(f" 输入字段 (field_type=1): {len(input_fields)} 条关联") + print(f" 输出字段 (field_type=2): {len(output_fields)} 条关联") + + # 按文件统计 + file_type_counts = defaultdict(lambda: {'input': 0, 'output': 0}) + for rel in all_relations_with_type: + file_id = rel['file_id'] + if rel['field_type'] == 1: + file_type_counts[file_id]['input'] += 1 + elif rel['field_type'] == 2: + file_type_counts[file_id]['output'] += 1 + + print(f"\n每个文件的字段类型分布:") + for file_id, counts in sorted(file_type_counts.items())[:10]: # 只显示前10个 + print(f" 文件ID {file_id}: 输入字段 {counts['input']} 个, 输出字段 {counts['output']} 个") + if len(file_type_counts) > 10: + print(f" ... 还有 {len(file_type_counts) - 10} 个文件") + + cursor.close() + + return { + 'input_fields': input_fields, + 'output_fields': output_fields, + 'file_type_counts': dict(file_type_counts) + } + + +def main(): + """主函数""" + print("="*80) + print("检查模板的 file_id 和相关关联关系") + print("="*80) + + # 连接数据库 + try: + conn = pymysql.connect(**DB_CONFIG) + print("\n[OK] 数据库连接成功") + except Exception as e: + print(f"\n[ERROR] 数据库连接失败: {e}") + return + + try: + # 1. 检查文件配置表 + file_configs = check_file_configs(conn) + + # 2. 检查文件字段关联表 + relations = check_file_field_relations(conn) + + # 3. 检查模板文件与数据库记录的映射 + template_mapping = check_template_file_mapping(conn, file_configs) + + # 4. 检查字段类型一致性 + field_type_info = check_field_type_consistency(conn, relations) + + # 汇总报告 + print("\n" + "="*80) + print("检查汇总") + print("="*80) + + issues = [] + + if file_configs['duplicate_codes']: + issues.append(f"发现 {len(file_configs['duplicate_codes'])} 个重复的 template_code") + if file_configs['duplicate_names']: + issues.append(f"发现 {len(file_configs['duplicate_names'])} 个重复的 name") + if file_configs['empty_path_configs']: + issues.append(f"发现 {len(file_configs['empty_path_configs'])} 个 file_path 为空的记录") + if relations['invalid_file_relations']: + issues.append(f"发现 {len(relations['invalid_file_relations'])} 条无效的 file_id 关联") + if relations['invalid_field_relations']: + issues.append(f"发现 {len(relations['invalid_field_relations'])} 条无效的 filed_id 关联") + if relations['duplicate_relations']: + issues.append(f"发现 {len(relations['duplicate_relations'])} 组重复的关联关系") + if template_mapping['missing_in_db']: + issues.append(f"发现 {len(template_mapping['missing_in_db'])} 个模板文件在数据库中没有对应记录") + + if issues: + print("\n[WARN] 发现以下问题:") + for issue in issues: + print(f" - {issue}") + else: + print("\n[OK] 未发现严重问题") + + print(f"\n总模板记录数: {len(file_configs['all_configs'])}") + print(f"总关联关系数: {len(relations['all_relations'])}") + print(f"有关联关系的文件数: {len(relations['file_field_counts'])}") + + finally: + conn.close() + print("\n数据库连接已关闭") + + +if __name__ == '__main__': + import os + main() +