""" 智慧监督AI文书写作服务 - 主应用 """ from flask import Flask, request, jsonify, send_from_directory from flask_cors import CORS from flasgger import Swagger import os import pymysql from datetime import datetime from dotenv import load_dotenv from services.ai_service import AIService from services.field_service import FieldService from services.document_service import DocumentService from utils.response import success_response, error_response # 加载环境变量 load_dotenv() app = Flask(__name__) CORS(app) # 允许跨域请求 # 配置Swagger swagger_config = { "headers": [], "specs": [ { "endpoint": "apispec", "route": "/apispec.json", "rule_filter": lambda rule: True, "model_filter": lambda tag: True, } ], "static_url_path": "/flasgger_static", "swagger_ui": True, "specs_route": "/api-docs" } swagger_template = { "swagger": "2.0", "info": { "title": "智慧监督AI文书写作服务 API", "description": "基于大模型的智能文书生成服务,支持从非结构化文本中提取结构化字段数据", "version": "1.0.0", "contact": { "name": "API支持" } }, "basePath": "/", "schemes": ["http", "https"], "tags": [ { "name": "AI解析", "description": "AI字段提取相关接口" }, { "name": "文档生成", "description": "文档生成相关接口" }, { "name": "字段配置", "description": "字段配置查询接口" } ] } swagger = Swagger(app, config=swagger_config, template=swagger_template) # 初始化服务 ai_service = AIService() field_service = FieldService() document_service = DocumentService() @app.route('/') def index(): """返回测试页面""" return send_from_directory('static', 'index.html') @app.route('/ai/extract', methods=['POST']) @app.route('/api/ai/extract', methods=['POST']) # 保留旧路径以兼容 def extract(): """ AI字段提取接口 从输入的非结构化文本中提取结构化字段数据 --- tags: - AI解析 summary: 从输入数据中提取结构化字段 description: 使用AI大模型从输入文本中提取结构化字段,根据fieldCode从数据库查询字段配置 consumes: - application/json produces: - application/json parameters: - in: body name: body description: 请求参数 required: true schema: type: object required: - inputData - outputData properties: inputData: type: array description: 输入数据列表 items: type: object properties: fieldCode: type: string description: 字段编码 example: clue_info fieldValue: type: string description: 字段值(原始文本) example: 被举报用户名称是张三,年龄30岁,某公司总经理 outputData: type: array description: 需要提取的输出字段列表 items: type: object properties: fieldCode: type: string description: 字段编码 example: userName responses: 200: description: 解析成功 schema: type: object properties: code: type: integer description: 响应码,0表示成功 example: 0 data: type: object properties: outData: type: array description: 提取的字段列表 items: type: object properties: fieldCode: type: string description: 字段编码 example: userName fieldValue: type: string description: 提取的字段值 example: 张三 msg: type: string description: 响应消息 example: ok isSuccess: type: boolean description: 是否成功 example: true timestamp: type: string description: 时间戳 errorMsg: type: string description: 错误信息(成功时为空) 400: description: 请求参数错误 schema: type: object properties: code: type: integer example: 400 errorMsg: type: string example: 请求参数不能为空 isSuccess: type: boolean example: false 2001: description: AI解析超时或发生错误 schema: type: object properties: code: type: integer example: 2001 errorMsg: type: string example: AI解析超时或发生错误 isSuccess: type: boolean example: false 2002: description: AI解析失败 schema: type: object properties: code: type: integer example: 2002 errorMsg: type: string example: AI解析失败,请检查输入文本质量 isSuccess: type: boolean example: false """ try: data = request.get_json() # 验证请求参数 if not data: return error_response(400, "请求参数不能为空") input_data = data.get('inputData', []) output_data = data.get('outputData', []) if not input_data or not isinstance(input_data, list): return error_response(400, "inputData参数必须是非空数组") if not output_data or not isinstance(output_data, list): return error_response(400, "outputData参数必须是非空数组") # 提取outputData中的fieldCode列表 output_field_codes = [] for item in output_data: if isinstance(item, dict) and 'fieldCode' in item: output_field_codes.append(item['fieldCode']) elif isinstance(item, str): output_field_codes.append(item) if not output_field_codes: return error_response(400, "outputData中必须包含至少一个fieldCode") # 根据fieldCode从数据库查询输出字段配置 output_fields = field_service.get_output_fields_by_field_codes(output_field_codes) if not output_fields: return error_response(2002, f"未找到字段编码 {output_field_codes} 对应的字段配置") # 构建AI提示词(不再需要business_type) prompt = field_service.build_extract_prompt(input_data, output_fields) # 调用AI服务进行解析 ai_result = ai_service.extract_fields(prompt, output_fields) if not ai_result: return error_response(2002, "AI解析失败,请检查输入文本质量") # 调试:打印AI返回的结果 print(f"[API] AI返回结果包含 {len(ai_result)} 个字段") for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']: if key in ai_result: print(f"[API] AI返回 {key} = '{ai_result[key]}'") # 构建返回数据(按照outputData中的字段顺序返回) out_data = [] # 创建一个字段编码到字段信息的映射 field_map = {field['field_code']: field for field in output_fields} # 按照outputData的顺序构建返回数据 # 注意:如果AI未提取到值,返回空字符串,不自动应用默认值 # 默认值信息在文档中说明,由前端根据业务需求决定是否应用 for field_code in output_field_codes: field_value = ai_result.get(field_code, '') # 调试:打印关键字段的映射 if field_code in ['target_name', 'target_gender', 'target_age']: print(f"[API] 构建返回数据: {field_code} = '{field_value}' (从ai_result获取)") out_data.append({ 'fieldCode': field_code, 'fieldValue': field_value }) return success_response({'outData': out_data}) except Exception as e: return error_response(2001, f"AI解析超时或发生错误: {str(e)}") @app.route('/api/file-configs', methods=['GET']) def get_file_configs(): """ 获取可用的文件配置列表 用于查询可用的fileId,供文档生成接口使用 --- tags: - 字段配置 summary: 获取文件配置列表 description: 返回所有启用的文件配置,包含fileId和文件名称 responses: 200: description: 成功 schema: type: object properties: code: type: integer example: 0 data: type: object properties: fileConfigs: type: array items: type: object properties: fileId: type: integer description: 文件配置ID example: 1765273961563507 fileName: type: string description: 文件名称 example: 1.请示报告卡(XXX) filePath: type: string description: MinIO文件路径 example: /615873064429507639/TEMPLATE/2025/12/1.请示报告卡(XXX).docx isSuccess: type: boolean example: true """ try: conn = document_service.get_connection() cursor = conn.cursor(pymysql.cursors.DictCursor) try: sql = """ SELECT id, name, file_path FROM f_polic_file_config WHERE tenant_id = %s AND state = 1 ORDER BY name """ cursor.execute(sql, (document_service.tenant_id,)) configs = cursor.fetchall() file_configs = [] for config in configs: file_configs.append({ 'fileId': config['id'], 'fileName': config['name'], 'filePath': config['file_path'] or '' }) return success_response({ 'fileConfigs': file_configs }) finally: cursor.close() conn.close() except Exception as e: return error_response(500, f"查询文件配置失败: {str(e)}") @app.route('/api/fields', methods=['GET']) def get_fields(): """ 获取字段配置接口 获取指定业务类型的输入和输出字段配置 --- tags: - 字段配置 summary: 获取字段配置 description: 获取指定业务类型的输入字段和输出字段配置,用于测试页面展示 produces: - application/json parameters: - in: query name: businessType type: string required: false default: INVESTIGATION description: 业务类型 example: INVESTIGATION responses: 200: description: 获取成功 schema: type: object properties: code: type: integer description: 响应码,0表示成功 example: 0 data: type: object properties: fields: type: object properties: input_fields: type: array description: 输入字段列表 items: type: object properties: id: type: integer description: 字段ID name: type: string description: 字段名称 example: 线索信息 field_code: type: string description: 字段编码 example: clue_info field_type: type: integer description: 字段类型(1=输入字段,2=输出字段) example: 1 output_fields: type: array description: 输出字段列表 items: type: object properties: id: type: integer description: 字段ID name: type: string description: 字段名称 example: 被核查人姓名 field_code: type: string description: 字段编码 example: target_name field_type: type: integer description: 字段类型(1=输入字段,2=输出字段) example: 2 msg: type: string description: 响应消息 example: ok isSuccess: type: boolean description: 是否成功 example: true 500: description: 服务器错误 schema: type: object properties: code: type: integer example: 500 errorMsg: type: string example: 获取字段配置失败 isSuccess: type: boolean example: false """ try: business_type = request.args.get('businessType', 'INVESTIGATION') fields = field_service.get_fields_by_business_type(business_type) return success_response({'fields': fields}) except Exception as e: return error_response(500, f"获取字段配置失败: {str(e)}") @app.route('/ai/generate-document', methods=['POST']) @app.route('/api/ai/generate-document', methods=['POST']) # 保留旧路径以兼容 def generate_document(): """ 文档生成接口 根据输入数据填充Word模板并生成文档 --- tags: - 文档生成 summary: 生成填充后的文档 description: 根据输入数据填充Word模板,上传到MinIO并返回文件路径 consumes: - application/json produces: - application/json parameters: - in: body name: body description: 请求参数 required: true schema: type: object required: - inputData - fpolicFieldParamFileList properties: inputData: type: array description: 输入数据列表 items: type: object properties: fieldCode: type: string description: 字段编码 example: userName fieldValue: type: string description: 字段值 example: 张三 fpolicFieldParamFileList: type: array description: 文件列表 items: type: object required: - fileId properties: fileId: type: integer description: 文件配置ID(从f_polic_file_config表获取) example: 1765273961563507 fileName: type: string description: 文件名称(可选,用于生成文档名称) example: 请示报告卡.doc responses: 200: description: 生成成功 schema: type: object properties: code: type: integer description: 响应码,0表示成功 example: 0 data: type: object properties: documentId: type: string description: 文档ID example: DOC202411260001 documentName: type: string description: 文档名称(第一个生成的文档名称) example: 初步核实审批表_张三.docx fpolicFieldParamFileList: type: array description: 生成的文档列表(数量与请求一致) items: type: object properties: fileId: type: integer description: 文件ID(与请求中的fileId一致) example: 1 fileName: type: string description: 实际生成的文档名称(.docx格式),与请求中的fileName可能不同 example: 初步核实审批表_张三.docx filePath: type: string description: MinIO相对路径(指向生成的文档文件) example: /615873064429507639/20251205090700/初步核实审批表_张三.docx downloadUrl: type: string description: MinIO预签名下载URL(完整链接,7天有效,可直接下载) example: https://minio.datacubeworld.com:9000/finyx/615873064429507639/20251205090700/初步核实审批表_张三.docx?X-Amz-Algorithm=... msg: type: string example: ok isSuccess: type: boolean example: true 1001: description: 模板不存在或参数错误 schema: type: object properties: code: type: integer example: 1001 errorMsg: type: string example: 文件ID对应的模板不存在或未启用 isSuccess: type: boolean example: false 3001: description: 文件生成失败 schema: type: object properties: code: type: integer example: 3001 errorMsg: type: string example: 文件生成失败 isSuccess: type: boolean example: false 3002: description: 文件保存失败 schema: type: object properties: code: type: integer example: 3002 errorMsg: type: string example: 文件保存失败 isSuccess: type: boolean example: false """ try: data = request.get_json() # 验证请求参数 if not data: return error_response(400, "请求参数不能为空") input_data = data.get('inputData', []) file_list = data.get('fpolicFieldParamFileList', []) if not input_data or not isinstance(input_data, list): return error_response(400, "inputData参数必须是非空数组") if not file_list or not isinstance(file_list, list): return error_response(400, "fpolicFieldParamFileList参数必须是非空数组") # 将input_data转换为字典格式(用于生成文档名称) field_data = {} for item in input_data: field_code = item.get('fieldCode', '') field_value = item.get('fieldValue', '') if field_code: field_data[field_code] = field_value or '' # 生成文档ID document_id = document_service.generate_document_id() # 处理每个文件 result_file_list = [] first_document_name = None # 用于存储第一个生成的文档名 for file_info in file_list: # 兼容 id 和 fileId 两种字段 file_id = file_info.get('fileId') or file_info.get('id') file_name = file_info.get('fileName') or file_info.get('name', '') if not file_id: return error_response(1001, f"文件 {file_name} 缺少fileId或id参数") try: # 生成文档(使用fileId而不是templateCode) result = document_service.generate_document( file_id=file_id, input_data=input_data, file_info=file_info ) # 使用生成的文档名称(.docx格式),而不是原始文件名 generated_file_name = result.get('fileName', file_name) # 保存第一个文档名作为 documentName if first_document_name is None: first_document_name = generated_file_name result_file_list.append({ 'fileId': file_id, 'fileName': generated_file_name, # 使用生成的文档名 'filePath': result['filePath'], # MinIO相对路径 'downloadUrl': result.get('downloadUrl') # MinIO预签名下载URL(完整链接) }) except Exception as e: error_msg = str(e) if '不存在' in error_msg or '模板' in error_msg: return error_response(1001, error_msg) elif '生成' in error_msg or '填充' in error_msg: return error_response(3001, error_msg) elif '上传' in error_msg or '保存' in error_msg: return error_response(3002, error_msg) else: return error_response(3001, f"文件生成失败: {error_msg}") # 构建返回数据(不包含inputData,只返回生成的文档信息) return success_response({ 'documentId': document_id, 'documentName': first_document_name or 'generated.docx', # 使用第一个生成的文档名 'fpolicFieldParamFileList': result_file_list }) except Exception as e: return error_response(3001, f"文档生成失败: {str(e)}") @app.route('/fPolicTask/getDocument', methods=['POST']) def get_document_by_task(): """ 通过taskId获取文档(兼容接口) 支持通过taskId查询关联的文件列表,或直接使用提供的文件列表 """ try: data = request.get_json() # 验证请求参数 if not data: return error_response(400, "请求参数不能为空") task_id = data.get('taskId') input_data = data.get('inputData', []) file_list = data.get('fpolicFieldParamFileList', []) # 如果没有提供file_list,尝试通过taskId查询 if not file_list and task_id: try: conn = document_service.get_connection() cursor = conn.cursor(pymysql.cursors.DictCursor) try: # 尝试从f_polic_task表查询关联的文件列表 # 注意:这里需要根据实际表结构调整SQL sql = """ SELECT file_id, file_name FROM f_polic_task_file WHERE task_id = %s AND tenant_id = %s AND state = 1 """ cursor.execute(sql, (task_id, document_service.tenant_id)) task_files = cursor.fetchall() if task_files: file_list = [] for tf in task_files: file_list.append({ 'fileId': tf['file_id'], 'fileName': tf.get('file_name', '') }) except Exception as e: # 如果表不存在或查询失败,记录日志但不报错 print(f"[WARN] 无法通过taskId查询文件列表: {str(e)}") finally: cursor.close() conn.close() except Exception as e: print(f"[WARN] 查询taskId关联文件时出错: {str(e)}") # 如果仍然没有file_list,返回错误 if not file_list: return error_response(400, "缺少fpolicFieldParamFileList参数,且无法通过taskId查询到关联文件。请提供fpolicFieldParamFileList参数,格式: [{'fileId': 文件ID, 'fileName': '文件名'}]") if not input_data or not isinstance(input_data, list): return error_response(400, "inputData参数必须是非空数组") if not file_list or not isinstance(file_list, list): return error_response(400, "fpolicFieldParamFileList参数必须是非空数组") # 将input_data转换为字典格式(用于生成文档名称) field_data = {} for item in input_data: field_code = item.get('fieldCode', '') field_value = item.get('fieldValue', '') if field_code: field_data[field_code] = field_value or '' # 生成文档ID document_id = document_service.generate_document_id() # 处理每个文件 result_file_list = [] first_document_name = None # 用于存储第一个生成的文档名 for file_info in file_list: # 兼容 id 和 fileId 两种字段 file_id = file_info.get('fileId') or file_info.get('id') file_name = file_info.get('fileName') or file_info.get('name', '') if not file_id: return error_response(1001, f"文件 {file_name} 缺少fileId或id参数") try: # 生成文档(使用fileId而不是templateCode) result = document_service.generate_document( file_id=file_id, input_data=input_data, file_info=file_info ) # 使用生成的文档名称(.docx格式),而不是原始文件名 generated_file_name = result.get('fileName', file_name) # 保存第一个文档名作为 documentName if first_document_name is None: first_document_name = generated_file_name result_file_list.append({ 'fileId': file_id, 'fileName': generated_file_name, # 使用生成的文档名 'filePath': result['filePath'], # MinIO相对路径 'downloadUrl': result.get('downloadUrl') # MinIO预签名下载URL(完整链接) }) except Exception as e: error_msg = str(e) if '不存在' in error_msg or '模板' in error_msg: return error_response(1001, error_msg) elif '生成' in error_msg or '填充' in error_msg: return error_response(3001, error_msg) elif '上传' in error_msg or '保存' in error_msg: return error_response(3002, error_msg) else: return error_response(3001, f"文件生成失败: {error_msg}") # 构建返回数据(不包含inputData,只返回生成的文档信息) return success_response({ 'documentId': document_id, 'documentName': first_document_name or 'generated.docx', # 使用第一个生成的文档名 'fpolicFieldParamFileList': result_file_list }) except Exception as e: return error_response(3001, f"文档生成失败: {str(e)}") @app.route('/template-field-manager') def template_field_manager(): """返回模板字段关联管理页面""" return send_from_directory('static', 'template_field_manager.html') @app.route('/api/template-field-relations', methods=['GET']) def get_template_field_relations(): """ 获取所有模板和字段的关联关系 用于模板字段关联管理页面 """ try: conn = document_service.get_connection() cursor = conn.cursor(pymysql.cursors.DictCursor) try: # 获取所有启用的模板 cursor.execute(""" SELECT id, name, template_code FROM f_polic_file_config WHERE tenant_id = %s AND state = 1 ORDER BY name """, (document_service.tenant_id,)) templates = cursor.fetchall() # 获取所有启用的输入字段 cursor.execute(""" SELECT id, name, filed_code, field_type FROM f_polic_field WHERE tenant_id = %s AND field_type = 1 AND state = 1 ORDER BY name """, (document_service.tenant_id,)) input_fields = cursor.fetchall() # 获取所有启用的输出字段 cursor.execute(""" SELECT id, name, filed_code, field_type FROM f_polic_field WHERE tenant_id = %s AND field_type = 2 AND state = 1 ORDER BY name """, (document_service.tenant_id,)) output_fields = cursor.fetchall() # 获取现有的关联关系 cursor.execute(""" SELECT file_id, filed_id FROM f_polic_file_field WHERE tenant_id = %s AND state = 1 """, (document_service.tenant_id,)) relations = cursor.fetchall() # 构建关联关系映射 (file_id -> list of filed_id) # 注意:JSON不支持set,所以转换为list relation_map = {} for rel in relations: file_id = rel['file_id'] filed_id = rel['filed_id'] if file_id not in relation_map: relation_map[file_id] = [] relation_map[file_id].append(filed_id) return success_response({ 'templates': templates, 'input_fields': input_fields, 'output_fields': output_fields, 'relations': relation_map }) finally: cursor.close() conn.close() except Exception as e: return error_response(500, f"获取关联关系失败: {str(e)}") @app.route('/api/template-field-relations', methods=['POST']) def save_template_field_relations(): """ 保存模板和字段的关联关系 请求体格式: { "template_id": 123, "input_field_ids": [1, 2, 3], "output_field_ids": [4, 5, 6] } """ try: data = request.get_json() if not data: return error_response(400, "请求参数不能为空") template_id = data.get('template_id') input_field_ids = data.get('input_field_ids', []) output_field_ids = data.get('output_field_ids', []) if not template_id: return error_response(400, "template_id参数不能为空") conn = document_service.get_connection() cursor = conn.cursor() try: # 验证模板是否存在 cursor.execute(""" SELECT id FROM f_polic_file_config WHERE id = %s AND tenant_id = %s AND state = 1 """, (template_id, document_service.tenant_id)) if not cursor.fetchone(): return error_response(400, f"模板ID {template_id} 不存在或未启用") # 合并所有字段ID all_field_ids = set(input_field_ids + output_field_ids) # 验证字段是否存在 if all_field_ids: placeholders = ','.join(['%s'] * len(all_field_ids)) cursor.execute(f""" SELECT id FROM f_polic_field WHERE id IN ({placeholders}) AND tenant_id = %s AND state = 1 """, list(all_field_ids) + [document_service.tenant_id]) existing_field_ids = {row[0] for row in cursor.fetchall()} invalid_field_ids = all_field_ids - existing_field_ids if invalid_field_ids: return error_response(400, f"字段ID {list(invalid_field_ids)} 不存在或未启用") # 删除该模板的所有现有关联关系 cursor.execute(""" DELETE FROM f_polic_file_field WHERE file_id = %s AND tenant_id = %s """, (template_id, document_service.tenant_id)) # 插入新的关联关系 current_time = datetime.now() created_by = 655162080928945152 # 默认创建者ID if all_field_ids: insert_sql = """ INSERT INTO f_polic_file_field (tenant_id, file_id, filed_id, created_time, created_by, updated_time, updated_by, state) VALUES (%s, %s, %s, %s, %s, %s, %s, 1) """ for field_id in all_field_ids: cursor.execute(insert_sql, ( document_service.tenant_id, template_id, field_id, current_time, created_by, current_time, created_by )) conn.commit() return success_response({ 'template_id': template_id, 'input_field_count': len(input_field_ids), 'output_field_count': len(output_field_ids), 'total_field_count': len(all_field_ids) }, "保存成功") except Exception as e: conn.rollback() raise e finally: cursor.close() conn.close() except Exception as e: return error_response(500, f"保存关联关系失败: {str(e)}") if __name__ == '__main__': # 确保static目录存在 os.makedirs('static', exist_ok=True) port = int(os.getenv('PORT', 7500)) debug = os.getenv('DEBUG', 'False').lower() == 'true' print(f"服务启动在 http://localhost:{port}") print(f"测试页面: http://localhost:{port}/") print(f"模板字段管理页面: http://localhost:{port}/template-field-manager") print(f"Swagger API文档: http://localhost:{port}/api-docs") app.run(host='0.0.0.0', port=port, debug=debug)