# ai-business-write/app.py

"""
智慧监督AI文书写作服务 - 主应用
"""
from flask import Flask, request, jsonify, send_from_directory
from flask_cors import CORS
from flasgger import Swagger
import os
from datetime import datetime
from dotenv import load_dotenv

from services.ai_service import AIService
from services.field_service import FieldService
from services.document_service import DocumentService
from utils.response import success_response, error_response

# Load environment variables (python-dotenv) before anything below reads
# the process environment.
load_dotenv()

# The Flask application object; every route in this module registers on it.
app = Flask(__name__)
CORS(app)  # Enable cross-origin requests (flask-cors called with no restrictions)

# Swagger (flasgger) settings: where the generated spec and the UI are served.
swagger_config = dict(
    headers=[],
    specs=[
        dict(
            endpoint="apispec",
            route="/apispec.json",
            # Both filters accept everything they are given.
            rule_filter=lambda _rule: True,
            model_filter=lambda _tag: True,
        ),
    ],
    static_url_path="/flasgger_static",
    swagger_ui=True,
    specs_route="/api-docs",
)

# Base Swagger 2.0 document; the per-endpoint specs come from route docstrings.
swagger_template = dict(
    swagger="2.0",
    info=dict(
        title="智慧监督AI文书写作服务 API",
        description="基于大模型的智能文书生成服务，支持从非结构化文本中提取结构化字段数据",
        version="1.0.0",
        contact=dict(name="API支持"),
    ),
    basePath="/",
    schemes=["http", "https"],
    # One entry per tag used by the route docstrings: (name, description).
    tags=[
        dict(name=tag_name, description=tag_description)
        for tag_name, tag_description in (
            ("AI解析", "AI字段提取相关接口"),
            ("文档生成", "文档生成相关接口"),
            ("字段配置", "字段配置查询接口"),
        )
    ],
)

# Attach flasgger to the app with the config and template dictionaries.
swagger = Swagger(app, config=swagger_config, template=swagger_template)

# Instantiate the services once at import time; the route handlers below use
# these module-level instances.
ai_service = AIService()
field_service = FieldService()
document_service = DocumentService()


@app.route('/')
def index():
    """Serve the test page: ``index.html`` from the ``static`` directory."""
    page_directory = 'static'
    page_filename = 'index.html'
    return send_from_directory(page_directory, page_filename)


@app.route('/ai/extract', methods=['POST'])
@app.route('/api/ai/extract', methods=['POST'])  # legacy path kept for compatibility
def extract():
    """
    AI field-extraction endpoint.
    Extracts structured field values from unstructured input text.

    ---
    tags:
      - AI解析
    summary: 从输入数据中提取结构化字段
    description: 使用AI大模型从输入文本中提取结构化字段，根据fieldCode从数据库查询字段配置
    consumes:
      - application/json
    produces:
      - application/json
    parameters:
      - in: body
        name: body
        description: 请求参数
        required: true
        schema:
          type: object
          required:
            - inputData
            - outputData
          properties:
            inputData:
              type: array
              description: 输入数据列表
              items:
                type: object
                properties:
                  fieldCode:
                    type: string
                    description: 字段编码
                    example: clue_info
                  fieldValue:
                    type: string
                    description: 字段值（原始文本）
                    example: 被举报用户名称是张三，年龄30岁，某公司总经理
            outputData:
              type: array
              description: 需要提取的输出字段列表
              items:
                type: object
                properties:
                  fieldCode:
                    type: string
                    description: 字段编码
                    example: userName
    responses:
      200:
        description: 解析成功
        schema:
          type: object
          properties:
            code:
              type: integer
              description: 响应码，0表示成功
              example: 0
            data:
              type: object
              properties:
                outData:
                  type: array
                  description: 提取的字段列表
                  items:
                    type: object
                    properties:
                      fieldCode:
                        type: string
                        description: 字段编码
                        example: userName
                      fieldValue:
                        type: string
                        description: 提取的字段值
                        example: 张三
            msg:
              type: string
              description: 响应消息
              example: ok
            isSuccess:
              type: boolean
              description: 是否成功
              example: true
            timestamp:
              type: string
              description: 时间戳
            errorMsg:
              type: string
              description: 错误信息（成功时为空）
      400:
        description: 请求参数错误
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 400
            errorMsg:
              type: string
              example: 请求参数不能为空
            isSuccess:
              type: boolean
              example: false
      2001:
        description: AI解析超时或发生错误
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 2001
            errorMsg:
              type: string
              example: AI解析超时或发生错误
            isSuccess:
              type: boolean
              example: false
      2002:
        description: AI解析失败
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 2002
            errorMsg:
              type: string
              example: AI解析失败，请检查输入文本质量
            isSuccess:
              type: boolean
              example: false
    """
    # NOTE: the docstring section after '---' is the flasgger spec for this
    # endpoint and is intentionally kept verbatim.
    try:
        # silent=True makes a missing, malformed or non-JSON body come back as
        # None, so it is reported as a 400 parameter error below instead of
        # raising into the generic handler and being mislabelled as 2001.
        data = request.get_json(silent=True)

        # Validate the request envelope.
        if not data:
            return error_response(400, "请求参数不能为空")
        if not isinstance(data, dict):
            # e.g. a JSON array or scalar: `.get` below would otherwise fail.
            return error_response(400, "请求体必须是JSON对象")

        input_data = data.get('inputData', [])
        output_data = data.get('outputData', [])

        if not input_data or not isinstance(input_data, list):
            return error_response(400, "inputData参数必须是非空数组")

        if not output_data or not isinstance(output_data, list):
            return error_response(400, "outputData参数必须是非空数组")

        # Collect the requested field codes. Each entry may be an object with
        # a 'fieldCode' key or a bare string; anything else is ignored.
        output_field_codes = []
        for item in output_data:
            if isinstance(item, dict) and 'fieldCode' in item:
                output_field_codes.append(item['fieldCode'])
            elif isinstance(item, str):
                output_field_codes.append(item)

        if not output_field_codes:
            return error_response(400, "outputData中必须包含至少一个fieldCode")

        # Look up the output-field configuration for the requested codes.
        output_fields = field_service.get_output_fields_by_field_codes(output_field_codes)

        if not output_fields:
            return error_response(2002, f"未找到字段编码 {output_field_codes} 对应的字段配置")

        # Build the prompt and run the extraction.
        prompt = field_service.build_extract_prompt(input_data, output_fields)
        ai_result = ai_service.extract_fields(prompt, output_fields)

        if not ai_result:
            return error_response(2002, "AI解析失败，请检查输入文本质量")

        # Answer in the order the caller requested. A code the AI did not
        # return gets an empty string; defaults are deliberately not applied
        # here and are left to the front end.
        out_data = [
            {'fieldCode': field_code, 'fieldValue': ai_result.get(field_code, '')}
            for field_code in output_field_codes
        ]

        return success_response({'outData': out_data})

    except Exception as e:
        # Top-level boundary: keep the traceback in the server log, since the
        # client only receives the message text.
        app.logger.exception("AI field extraction failed")
        return error_response(2001, f"AI解析超时或发生错误: {str(e)}")


@app.route('/api/fields', methods=['GET'])
def get_fields():
    """
    Field-configuration endpoint.
    Returns the input and output field configuration for a business type.

    ---
    tags:
      - 字段配置
    summary: 获取字段配置
    description: 获取指定业务类型的输入字段和输出字段配置，用于测试页面展示
    produces:
      - application/json
    parameters:
      - in: query
        name: businessType
        type: string
        required: false
        default: INVESTIGATION
        description: 业务类型
        example: INVESTIGATION
    responses:
      200:
        description: 获取成功
        schema:
          type: object
          properties:
            code:
              type: integer
              description: 响应码，0表示成功
              example: 0
            data:
              type: object
              properties:
                fields:
                  type: object
                  properties:
                    input_fields:
                      type: array
                      description: 输入字段列表
                      items:
                        type: object
                        properties:
                          id:
                            type: integer
                            description: 字段ID
                          name:
                            type: string
                            description: 字段名称
                            example: 线索信息
                          field_code:
                            type: string
                            description: 字段编码
                            example: clue_info
                          field_type:
                            type: integer
                            description: 字段类型（1=输入字段，2=输出字段）
                            example: 1
                    output_fields:
                      type: array
                      description: 输出字段列表
                      items:
                        type: object
                        properties:
                          id:
                            type: integer
                            description: 字段ID
                          name:
                            type: string
                            description: 字段名称
                            example: 被核查人姓名
                          field_code:
                            type: string
                            description: 字段编码
                            example: target_name
                          field_type:
                            type: integer
                            description: 字段类型（1=输入字段，2=输出字段）
                            example: 2
            msg:
              type: string
              description: 响应消息
              example: ok
            isSuccess:
              type: boolean
              description: 是否成功
              example: true
      500:
        description: 服务器错误
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 500
            errorMsg:
              type: string
              example: 获取字段配置失败
            isSuccess:
              type: boolean
              example: false
    """
    # NOTE: the docstring section after '---' is the flasgger spec for this
    # endpoint and is intentionally kept verbatim.
    try:
        # 'businessType' is optional and falls back to INVESTIGATION.
        requested_type = request.args.get('businessType', 'INVESTIGATION')
        payload = {
            'fields': field_service.get_fields_by_business_type(requested_type),
        }
        return success_response(payload)
    except Exception as exc:
        # Any failure in the lookup or response building becomes a 500 payload.
        failure_message = f"获取字段配置失败: {exc!s}"
        return error_response(500, failure_message)


def _document_error_response(error_msg):
    """Turn a document-generation error message into the API error response.

    The code is chosen by substring matching, checked in this order:
    '不存在'/'模板' -> 1001, '生成'/'填充' -> 3001, '上传'/'保存' -> 3002,
    anything else -> 3001 with a generic prefix.
    """
    # NOTE(review): because '模板' is tested first, any message that merely
    # mentions the template (e.g. a fill failure) is reported as 1001.
    # Confirm against the messages DocumentService actually raises.
    if '不存在' in error_msg or '模板' in error_msg:
        return error_response(1001, error_msg)
    if '生成' in error_msg or '填充' in error_msg:
        return error_response(3001, error_msg)
    if '上传' in error_msg or '保存' in error_msg:
        return error_response(3002, error_msg)
    return error_response(3001, f"文件生成失败: {error_msg}")


@app.route('/ai/generate-document', methods=['POST'])
@app.route('/api/ai/generate-document', methods=['POST'])  # legacy path kept for compatibility
def generate_document():
    """
    Document-generation endpoint.
    Generates one document per requested file entry from the input data.

    ---
    tags:
      - 文档生成
    summary: 生成填充后的文档
    description: 根据输入数据填充Word模板，上传到MinIO并返回文件路径
    consumes:
      - application/json
    produces:
      - application/json
    parameters:
      - in: body
        name: body
        description: 请求参数
        required: true
        schema:
          type: object
          required:
            - inputData
            - fpolicFieldParamFileList
          properties:
            inputData:
              type: array
              description: 输入数据列表
              items:
                type: object
                properties:
                  fieldCode:
                    type: string
                    description: 字段编码
                    example: userName
                  fieldValue:
                    type: string
                    description: 字段值
                    example: 张三
            fpolicFieldParamFileList:
              type: array
              description: 文件列表
              items:
                type: object
                properties:
                  fileId:
                    type: integer
                    description: 文件ID
                    example: 1
                  fileName:
                    type: string
                    description: 文件名称
                    example: 请示报告卡.doc
                  templateCode:
                    type: string
                    description: 模板编码
                    example: REPORT_CARD
    responses:
      200:
        description: 生成成功
        schema:
          type: object
          properties:
            code:
              type: integer
              description: 响应码，0表示成功
              example: 0
            data:
              type: object
              properties:
                documentId:
                  type: string
                  description: 文档ID
                  example: DOC202411260001
                documentName:
                  type: string
                  description: 文档名称（第一个生成的文档名称）
                  example: 初步核实审批表_张三.docx
                fpolicFieldParamFileList:
                  type: array
                  description: 生成的文档列表（数量与请求一致）
                  items:
                    type: object
                    properties:
                      fileId:
                        type: integer
                        description: 文件ID（与请求中的fileId一致）
                        example: 1
                      fileName:
                        type: string
                        description: 实际生成的文档名称（.docx格式），与请求中的fileName可能不同
                        example: 初步核实审批表_张三.docx
                      filePath:
                        type: string
                        description: MinIO相对路径（指向生成的文档文件）
                        example: /615873064429507639/20251205090700/初步核实审批表_张三.docx
            msg:
              type: string
              example: ok
            isSuccess:
              type: boolean
              example: true
      1001:
        description: 模板不存在
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 1001
            errorMsg:
              type: string
              example: 模板不存在
            isSuccess:
              type: boolean
              example: false
      3001:
        description: 文件生成失败
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 3001
            errorMsg:
              type: string
              example: 文件生成失败
            isSuccess:
              type: boolean
              example: false
      3002:
        description: 文件保存失败
        schema:
          type: object
          properties:
            code:
              type: integer
              example: 3002
            errorMsg:
              type: string
              example: 文件保存失败
            isSuccess:
              type: boolean
              example: false
    """
    # NOTE: the docstring section after '---' is the flasgger spec for this
    # endpoint and is intentionally kept verbatim.
    try:
        # silent=True makes a missing, malformed or non-JSON body come back as
        # None, so it is reported as a 400 parameter error below instead of
        # raising into the generic handler and being mislabelled as 3001.
        data = request.get_json(silent=True)

        # Validate the request envelope.
        if not data:
            return error_response(400, "请求参数不能为空")
        if not isinstance(data, dict):
            return error_response(400, "请求体必须是JSON对象")

        input_data = data.get('inputData', [])
        file_list = data.get('fpolicFieldParamFileList', [])

        if not input_data or not isinstance(input_data, list):
            return error_response(400, "inputData参数必须是非空数组")

        if not file_list or not isinstance(file_list, list):
            return error_response(400, "fpolicFieldParamFileList参数必须是非空数组")

        # Reject malformed entries as a client error instead of letting an
        # AttributeError surface later as a 3001 generation failure.
        if not all(isinstance(item, dict) for item in input_data):
            return error_response(400, "inputData中的每一项必须是对象")

        # Check every file entry before generating anything, so a bad later
        # entry does not leave earlier documents generated for nothing.
        for file_info in file_list:
            if not isinstance(file_info, dict):
                return error_response(400, "fpolicFieldParamFileList中的每一项必须是对象")
            if not file_info.get('templateCode', ''):
                file_name = file_info.get('fileName', '')
                return error_response(1001, f"文件 {file_name} 缺少templateCode参数")

        # One document ID is shared by the whole request.
        document_id = document_service.generate_document_id()

        result_file_list = []
        first_document_name = None  # name of the first generated document

        for file_info in file_list:
            file_id = file_info.get('fileId')
            file_name = file_info.get('fileName', '')
            template_code = file_info.get('templateCode', '')

            try:
                result = document_service.generate_document(
                    template_code=template_code,
                    input_data=input_data,
                    file_info=file_info
                )

                # Prefer the name reported by the service over the requested
                # file name.
                generated_file_name = result.get('fileName', file_name)

                if first_document_name is None:
                    first_document_name = generated_file_name

                result_file_list.append({
                    'fileId': file_id,
                    'fileName': generated_file_name,
                    'filePath': result['filePath']
                })

            except Exception as e:
                # Keep the traceback server-side; the client only gets a code
                # and the message text. The first failure aborts the request.
                app.logger.exception(
                    "Document generation failed for template %s", template_code
                )
                return _document_error_response(str(e))

        # Only the generated-document information is returned (no inputData).
        return success_response({
            'documentId': document_id,
            'documentName': first_document_name or 'generated.docx',
            'fpolicFieldParamFileList': result_file_list
        })

    except Exception as e:
        # Top-level boundary for anything not handled per file.
        app.logger.exception("Document generation request failed")
        return error_response(3001, f"文档生成失败: {str(e)}")


if __name__ == '__main__':
    # Make sure a 'static' directory exists.
    # NOTE(review): this path is relative to the current working directory;
    # confirm it matches the directory the index route serves from.
    os.makedirs('static', exist_ok=True)

    # PORT defaults to 7500; DEBUG is on only for the value 'true'
    # (case-insensitive).
    port = int(os.environ.get('PORT', 7500))
    debug = os.environ.get('DEBUG', 'False').lower() == 'true'

    # Startup banner with the local URLs.
    base_url = f"http://localhost:{port}"
    for banner_line in (
        f"服务启动在 {base_url}",
        f"测试页面: {base_url}/",
        f"Swagger API文档: {base_url}/api-docs",
    ):
        print(banner_line)

    # Listen on all interfaces.
    app.run(debug=debug, port=port, host='0.0.0.0')