""" 数据盘点模块的数据模型 """ from typing import Optional, List, Dict, Any from pydantic import BaseModel, Field # ==================== 请求模型 ==================== class FieldInput(BaseModel): """字段输入模型""" raw_name: str = Field(..., description="字段名(英文)") type: str = Field(..., description="字段类型") comment: Optional[str] = Field(None, description="字段注释(如果有)") class TableInput(BaseModel): """表输入模型""" raw_name: str = Field(..., description="表名(英文/原始名称)") fields: List[FieldInput] = Field(..., description="字段列表", min_length=1) class AnalyzeOptions(BaseModel): """AI 分析选项""" model: Optional[str] = Field("qwen-max", description="大模型选择(qwen-max/gpt-4)") temperature: Optional[float] = Field(0.3, ge=0.0, le=1.0, description="温度参数(0.0-1.0)") enable_pii_detection: Optional[bool] = Field(True, description="是否启用 PII 识别") enable_important_data_detection: Optional[bool] = Field( True, description="是否启用重要数据识别" ) class AIAnalyzeRequest(BaseModel): """AI 分析请求模型""" tables: List[TableInput] = Field(..., description="表列表", min_length=1) project_id: str = Field(..., description="项目ID") industry: Optional[str] = Field(None, description="行业信息(如:retail-fresh)") context: Optional[str] = Field(None, description="业务背景信息") options: Optional[AnalyzeOptions] = Field(None, description="可选配置") class Config: json_schema_extra = { "example": { "tables": [ { "raw_name": "t_user_base_01", "fields": [ { "raw_name": "user_id", "type": "varchar(64)", "comment": "用户ID" }, { "raw_name": "phone", "type": "varchar(11)", "comment": "手机号" } ] } ], "project_id": "project_001", "industry": "retail-fresh", "context": "某连锁生鲜零售企业,主营水果、蔬菜等生鲜产品", "options": { "model": "qwen-max", "temperature": 0.3, "enable_pii_detection": True, "enable_important_data_detection": True } } } # ==================== 响应模型 ==================== class FieldOutput(BaseModel): """字段输出模型""" raw_name: str = Field(..., description="字段名(英文/原始名称)") ai_name: str = Field(..., description="AI 识别的中文名称") desc: str = Field(..., description="业务描述") type: str = Field(..., description="字段类型") pii: List[str] = Field(default_factory=list, description="识别的 PII 信息列表") pii_type: Optional[str] = Field(None, description="PII 类型(contact/identity/name/email等)") is_important_data: bool = Field(False, description="是否重要数据") confidence: int = Field(..., ge=0, le=100, description="置信度评分(0-100)") class TableOutput(BaseModel): """表输出模型""" raw_name: str = Field(..., description="表名(英文/原始名称)") ai_name: str = Field(..., description="AI 识别的中文名称") desc: str = Field(..., description="业务描述") confidence: int = Field(..., ge=0, le=100, description="置信度评分(0-100)") ai_completed: bool = Field(True, description="AI 识别是否完成") fields: List[FieldOutput] = Field(..., description="字段列表") pii: List[str] = Field(default_factory=list, description="表的 PII 信息汇总") important: bool = Field(False, description="表是否包含重要数据") important_data_types: List[str] = Field( default_factory=list, description="重要数据类型列表" ) class Statistics(BaseModel): """统计信息""" total_tables: int = Field(..., description="总表数") total_fields: int = Field(..., description="总字段数") pii_fields_count: int = Field(0, description="包含 PII 的字段数") important_data_fields_count: int = Field(0, description="重要数据字段数") average_confidence: float = Field(..., ge=0, le=100, description="平均置信度") class TokenUsage(BaseModel): """Token 使用情况""" prompt_tokens: int = Field(0, description="提示词 Token 数") completion_tokens: int = Field(0, description="完成 Token 数") total_tokens: int = Field(0, description="总 Token 数") class AIAnalyzeResponse(BaseModel): """AI 分析响应模型""" tables: List[TableOutput] = Field(..., description="识别结果表列表") statistics: Statistics = Field(..., description="统计信息") processing_time: float = Field(..., description="处理耗时(秒)") model_used: str = Field(..., description="使用的大模型") token_usage: Optional[TokenUsage] = Field(None, description="Token 使用情况")