126 lines
5.3 KiB
Python
126 lines
5.3 KiB
Python
"""
Data models for the data inventory module.
"""
|
||
from typing import Optional, List, Dict, Any

from pydantic import BaseModel, ConfigDict, Field
|
||
|
||
|
||
# ==================== 请求模型 ====================
|
||
|
||
class FieldInput(BaseModel):
    """Input description of a single field (column) of a table.

    Attributes:
        raw_name: Required field name (English).
        type: Required field type, passed as a string.
        comment: Optional field comment; defaults to ``None`` when absent.
    """

    raw_name: str = Field(
        default=...,
        description="字段名(英文)",
    )
    type: str = Field(
        default=...,
        description="字段类型",
    )
    comment: Optional[str] = Field(
        default=None,
        description="字段注释(如果有)",
    )
|
||
|
||
|
||
class TableInput(BaseModel):
    """Input description of one table and the fields it contains.

    Attributes:
        raw_name: Required table name (English / original name).
        fields: Required list of ``FieldInput`` items; the ``min_length=1``
            constraint rejects an empty list.
    """

    raw_name: str = Field(
        default=...,
        description="表名(英文/原始名称)",
    )
    fields: List[FieldInput] = Field(
        default=...,
        min_length=1,
        description="字段列表",
    )
|
||
|
||
|
||
class AnalyzeOptions(BaseModel):
    """Optional tuning knobs for an AI analysis request.

    Every attribute is annotated ``Optional`` and carries a non-``None``
    default, so an omitted key takes the default while an explicit ``None``
    is still accepted by validation.

    Attributes:
        model: Large-model selector; defaults to ``"qwen-max"``.
        temperature: Sampling temperature constrained to 0.0-1.0; defaults
            to ``0.3``.
        enable_pii_detection: Whether PII detection is enabled; defaults to
            ``True``.
        enable_important_data_detection: Whether important-data detection is
            enabled; defaults to ``True``.
    """

    model: Optional[str] = Field(
        default="qwen-max",
        description="大模型选择(qwen-max/gpt-4)",
    )
    temperature: Optional[float] = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="温度参数(0.0-1.0)",
    )
    enable_pii_detection: Optional[bool] = Field(
        default=True,
        description="是否启用 PII 识别",
    )
    enable_important_data_detection: Optional[bool] = Field(
        default=True,
        description="是否启用重要数据识别",
    )
|
||
|
||
|
||
class AIAnalyzeRequest(BaseModel):
    """Request payload for an AI analysis run.

    Attributes:
        tables: Required list of ``TableInput`` items; must contain at least
            one table (``min_length=1``).
        project_id: Required project identifier.
        industry: Optional industry hint (e.g. ``"retail-fresh"``).
        context: Optional free-text business background.
        options: Optional ``AnalyzeOptions``; defaults to ``None``.
    """

    # Pydantic v2 configuration. This replaces the nested ``class Config``,
    # which v2 still accepts but reports as deprecated. The example payload
    # below is attached to the generated JSON schema unchanged.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "tables": [
                    {
                        "raw_name": "t_user_base_01",
                        "fields": [
                            {
                                "raw_name": "user_id",
                                "type": "varchar(64)",
                                "comment": "用户ID",
                            },
                            {
                                "raw_name": "phone",
                                "type": "varchar(11)",
                                "comment": "手机号",
                            },
                        ],
                    }
                ],
                "project_id": "project_001",
                "industry": "retail-fresh",
                "context": "某连锁生鲜零售企业,主营水果、蔬菜等生鲜产品",
                "options": {
                    "model": "qwen-max",
                    "temperature": 0.3,
                    "enable_pii_detection": True,
                    "enable_important_data_detection": True,
                },
            }
        }
    )

    tables: List[TableInput] = Field(..., description="表列表", min_length=1)
    project_id: str = Field(..., description="项目ID")
    industry: Optional[str] = Field(None, description="行业信息(如:retail-fresh)")
    context: Optional[str] = Field(None, description="业务背景信息")
    options: Optional[AnalyzeOptions] = Field(None, description="可选配置")
|
||
|
||
|
||
# ==================== 响应模型 ====================
|
||
|
||
class FieldOutput(BaseModel):
    """Analysis result for a single field.

    Attributes:
        raw_name: Required field name (English / original name).
        ai_name: Required Chinese name identified by the AI.
        desc: Required business description.
        type: Required field type string.
        pii: List of identified PII items; a fresh empty list by default.
        pii_type: Optional PII category (contact/identity/name/email, etc.);
            defaults to ``None``.
        is_important_data: Whether the field is important data; defaults to
            ``False``.
        confidence: Required integer confidence score, validated to 0-100.
    """

    raw_name: str = Field(
        default=...,
        description="字段名(英文/原始名称)",
    )
    ai_name: str = Field(
        default=...,
        description="AI 识别的中文名称",
    )
    desc: str = Field(
        default=...,
        description="业务描述",
    )
    type: str = Field(
        default=...,
        description="字段类型",
    )
    pii: List[str] = Field(
        description="识别的 PII 信息列表",
        default_factory=list,
    )
    pii_type: Optional[str] = Field(
        default=None,
        description="PII 类型(contact/identity/name/email等)",
    )
    is_important_data: bool = Field(
        default=False,
        description="是否重要数据",
    )
    confidence: int = Field(
        default=...,
        ge=0,
        le=100,
        description="置信度评分(0-100)",
    )
|
||
|
||
|
||
class TableOutput(BaseModel):
    """Analysis result for one table, including its per-field results.

    Attributes:
        raw_name: Required table name (English / original name).
        ai_name: Required Chinese name identified by the AI.
        desc: Required business description.
        confidence: Required integer confidence score, validated to 0-100.
        ai_completed: Whether AI recognition finished; defaults to ``True``.
        fields: Required list of ``FieldOutput`` results.
        pii: Table-level summary of PII items; a fresh empty list by default.
        important: Whether the table contains important data; defaults to
            ``False``.
        important_data_types: List of important-data types; a fresh empty
            list by default.
    """

    raw_name: str = Field(
        default=...,
        description="表名(英文/原始名称)",
    )
    ai_name: str = Field(
        default=...,
        description="AI 识别的中文名称",
    )
    desc: str = Field(
        default=...,
        description="业务描述",
    )
    confidence: int = Field(
        default=...,
        ge=0,
        le=100,
        description="置信度评分(0-100)",
    )
    ai_completed: bool = Field(
        default=True,
        description="AI 识别是否完成",
    )
    fields: List[FieldOutput] = Field(
        default=...,
        description="字段列表",
    )
    pii: List[str] = Field(
        description="表的 PII 信息汇总",
        default_factory=list,
    )
    important: bool = Field(
        default=False,
        description="表是否包含重要数据",
    )
    important_data_types: List[str] = Field(
        description="重要数据类型列表",
        default_factory=list,
    )
|
||
|
||
|
||
class Statistics(BaseModel):
    """Aggregate counters describing one analysis result.

    Attributes:
        total_tables: Required total number of tables.
        total_fields: Required total number of fields.
        pii_fields_count: Number of fields containing PII; defaults to ``0``.
        important_data_fields_count: Number of important-data fields;
            defaults to ``0``.
        average_confidence: Required average confidence, validated to 0-100.
    """

    total_tables: int = Field(
        default=...,
        description="总表数",
    )
    total_fields: int = Field(
        default=...,
        description="总字段数",
    )
    pii_fields_count: int = Field(
        default=0,
        description="包含 PII 的字段数",
    )
    important_data_fields_count: int = Field(
        default=0,
        description="重要数据字段数",
    )
    average_confidence: float = Field(
        default=...,
        ge=0,
        le=100,
        description="平均置信度",
    )
|
||
|
||
|
||
class TokenUsage(BaseModel):
    """Token accounting for one analysis call.

    Attributes:
        prompt_tokens: Number of prompt tokens; defaults to ``0``.
        completion_tokens: Number of completion tokens; defaults to ``0``.
        total_tokens: Total number of tokens; defaults to ``0``.
    """

    prompt_tokens: int = Field(
        default=0,
        description="提示词 Token 数",
    )
    completion_tokens: int = Field(
        default=0,
        description="完成 Token 数",
    )
    total_tokens: int = Field(
        default=0,
        description="总 Token 数",
    )
|
||
|
||
|
||
class AIAnalyzeResponse(BaseModel):
    """Response payload returned by an AI analysis run.

    Attributes:
        tables: Required list of ``TableOutput`` recognition results.
        statistics: Required ``Statistics`` summary.
        processing_time: Required processing time in seconds.
        model_used: Required name of the large model that was used.
        token_usage: Optional ``TokenUsage``; defaults to ``None``.
    """

    # ``model_used`` starts with "model_", a prefix Pydantic v2 protects by
    # default and warns about on affected releases. Clearing the protected
    # namespaces keeps the public field name (and wire format) unchanged
    # while avoiding that warning.
    model_config = ConfigDict(protected_namespaces=())

    tables: List[TableOutput] = Field(..., description="识别结果表列表")
    statistics: Statistics = Field(..., description="统计信息")
    processing_time: float = Field(..., description="处理耗时(秒)")
    model_used: str = Field(..., description="使用的大模型")
    token_usage: Optional[TokenUsage] = Field(None, description="Token 使用情况")
|