finyx_data_ai/app/schemas/inventory.py
2026-01-11 07:48:19 +08:00

126 lines
5.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
数据盘点模块的数据模型
"""
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, ConfigDict, Field
# ==================== 请求模型 ====================
class FieldInput(BaseModel):
    """Field input model."""

    # Field name (English). Required.
    raw_name: str = Field(default=..., description="字段名(英文)")

    # Field type. Required.
    type: str = Field(default=..., description="字段类型")

    # Field comment, if there is one; defaults to None.
    comment: Optional[str] = Field(
        default=None,
        description="字段注释(如果有)",
    )
class TableInput(BaseModel):
    """Table input model."""

    # Table name (English / original name). Required.
    raw_name: str = Field(default=..., description="表名(英文/原始名称)")

    # Field list. Required; min_length=1 rejects an empty list.
    fields: List[FieldInput] = Field(
        default=...,
        min_length=1,
        description="字段列表",
    )
class AnalyzeOptions(BaseModel):
    """AI analysis options."""

    # Large-model selection; defaults to "qwen-max".
    model: Optional[str] = Field(
        default="qwen-max",
        description="大模型选择qwen-max/gpt-4",
    )

    # Temperature parameter, constrained to the closed range [0.0, 1.0].
    temperature: Optional[float] = Field(
        default=0.3,
        ge=0.0,
        le=1.0,
        description="温度参数0.0-1.0",
    )

    # Whether PII detection is enabled; on by default.
    enable_pii_detection: Optional[bool] = Field(
        default=True,
        description="是否启用 PII 识别",
    )

    # Whether important-data detection is enabled; on by default.
    enable_important_data_detection: Optional[bool] = Field(
        default=True,
        description="是否启用重要数据识别",
    )
class AIAnalyzeRequest(BaseModel):
    """AI analysis request model."""

    # Pydantic v2 deprecates the nested ``class Config`` in favour of
    # ``model_config``; the JSON-schema example payload itself is unchanged.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "tables": [
                    {
                        "raw_name": "t_user_base_01",
                        "fields": [
                            {
                                "raw_name": "user_id",
                                "type": "varchar(64)",
                                "comment": "用户ID",
                            },
                            {
                                "raw_name": "phone",
                                "type": "varchar(11)",
                                "comment": "手机号",
                            },
                        ],
                    }
                ],
                "project_id": "project_001",
                "industry": "retail-fresh",
                "context": "某连锁生鲜零售企业,主营水果、蔬菜等生鲜产品",
                "options": {
                    "model": "qwen-max",
                    "temperature": 0.3,
                    "enable_pii_detection": True,
                    "enable_important_data_detection": True,
                },
            }
        }
    )

    # Table list. Required; min_length=1 rejects an empty list.
    tables: List[TableInput] = Field(..., description="表列表", min_length=1)

    # Project ID. Required.
    project_id: str = Field(..., description="项目ID")

    # Industry information (the schema example uses "retail-fresh").
    industry: Optional[str] = Field(None, description="行业信息retail-fresh")

    # Business background information.
    context: Optional[str] = Field(None, description="业务背景信息")

    # Optional configuration; None when the caller supplies no options.
    options: Optional[AnalyzeOptions] = Field(None, description="可选配置")
# ==================== 响应模型 ====================
class FieldOutput(BaseModel):
    """Field output model."""

    # Field name (English / original name). Required.
    raw_name: str = Field(default=..., description="字段名(英文/原始名称)")

    # Chinese name identified by the AI. Required.
    ai_name: str = Field(default=..., description="AI 识别的中文名称")

    # Business description. Required.
    desc: str = Field(default=..., description="业务描述")

    # Field type. Required.
    type: str = Field(default=..., description="字段类型")

    # List of identified PII items; a fresh empty list per instance by default.
    pii: List[str] = Field(
        default_factory=list,
        description="识别的 PII 信息列表",
    )

    # PII type (contact/identity/name/email, etc.); defaults to None.
    pii_type: Optional[str] = Field(
        default=None,
        description="PII 类型contact/identity/name/email等",
    )

    # Whether the field is important data; False by default.
    is_important_data: bool = Field(default=False, description="是否重要数据")

    # Confidence score, constrained to the closed range [0, 100]. Required.
    confidence: int = Field(
        default=...,
        ge=0,
        le=100,
        description="置信度评分0-100",
    )
class TableOutput(BaseModel):
    """Table output model."""

    # Table name (English / original name). Required.
    raw_name: str = Field(default=..., description="表名(英文/原始名称)")

    # Chinese name identified by the AI. Required.
    ai_name: str = Field(default=..., description="AI 识别的中文名称")

    # Business description. Required.
    desc: str = Field(default=..., description="业务描述")

    # Confidence score, constrained to the closed range [0, 100]. Required.
    confidence: int = Field(
        default=...,
        ge=0,
        le=100,
        description="置信度评分0-100",
    )

    # Whether AI identification completed; True by default.
    ai_completed: bool = Field(default=True, description="AI 识别是否完成")

    # Field list. Required.
    fields: List[FieldOutput] = Field(default=..., description="字段列表")

    # Summary of the table's PII items; empty list by default.
    pii: List[str] = Field(
        default_factory=list,
        description="表的 PII 信息汇总",
    )

    # Whether the table contains important data; False by default.
    important: bool = Field(default=False, description="表是否包含重要数据")

    # List of important-data types; empty list by default.
    important_data_types: List[str] = Field(
        default_factory=list,
        description="重要数据类型列表",
    )
class Statistics(BaseModel):
    """Statistics."""

    # Total number of tables. Required.
    total_tables: int = Field(default=..., description="总表数")

    # Total number of fields. Required.
    total_fields: int = Field(default=..., description="总字段数")

    # Number of fields containing PII; 0 by default.
    pii_fields_count: int = Field(default=0, description="包含 PII 的字段数")

    # Number of important-data fields; 0 by default.
    important_data_fields_count: int = Field(
        default=0,
        description="重要数据字段数",
    )

    # Average confidence, constrained to the closed range [0, 100]. Required.
    average_confidence: float = Field(
        default=...,
        ge=0,
        le=100,
        description="平均置信度",
    )
class TokenUsage(BaseModel):
    """Token usage."""

    # Number of prompt tokens; 0 by default.
    prompt_tokens: int = Field(default=0, description="提示词 Token 数")

    # Number of completion tokens; 0 by default.
    completion_tokens: int = Field(default=0, description="完成 Token 数")

    # Total number of tokens; 0 by default.
    total_tokens: int = Field(default=0, description="总 Token 数")
class AIAnalyzeResponse(BaseModel):
    """AI analysis response model."""

    # Pydantic v2 protects the ``model_`` prefix by default and warns at class
    # creation about the ``model_used`` field. Clearing the protected
    # namespaces silences that warning without renaming the public field.
    model_config = ConfigDict(protected_namespaces=())

    # List of analysed tables. Required.
    tables: List[TableOutput] = Field(..., description="识别结果表列表")

    # Statistics. Required.
    statistics: Statistics = Field(..., description="统计信息")

    # Processing time in seconds. Required.
    processing_time: float = Field(..., description="处理耗时(秒)")

    # The large model that was used. Required.
    model_used: str = Field(..., description="使用的大模型")

    # Token usage; defaults to None.
    token_usage: Optional[TokenUsage] = Field(None, description="Token 使用情况")