2026-01-11 07:48:19 +08:00

296 lines
11 KiB
Python

"""
数据资产盘点报告生成模块的数据模型
"""
from typing import Optional, List, Dict, Any
from pydantic import BaseModel, Field
# ==================== 请求模型 ====================
class ProjectInfo(BaseModel):
    """Project information."""

    # Required identifying fields.
    project_name: str = Field(default=..., description="项目名称")
    industry: str = Field(default=..., description="行业类型")
    # Optional company name; None when the caller omits it.
    company_name: Optional[str] = Field(default=None, description="企业名称")
class StorageDistributionItem(BaseModel):
    """One entry of the storage distribution."""

    # All four fields are required strings; the volume is carried as
    # free-form text rather than a number.
    category: str = Field(default=..., description="分类名称")
    volume: str = Field(default=..., description="数据量")
    storage_type: str = Field(default=..., description="存储类型描述")
    color: str = Field(default=..., description="颜色标识")
class DataSourceStructure(BaseModel):
    """Data source structure."""

    # Each value is an integer percentage constrained to the range 0..100.
    # NOTE(review): nothing here checks that the two values add up to 100 --
    # confirm whether that is enforced elsewhere or intentionally left open.
    structured: int = Field(
        default=...,
        ge=0,
        le=100,
        description="结构化数据百分比",
    )
    semi_structured: int = Field(
        default=...,
        ge=0,
        le=100,
        description="半结构化数据百分比",
    )
class IdentifiedAsset(BaseModel):
    """An identified data asset."""

    # Asset name, the names of its core tables, and a free-text description;
    # all three are required.
    name: str = Field(default=..., description="资产名称")
    core_tables: List[str] = Field(default=..., description="核心表名列表")
    description: str = Field(default=..., description="资产描述")
class InventoryData(BaseModel):
    """Data inventory results."""

    # Counts must be non-negative integers.
    total_tables: int = Field(default=..., ge=0, description="总表数")
    total_fields: int = Field(default=..., ge=0, description="总字段数")
    # Total volume is carried as text.
    total_data_volume: str = Field(default=..., description="总数据量")
    # Nested structures, all required.
    storage_distribution: List[StorageDistributionItem] = Field(
        default=...,
        description="存储分布",
    )
    data_source_structure: DataSourceStructure = Field(
        default=...,
        description="数据来源结构",
    )
    identified_assets: List[IdentifiedAsset] = Field(
        default=...,
        description="识别的数据资产",
    )
class ContextData(BaseModel):
    """Background research information."""

    # Three required free-text fields.
    enterprise_background: str = Field(default=..., description="企业背景")
    informatization_status: str = Field(
        default=...,
        description="信息化建设现状",
    )
    business_flow: str = Field(default=..., description="业务流与数据流")
class SelectedScenario(BaseModel):
    """A selected scenario."""

    # Both the name and the description are required.
    name: str = Field(default=..., description="场景名称")
    description: str = Field(default=..., description="场景描述")
class ValueData(BaseModel):
    """Value-mining results."""

    # Required list of the scenarios that were selected.
    selected_scenarios: List[SelectedScenario] = Field(
        default=...,
        description="选中的场景",
    )
class GenerateReportOptions(BaseModel):
    """Report generation options."""

    # Every option has a default, so an empty object is a valid value.
    language: str = Field(default="zh-CN", description="语言")
    detail_level: str = Field(default="standard", description="详细程度")
    generation_mode: str = Field(default="full", description="生成模式")
class GenerateReportRequest(BaseModel):
    """Report generation request."""

    # Required payload sections.
    project_info: ProjectInfo = Field(..., description="项目信息")
    inventory_data: InventoryData = Field(..., description="数据盘点结果")
    context_data: ContextData = Field(..., description="背景调研信息")
    value_data: ValueData = Field(..., description="价值挖掘结果")
    # Optional tuning knobs; None when the caller does not supply them.
    options: Optional[GenerateReportOptions] = Field(None, description="可选配置")

    # `json_schema_extra` is a Pydantic v2 setting. In v2 the nested
    # `class Config` form is deprecated and emits a deprecation warning, so
    # the same configuration is declared through `model_config` instead.
    # The example payload itself is unchanged.
    model_config = {
        "json_schema_extra": {
            "example": {
                "project_info": {
                    "project_name": "数据资产盘点项目",
                    "industry": "retail-fresh",
                    "company_name": "某连锁生鲜零售企业",
                },
                "inventory_data": {
                    "total_tables": 14582,
                    "total_fields": 245000,
                    "total_data_volume": "58 PB",
                    "storage_distribution": [
                        {
                            "category": "供应链物流",
                            "volume": "25.4 PB",
                            "storage_type": "主要存储于 HDFS / NoSQL",
                            "color": "blue",
                        }
                    ],
                    "data_source_structure": {
                        "structured": 35,
                        "semi_structured": 65,
                    },
                    "identified_assets": [
                        {
                            "name": "消费者全景画像",
                            "core_tables": ["Dim_Customer", "Fact_Sales"],
                            "description": "核心依赖客户维度表与销售事实表",
                        }
                    ],
                },
                "context_data": {
                    "enterprise_background": "某连锁生鲜零售企业...",
                    "informatization_status": "已建立基础IT系统...",
                    "business_flow": "采购-仓储-销售-配送...",
                },
                "value_data": {
                    "selected_scenarios": [
                        {
                            "name": "精准会员营销",
                            "description": "基于用户画像实现千人千面营销",
                        }
                    ]
                },
                "options": {
                    "language": "zh-CN",
                    "detail_level": "standard",
                    "generation_mode": "full",
                },
            }
        }
    }
# ==================== 响应模型 ====================
class ReportHeader(BaseModel):
    """Report header."""

    # The header carries only the required project name.
    project_name: str = Field(default=..., description="项目名称")
class EnterpriseBackground(BaseModel):
    """Enterprise background."""

    # Required free-text description of the enterprise background.
    description: str = Field(default=..., description="企业背景描述")
class PrivateCloudInfo(BaseModel):
    """Private cloud information."""

    # Required title/description pair.
    title: str = Field(default=..., description="标题")
    description: str = Field(default=..., description="描述")
class PublicCloudInfo(BaseModel):
    """Public cloud information."""

    # Required title/description pair.
    title: str = Field(default=..., description="标题")
    description: str = Field(default=..., description="描述")
class InformatizationStatus(BaseModel):
    """Current state of informatization (IT) construction."""

    # A required overview plus one required entry each for the private and
    # public cloud.
    overview: str = Field(default=..., description="概述")
    private_cloud: PrivateCloudInfo = Field(
        default=...,
        description="私有云信息",
    )
    public_cloud: PublicCloudInfo = Field(
        default=...,
        description="公有云信息",
    )
class BusinessFlowItem(BaseModel):
    """One item of the business flow."""

    # Required title/description pair.
    title: str = Field(default=..., description="标题")
    description: str = Field(default=..., description="描述")
class BusinessDataFlow(BaseModel):
    """Business data flow."""

    # Required overview text.
    overview: str = Field(default=..., description="概述")
    # Four fixed, required stages, each a title/description item.
    manufacturing: BusinessFlowItem = Field(default=..., description="制造")
    logistics: BusinessFlowItem = Field(default=..., description="物流")
    retail: BusinessFlowItem = Field(default=..., description="零售")
    data_aggregation: BusinessFlowItem = Field(
        default=...,
        description="数据聚合",
    )
class Section1(BaseModel):
    """Section 1: introduction to the enterprise's digitalization status."""

    # Three required sub-sections.
    enterprise_background: EnterpriseBackground = Field(
        default=...,
        description="企业背景",
    )
    informatization_status: InformatizationStatus = Field(
        default=...,
        description="信息化建设现状",
    )
    business_data_flow: BusinessDataFlow = Field(
        default=...,
        description="业务数据流",
    )
class Summary(BaseModel):
    """Data resource summary."""

    # Total volume is carried as text.
    total_data_volume: str = Field(default=..., description="数据总量")
    # String-to-string mapping of data object statistics.
    total_data_objects: Dict[str, str] = Field(
        default=...,
        description="数据对象统计",
    )
class Section2(BaseModel):
    """Section 2: data resource statistics."""

    # Required summary, storage distribution list and source structure.
    summary: Summary = Field(default=..., description="摘要")
    storage_distribution: List[StorageDistributionItem] = Field(
        default=...,
        description="存储分布",
    )
    data_source_structure: DataSourceStructure = Field(
        default=...,
        description="数据来源结构",
    )
class ComplianceWarning(BaseModel):
    """Compliance warning."""

    # Required risk type and risk description.
    type: str = Field(default=..., description="风险类型")
    content: str = Field(default=..., description="风险描述")
    # Optional highlight strings; None when absent.
    highlights: Optional[List[str]] = Field(
        default=None,
        description="高亮信息",
    )
class ComplianceRisks(BaseModel):
    """Compliance risks."""

    # Required list of warnings.
    warnings: List[ComplianceWarning] = Field(
        default=...,
        description="警告列表",
    )
class ApplicationScenarios(BaseModel):
    """Application scenarios."""

    # Required free-text scenario description.
    description: str = Field(default=..., description="场景描述")
class AssetComposition(BaseModel):
    """Asset composition."""

    # Required composition description and list of core tables.
    description: str = Field(default=..., description="构成描述")
    core_tables: List[str] = Field(default=..., description="核心表")
class DataAsset(BaseModel):
    """Data asset."""

    # Required identity and headings.
    id: str = Field(default=..., description="资产ID")
    title: str = Field(default=..., description="资产标题")
    subtitle: str = Field(default=..., description="副标题")
    # Required nested detail blocks.
    composition: AssetComposition = Field(
        default=...,
        description="资产构成",
    )
    application_scenarios: ApplicationScenarios = Field(
        default=...,
        description="应用场景",
    )
    compliance_risks: ComplianceRisks = Field(
        default=...,
        description="合规风险",
    )
class Section3Overview(BaseModel):
    """Overview of section 3."""

    # Asset count must be a non-negative integer.
    asset_count: int = Field(default=..., ge=0, description="资产数量")
    high_value_assets: List[str] = Field(
        default=...,
        description="高价值资产",
    )
    description: str = Field(default=..., description="概述描述")
class Section3(BaseModel):
    """Section 3: data asset inventory."""

    # Required overview followed by the list of data assets.
    overview: Section3Overview = Field(default=..., description="概述")
    assets: List[DataAsset] = Field(default=..., description="数据资产列表")
class ComplianceRemediationItem(BaseModel):
    """Compliance remediation item."""

    # Sequence number; must be at least 1.
    order: int = Field(default=..., ge=1, description="序号")
    category: str = Field(default=..., description="分类")
    description: str = Field(default=..., description="详细建议")
    # Optional code references; None when absent.
    code_references: Optional[List[str]] = Field(
        default=None,
        description="代码引用",
    )
class ComplianceRemediation(BaseModel):
    """Compliance remediation."""

    # Required title and list of remediation items.
    title: str = Field(default=..., description="标题")
    items: List[ComplianceRemediationItem] = Field(
        default=...,
        description="整改项列表",
    )
class TechnicalEvolution(BaseModel):
    """Technical evolution."""

    # Required title/description pair.
    title: str = Field(default=..., description="标题")
    description: str = Field(default=..., description="描述")
    # Optional list of recommended technologies; None when absent.
    technologies: Optional[List[str]] = Field(
        default=None,
        description="推荐技术",
    )
class ValueDeepeningItem(BaseModel):
    """Value deepening item."""

    # Required recommendation text.
    description: str = Field(default=..., description="建议描述")
    # Optional related scenarios; None when absent.
    scenarios: Optional[List[str]] = Field(
        default=None,
        description="相关场景",
    )
class ValueDeepening(BaseModel):
    """Value deepening."""

    # Required title and list of deepening items.
    title: str = Field(default=..., description="标题")
    items: List[ValueDeepeningItem] = Field(
        default=...,
        description="深化项列表",
    )
class Section4(BaseModel):
    """Section 4: expert recommendations and next-step plan."""

    # Three required recommendation blocks.
    compliance_remediation: ComplianceRemediation = Field(
        default=...,
        description="合规整改",
    )
    technical_evolution: TechnicalEvolution = Field(
        default=...,
        description="技术演进",
    )
    value_deepening: ValueDeepening = Field(
        default=...,
        description="价值深化",
    )
class GenerateReportResponse(BaseModel):
    """Report generation response."""

    # Required report body: header plus the four sections.
    header: ReportHeader = Field(default=..., description="报告头部")
    section1: Section1 = Field(default=..., description="章节一")
    section2: Section2 = Field(default=..., description="章节二")
    section3: Section3 = Field(default=..., description="章节三")
    section4: Section4 = Field(default=..., description="章节四")
    # Required generation metadata: elapsed time in seconds and the name of
    # the large model that was used.
    generation_time: float = Field(default=..., description="生成耗时(秒)")
    model_used: str = Field(default=..., description="使用的大模型")