""" 数据资产盘点报告生成模块的数据模型 """ from typing import Optional, List, Dict, Any from pydantic import BaseModel, Field # ==================== 请求模型 ==================== class ProjectInfo(BaseModel): """项目信息""" project_name: str = Field(..., description="项目名称") industry: str = Field(..., description="行业类型") company_name: Optional[str] = Field(None, description="企业名称") class StorageDistributionItem(BaseModel): """存储分布项""" category: str = Field(..., description="分类名称") volume: str = Field(..., description="数据量") storage_type: str = Field(..., description="存储类型描述") color: str = Field(..., description="颜色标识") class DataSourceStructure(BaseModel): """数据来源结构""" structured: int = Field(..., ge=0, le=100, description="结构化数据百分比") semi_structured: int = Field(..., ge=0, le=100, description="半结构化数据百分比") class IdentifiedAsset(BaseModel): """识别的数据资产""" name: str = Field(..., description="资产名称") core_tables: List[str] = Field(..., description="核心表名列表") description: str = Field(..., description="资产描述") class InventoryData(BaseModel): """数据盘点结果""" total_tables: int = Field(..., ge=0, description="总表数") total_fields: int = Field(..., ge=0, description="总字段数") total_data_volume: str = Field(..., description="总数据量") storage_distribution: List[StorageDistributionItem] = Field(..., description="存储分布") data_source_structure: DataSourceStructure = Field(..., description="数据来源结构") identified_assets: List[IdentifiedAsset] = Field(..., description="识别的数据资产") class ContextData(BaseModel): """背景调研信息""" enterprise_background: str = Field(..., description="企业背景") informatization_status: str = Field(..., description="信息化建设现状") business_flow: str = Field(..., description="业务流与数据流") class SelectedScenario(BaseModel): """选中的场景""" name: str = Field(..., description="场景名称") description: str = Field(..., description="场景描述") class ValueData(BaseModel): """价值挖掘结果""" selected_scenarios: List[SelectedScenario] = Field(..., description="选中的场景") class GenerateReportOptions(BaseModel): """报告生成选项""" language: str = Field("zh-CN", description="语言") detail_level: str = Field("standard", description="详细程度") generation_mode: str = Field("full", description="生成模式") class GenerateReportRequest(BaseModel): """报告生成请求""" project_info: ProjectInfo = Field(..., description="项目信息") inventory_data: InventoryData = Field(..., description="数据盘点结果") context_data: ContextData = Field(..., description="背景调研信息") value_data: ValueData = Field(..., description="价值挖掘结果") options: Optional[GenerateReportOptions] = Field(None, description="可选配置") class Config: json_schema_extra = { "example": { "project_info": { "project_name": "数据资产盘点项目", "industry": "retail-fresh", "company_name": "某连锁生鲜零售企业" }, "inventory_data": { "total_tables": 14582, "total_fields": 245000, "total_data_volume": "58 PB", "storage_distribution": [ { "category": "供应链物流", "volume": "25.4 PB", "storage_type": "主要存储于 HDFS / NoSQL", "color": "blue" } ], "data_source_structure": { "structured": 35, "semi_structured": 65 }, "identified_assets": [ { "name": "消费者全景画像", "core_tables": ["Dim_Customer", "Fact_Sales"], "description": "核心依赖客户维度表与销售事实表" } ] }, "context_data": { "enterprise_background": "某连锁生鲜零售企业...", "informatization_status": "已建立基础IT系统...", "business_flow": "采购-仓储-销售-配送..." }, "value_data": { "selected_scenarios": [ { "name": "精准会员营销", "description": "基于用户画像实现千人千面营销" } ] }, "options": { "language": "zh-CN", "detail_level": "standard", "generation_mode": "full" } } } # ==================== 响应模型 ==================== class ReportHeader(BaseModel): """报告头部""" project_name: str = Field(..., description="项目名称") class EnterpriseBackground(BaseModel): """企业背景""" description: str = Field(..., description="企业背景描述") class PrivateCloudInfo(BaseModel): """私有云信息""" title: str = Field(..., description="标题") description: str = Field(..., description="描述") class PublicCloudInfo(BaseModel): """公有云信息""" title: str = Field(..., description="标题") description: str = Field(..., description="描述") class InformatizationStatus(BaseModel): """信息化建设现状""" overview: str = Field(..., description="概述") private_cloud: PrivateCloudInfo = Field(..., description="私有云信息") public_cloud: PublicCloudInfo = Field(..., description="公有云信息") class BusinessFlowItem(BaseModel): """业务流项""" title: str = Field(..., description="标题") description: str = Field(..., description="描述") class BusinessDataFlow(BaseModel): """业务数据流""" overview: str = Field(..., description="概述") manufacturing: BusinessFlowItem = Field(..., description="制造") logistics: BusinessFlowItem = Field(..., description="物流") retail: BusinessFlowItem = Field(..., description="零售") data_aggregation: BusinessFlowItem = Field(..., description="数据聚合") class Section1(BaseModel): """章节一:企业数字化情况简介""" enterprise_background: EnterpriseBackground = Field(..., description="企业背景") informatization_status: InformatizationStatus = Field(..., description="信息化建设现状") business_data_flow: BusinessDataFlow = Field(..., description="业务数据流") class Summary(BaseModel): """数据资源摘要""" total_data_volume: str = Field(..., description="数据总量") total_data_objects: Dict[str, str] = Field(..., description="数据对象统计") class Section2(BaseModel): """章节二:数据资源统计""" summary: Summary = Field(..., description="摘要") storage_distribution: List[StorageDistributionItem] = Field(..., description="存储分布") data_source_structure: DataSourceStructure = Field(..., description="数据来源结构") class ComplianceWarning(BaseModel): """合规警告""" type: str = Field(..., description="风险类型") content: str = Field(..., description="风险描述") highlights: Optional[List[str]] = Field(None, description="高亮信息") class ComplianceRisks(BaseModel): """合规风险""" warnings: List[ComplianceWarning] = Field(..., description="警告列表") class ApplicationScenarios(BaseModel): """应用场景""" description: str = Field(..., description="场景描述") class AssetComposition(BaseModel): """资产构成""" description: str = Field(..., description="构成描述") core_tables: List[str] = Field(..., description="核心表") class DataAsset(BaseModel): """数据资产""" id: str = Field(..., description="资产ID") title: str = Field(..., description="资产标题") subtitle: str = Field(..., description="副标题") composition: AssetComposition = Field(..., description="资产构成") application_scenarios: ApplicationScenarios = Field(..., description="应用场景") compliance_risks: ComplianceRisks = Field(..., description="合规风险") class Section3Overview(BaseModel): """章节三概述""" asset_count: int = Field(..., ge=0, description="资产数量") high_value_assets: List[str] = Field(..., description="高价值资产") description: str = Field(..., description="概述描述") class Section3(BaseModel): """章节三:数据资产情况盘点""" overview: Section3Overview = Field(..., description="概述") assets: List[DataAsset] = Field(..., description="数据资产列表") class ComplianceRemediationItem(BaseModel): """合规整改项""" order: int = Field(..., ge=1, description="序号") category: str = Field(..., description="分类") description: str = Field(..., description="详细建议") code_references: Optional[List[str]] = Field(None, description="代码引用") class ComplianceRemediation(BaseModel): """合规整改""" title: str = Field(..., description="标题") items: List[ComplianceRemediationItem] = Field(..., description="整改项列表") class TechnicalEvolution(BaseModel): """技术演进""" title: str = Field(..., description="标题") description: str = Field(..., description="描述") technologies: Optional[List[str]] = Field(None, description="推荐技术") class ValueDeepeningItem(BaseModel): """价值深化项""" description: str = Field(..., description="建议描述") scenarios: Optional[List[str]] = Field(None, description="相关场景") class ValueDeepening(BaseModel): """价值深化""" title: str = Field(..., description="标题") items: List[ValueDeepeningItem] = Field(..., description="深化项列表") class Section4(BaseModel): """章节四:专家建议与下一步计划""" compliance_remediation: ComplianceRemediation = Field(..., description="合规整改") technical_evolution: TechnicalEvolution = Field(..., description="技术演进") value_deepening: ValueDeepening = Field(..., description="价值深化") class GenerateReportResponse(BaseModel): """报告生成响应""" header: ReportHeader = Field(..., description="报告头部") section1: Section1 = Field(..., description="章节一") section2: Section2 = Field(..., description="章节二") section3: Section3 = Field(..., description="章节三") section4: Section4 = Field(..., description="章节四") generation_time: float = Field(..., description="生成耗时(秒)") model_used: str = Field(..., description="使用的大模型")