296 lines
11 KiB
Python
296 lines
11 KiB
Python
"""
Data models for the data asset inventory report generation module.
"""
|
|
from typing import Optional, List, Dict, Any

from pydantic import BaseModel, ConfigDict, Field
|
|
|
|
|
|
# ==================== 请求模型 ====================
|
|
|
|
class ProjectInfo(BaseModel):
    """Project information supplied with a report request."""

    # Project name (required).
    project_name: str = Field(description="项目名称")
    # Industry type (required).
    industry: str = Field(description="行业类型")
    # Company name; optional, defaults to None.
    company_name: Optional[str] = Field(default=None, description="企业名称")
|
|
|
|
|
|
class StorageDistributionItem(BaseModel):
    """One entry of the storage distribution breakdown."""

    # Category name (required).
    category: str = Field(description="分类名称")
    # Data volume, carried as free-form text (required).
    volume: str = Field(description="数据量")
    # Description of the storage type (required).
    storage_type: str = Field(description="存储类型描述")
    # Color marker (required).
    color: str = Field(description="颜色标识")
|
|
|
|
|
|
class DataSourceStructure(BaseModel):
    """Structure of the data sources, expressed as percentages."""

    # Percentage of structured data; validated to lie within 0..100.
    structured: int = Field(
        description="结构化数据百分比",
        ge=0,
        le=100,
    )
    # Percentage of semi-structured data; validated to lie within 0..100.
    semi_structured: int = Field(
        description="半结构化数据百分比",
        ge=0,
        le=100,
    )
|
|
|
|
|
|
class IdentifiedAsset(BaseModel):
    """A data asset identified during the inventory."""

    # Asset name (required).
    name: str = Field(description="资产名称")
    # Names of the core tables behind the asset (required).
    core_tables: List[str] = Field(description="核心表名列表")
    # Asset description (required).
    description: str = Field(description="资产描述")
|
|
|
|
|
|
class InventoryData(BaseModel):
    """Results of the data inventory."""

    # Total number of tables; must be non-negative.
    total_tables: int = Field(description="总表数", ge=0)
    # Total number of fields; must be non-negative.
    total_fields: int = Field(description="总字段数", ge=0)
    # Total data volume, carried as free-form text.
    total_data_volume: str = Field(description="总数据量")
    # Storage distribution entries.
    storage_distribution: List[StorageDistributionItem] = Field(
        description="存储分布",
    )
    # Data source structure (percentage breakdown).
    data_source_structure: DataSourceStructure = Field(
        description="数据来源结构",
    )
    # Data assets identified during the inventory.
    identified_assets: List[IdentifiedAsset] = Field(
        description="识别的数据资产",
    )
|
|
|
|
|
|
class ContextData(BaseModel):
    """Background research information."""

    # Enterprise background (required).
    enterprise_background: str = Field(description="企业背景")
    # Current state of informatization (required).
    informatization_status: str = Field(description="信息化建设现状")
    # Business flow and data flow (required).
    business_flow: str = Field(description="业务流与数据流")
|
|
|
|
|
|
class SelectedScenario(BaseModel):
    """A scenario that has been selected."""

    # Scenario name (required).
    name: str = Field(description="场景名称")
    # Scenario description (required).
    description: str = Field(description="场景描述")
|
|
|
|
|
|
class ValueData(BaseModel):
    """Results of value mining."""

    # The selected scenarios (required).
    selected_scenarios: List[SelectedScenario] = Field(
        description="选中的场景",
    )
|
|
|
|
|
|
class GenerateReportOptions(BaseModel):
    """Options controlling report generation; every field has a default."""

    # Language; defaults to "zh-CN".
    language: str = Field(default="zh-CN", description="语言")
    # Level of detail; defaults to "standard".
    detail_level: str = Field(default="standard", description="详细程度")
    # Generation mode; defaults to "full".
    generation_mode: str = Field(default="full", description="生成模式")
|
|
|
|
|
|
class GenerateReportRequest(BaseModel):
    """Report generation request.

    Bundles the project information, inventory results, background research
    and value-mining results for one report, plus optional generation
    settings. An example payload is attached to the generated JSON schema.
    """

    # Pydantic v2 configuration. The class-based ``Config`` form is deprecated
    # in v2 and emits a deprecation warning, so the schema example is declared
    # through ``model_config`` instead. The example payload is unchanged.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "project_info": {
                    "project_name": "数据资产盘点项目",
                    "industry": "retail-fresh",
                    "company_name": "某连锁生鲜零售企业",
                },
                "inventory_data": {
                    "total_tables": 14582,
                    "total_fields": 245000,
                    "total_data_volume": "58 PB",
                    "storage_distribution": [
                        {
                            "category": "供应链物流",
                            "volume": "25.4 PB",
                            "storage_type": "主要存储于 HDFS / NoSQL",
                            "color": "blue",
                        }
                    ],
                    "data_source_structure": {
                        "structured": 35,
                        "semi_structured": 65,
                    },
                    "identified_assets": [
                        {
                            "name": "消费者全景画像",
                            "core_tables": ["Dim_Customer", "Fact_Sales"],
                            "description": "核心依赖客户维度表与销售事实表",
                        }
                    ],
                },
                "context_data": {
                    "enterprise_background": "某连锁生鲜零售企业...",
                    "informatization_status": "已建立基础IT系统...",
                    "business_flow": "采购-仓储-销售-配送...",
                },
                "value_data": {
                    "selected_scenarios": [
                        {
                            "name": "精准会员营销",
                            "description": "基于用户画像实现千人千面营销",
                        }
                    ]
                },
                "options": {
                    "language": "zh-CN",
                    "detail_level": "standard",
                    "generation_mode": "full",
                },
            }
        }
    )

    # Project information (required).
    project_info: ProjectInfo = Field(..., description="项目信息")
    # Data inventory results (required).
    inventory_data: InventoryData = Field(..., description="数据盘点结果")
    # Background research information (required).
    context_data: ContextData = Field(..., description="背景调研信息")
    # Value mining results (required).
    value_data: ValueData = Field(..., description="价值挖掘结果")
    # Optional generation settings; None when the caller sends none.
    options: Optional[GenerateReportOptions] = Field(None, description="可选配置")
|
|
|
|
|
|
# ==================== 响应模型 ====================
|
|
|
|
class ReportHeader(BaseModel):
    """Header of the generated report."""

    # Project name (required).
    project_name: str = Field(description="项目名称")
|
|
|
|
|
|
class EnterpriseBackground(BaseModel):
    """Enterprise background part of the report."""

    # Description of the enterprise background (required).
    description: str = Field(description="企业背景描述")
|
|
|
|
|
|
class PrivateCloudInfo(BaseModel):
    """Private cloud information."""

    # Title (required).
    title: str = Field(description="标题")
    # Description (required).
    description: str = Field(description="描述")
|
|
|
|
|
|
class PublicCloudInfo(BaseModel):
    """Public cloud information."""

    # Title (required).
    title: str = Field(description="标题")
    # Description (required).
    description: str = Field(description="描述")
|
|
|
|
|
|
class InformatizationStatus(BaseModel):
    """Current state of informatization."""

    # Overview text (required).
    overview: str = Field(description="概述")
    # Private cloud information (required).
    private_cloud: PrivateCloudInfo = Field(description="私有云信息")
    # Public cloud information (required).
    public_cloud: PublicCloudInfo = Field(description="公有云信息")
|
|
|
|
|
|
class BusinessFlowItem(BaseModel):
    """One item of the business flow."""

    # Title (required).
    title: str = Field(description="标题")
    # Description (required).
    description: str = Field(description="描述")
|
|
|
|
|
|
class BusinessDataFlow(BaseModel):
    """Business data flow, split into four named items plus an overview."""

    # Overview text (required).
    overview: str = Field(description="概述")
    # Manufacturing item (required).
    manufacturing: BusinessFlowItem = Field(description="制造")
    # Logistics item (required).
    logistics: BusinessFlowItem = Field(description="物流")
    # Retail item (required).
    retail: BusinessFlowItem = Field(description="零售")
    # Data aggregation item (required).
    data_aggregation: BusinessFlowItem = Field(description="数据聚合")
|
|
|
|
|
|
class Section1(BaseModel):
    """Section 1: introduction to the enterprise's digitalization status."""

    # Enterprise background (required).
    enterprise_background: EnterpriseBackground = Field(
        description="企业背景",
    )
    # Current state of informatization (required).
    informatization_status: InformatizationStatus = Field(
        description="信息化建设现状",
    )
    # Business data flow (required).
    business_data_flow: BusinessDataFlow = Field(
        description="业务数据流",
    )
|
|
|
|
|
|
class Summary(BaseModel):
    """Summary of the data resources."""

    # Total data volume, carried as free-form text (required).
    total_data_volume: str = Field(description="数据总量")
    # Data object statistics as a string-to-string mapping (required).
    total_data_objects: Dict[str, str] = Field(description="数据对象统计")
|
|
|
|
|
|
class Section2(BaseModel):
    """Section 2: data resource statistics."""

    # Summary (required).
    summary: Summary = Field(description="摘要")
    # Storage distribution entries (required).
    storage_distribution: List[StorageDistributionItem] = Field(
        description="存储分布",
    )
    # Data source structure (required).
    data_source_structure: DataSourceStructure = Field(
        description="数据来源结构",
    )
|
|
|
|
|
|
class ComplianceWarning(BaseModel):
    """A single compliance warning."""

    # Risk type (required).
    type: str = Field(description="风险类型")
    # Risk description (required).
    content: str = Field(description="风险描述")
    # Highlighted information; optional, defaults to None.
    highlights: Optional[List[str]] = Field(
        default=None,
        description="高亮信息",
    )
|
|
|
|
|
|
class ComplianceRisks(BaseModel):
    """Compliance risks attached to a data asset."""

    # List of warnings (required).
    warnings: List[ComplianceWarning] = Field(description="警告列表")
|
|
|
|
|
|
class ApplicationScenarios(BaseModel):
    """Application scenarios of a data asset."""

    # Scenario description (required).
    description: str = Field(description="场景描述")
|
|
|
|
|
|
class AssetComposition(BaseModel):
    """Composition of a data asset."""

    # Description of the composition (required).
    description: str = Field(description="构成描述")
    # Core tables (required).
    core_tables: List[str] = Field(description="核心表")
|
|
|
|
|
|
class DataAsset(BaseModel):
    """A data asset as presented in the report."""

    # Asset ID (required).
    id: str = Field(description="资产ID")
    # Asset title (required).
    title: str = Field(description="资产标题")
    # Subtitle (required).
    subtitle: str = Field(description="副标题")
    # Asset composition (required).
    composition: AssetComposition = Field(description="资产构成")
    # Application scenarios (required).
    application_scenarios: ApplicationScenarios = Field(
        description="应用场景",
    )
    # Compliance risks (required).
    compliance_risks: ComplianceRisks = Field(description="合规风险")
|
|
|
|
|
|
class Section3Overview(BaseModel):
    """Overview of section 3."""

    # Number of assets; must be non-negative.
    asset_count: int = Field(description="资产数量", ge=0)
    # High-value assets (required).
    high_value_assets: List[str] = Field(description="高价值资产")
    # Overview description (required).
    description: str = Field(description="概述描述")
|
|
|
|
|
|
class Section3(BaseModel):
    """Section 3: inventory of the data assets."""

    # Overview (required).
    overview: Section3Overview = Field(description="概述")
    # List of data assets (required).
    assets: List[DataAsset] = Field(description="数据资产列表")
|
|
|
|
|
|
class ComplianceRemediationItem(BaseModel):
    """A single compliance remediation item."""

    # Sequence number; must be at least 1.
    order: int = Field(description="序号", ge=1)
    # Category (required).
    category: str = Field(description="分类")
    # Detailed recommendation (required).
    description: str = Field(description="详细建议")
    # Code references; optional, defaults to None.
    code_references: Optional[List[str]] = Field(
        default=None,
        description="代码引用",
    )
|
|
|
|
|
|
class ComplianceRemediation(BaseModel):
    """Compliance remediation recommendations."""

    # Title (required).
    title: str = Field(description="标题")
    # List of remediation items (required).
    items: List[ComplianceRemediationItem] = Field(
        description="整改项列表",
    )
|
|
|
|
|
|
class TechnicalEvolution(BaseModel):
    """Technical evolution recommendations."""

    # Title (required).
    title: str = Field(description="标题")
    # Description (required).
    description: str = Field(description="描述")
    # Recommended technologies; optional, defaults to None.
    technologies: Optional[List[str]] = Field(
        default=None,
        description="推荐技术",
    )
|
|
|
|
|
|
class ValueDeepeningItem(BaseModel):
    """A single value-deepening item."""

    # Recommendation description (required).
    description: str = Field(description="建议描述")
    # Related scenarios; optional, defaults to None.
    scenarios: Optional[List[str]] = Field(
        default=None,
        description="相关场景",
    )
|
|
|
|
|
|
class ValueDeepening(BaseModel):
    """Value-deepening recommendations."""

    # Title (required).
    title: str = Field(description="标题")
    # List of value-deepening items (required).
    items: List[ValueDeepeningItem] = Field(description="深化项列表")
|
|
|
|
|
|
class Section4(BaseModel):
    """Section 4: expert recommendations and next-step plan."""

    # Compliance remediation (required).
    compliance_remediation: ComplianceRemediation = Field(
        description="合规整改",
    )
    # Technical evolution (required).
    technical_evolution: TechnicalEvolution = Field(
        description="技术演进",
    )
    # Value deepening (required).
    value_deepening: ValueDeepening = Field(description="价值深化")
|
|
|
|
|
|
class GenerateReportResponse(BaseModel):
    """Report generation response: header, four sections and run metadata."""

    # Report header (required).
    header: ReportHeader = Field(description="报告头部")
    # Section 1 (required).
    section1: Section1 = Field(description="章节一")
    # Section 2 (required).
    section2: Section2 = Field(description="章节二")
    # Section 3 (required).
    section3: Section3 = Field(description="章节三")
    # Section 4 (required).
    section4: Section4 = Field(description="章节四")
    # Time spent generating the report, in seconds (required).
    generation_time: float = Field(description="生成耗时(秒)")
    # The large model that was used (required).
    model_used: str = Field(description="使用的大模型")
|