# finyx_data_ai/tests/test_report_generation.py

"""
Tests for the report generation endpoint.
"""
import pytest
from fastapi.testclient import TestClient
from unittest.mock import patch, AsyncMock
from app.main import app
from tests.test_report_generation_helper import (
    create_mock_llm_response_1_2,
    create_mock_llm_response_3,
    create_mock_llm_response_4
)

# Module-level FastAPI test client wrapping the application under test;
# every test in this module sends its requests through this single instance.
client = TestClient(app)


@pytest.fixture
def sample_request_data():
    """Return a fully populated report-generation request payload.

    The payload carries project info, inventory data (one storage entry and
    one identified asset), context data, one selected value scenario and an
    explicit ``options`` section.
    """
    project_info = {
        "project_name": "数据资产盘点项目",
        "industry": "retail-fresh",
        "company_name": "某连锁生鲜零售企业",
    }

    # Inventory sub-structures, assembled separately for readability.
    storage_entry = {
        "category": "交易数据",
        "volume": "50TB",
        "storage_type": "MySQL",
        "color": "blue",
    }
    source_structure = {"structured": 70, "semi_structured": 30}
    member_asset = {
        "name": "会员基础信息表",
        "core_tables": ["t_user_base_01"],
        "description": "存储C端注册用户的核心身份信息",
    }
    inventory_data = {
        "total_tables": 50,
        "total_fields": 300,
        "total_data_volume": "100TB",
        "storage_distribution": [storage_entry],
        "data_source_structure": source_structure,
        "identified_assets": [member_asset],
    }

    context_data = {
        "enterprise_background": "某连锁生鲜零售企业，主营水果、蔬菜等生鲜产品",
        "informatization_status": "信息化建设处于中期阶段",
        "business_flow": "采购-仓储-销售-配送",
    }

    recommendation_scenario = {
        "name": "智能推荐系统",
        "description": "基于用户历史行为推荐商品",
    }
    value_data = {"selected_scenarios": [recommendation_scenario]}

    options = {
        "language": "zh-CN",
        "detail_level": "standard",
        "generation_mode": "full",
    }

    return {
        "project_info": project_info,
        "inventory_data": inventory_data,
        "context_data": context_data,
        "value_data": value_data,
        "options": options,
    }


@pytest.fixture
def mock_llm_response():
    """Return sample Markdown text for the four report chapters.

    The result maps ``chapter1`` .. ``chapter4`` to a Markdown string each.
    """
    # Chapter 1: enterprise digitalization overview.
    overview_md = """# 企业数字化情况简介

## 企业背景
某连锁生鲜零售企业，主营水果、蔬菜等生鲜产品，在全国拥有500家门店。

## 信息化建设现状
企业已建立完善的信息化系统，包括ERP系统、会员系统、供应链管理系统等，实现了业务流程的数字化。

## 业务流与数据流
业务流程：采购-仓储-销售-配送
数据流程：业务系统数据实时同步到数据仓库，支持决策分析。"""

    # Chapter 2: data resource statistics.
    statistics_md = """# 数据资源统计

## 数据总量统计
企业累计数据总量约100TB，包括交易数据、会员数据、供应链数据等。

## 存储分布分析
数据主要存储在MySQL数据库和Hadoop数据仓库中，其中交易数据占比60%。

## 数据来源结构
数据来源包括：交易系统（50%）、会员系统（30%）、供应链系统（20%）。"""

    # Chapter 3: data asset inventory.
    inventory_md = """# 数据资产情况盘点

## 资产构成分析
企业共识别出50张核心数据表，涵盖会员、交易、供应链等业务领域。

## 应用场景描述
已应用场景包括会员画像分析、销售预测、库存优化等。

## 合规风险提示
发现部分数据表包含敏感信息（手机号、身份证号），需加强数据安全管理，符合PIPL要求。"""

    # Chapter 4: expert advice and next steps.
    advice_md = """# 专家建议与下一步计划

## 合规整改建议
1. 建立数据分类分级制度
2. 加强敏感数据加密存储
3. 完善数据访问权限控制

## 技术演进建议
1. 引入实时数据处理技术
2. 构建数据中台，提升数据共享能力
3. 探索AI技术应用，提升智能化水平

## 价值深化建议
1. 拓展数据应用场景，提升数据价值
2. 建立数据运营体系，持续优化数据质量
3. 加强数据人才培养，提升数据能力。"""

    chapters = {}
    chapters["chapter1"] = overview_md
    chapters["chapter2"] = statistics_md
    chapters["chapter3"] = inventory_md
    chapters["chapter4"] = advice_md
    return chapters


def test_report_generation_success(sample_request_data, mock_llm_response):
    """Verify that a complete request yields a successful four-section report.

    The LLM client call is patched so no real model is contacted. The mock is
    primed with three replies, consumed in order: sections 1-2, then
    section 3, then section 4.

    The test is a plain synchronous function, like the other tests in this
    module: it drives the synchronous ``TestClient`` and awaits nothing.

    Args:
        sample_request_data: Fixture providing a fully populated request
            payload.
        mock_llm_response: Fixture with sample chapter text. It is not read
            here (the canned replies come from the helper builders); it is
            kept only so the test's signature stays unchanged.
    """
    with patch('app.services.report_generation_service.llm_client.call') as mock_call:
        # One canned reply per expected LLM call. The (70, 30) arguments
        # match the structured / semi_structured split in the request --
        # presumably the helper echoes them back; verify against the helper.
        mock_call.side_effect = [
            create_mock_llm_response_1_2(70, 30),
            create_mock_llm_response_3(),
            create_mock_llm_response_4()
        ]

        response = client.post(
            "/api/v1/delivery/generate-report",
            json=sample_request_data
        )

        assert response.status_code == 200
        data = response.json()
        assert data["success"] is True
        assert data["code"] == 200
        assert "data" in data
        # The report body must expose all four sections plus generation metadata.
        assert "section1" in data["data"]
        assert "section2" in data["data"]
        assert "section3" in data["data"]
        assert "section4" in data["data"]
        assert "generation_time" in data["data"]
        assert "model_used" in data["data"]


def test_report_generation_request_validation():
    """Verify that a payload missing the required fields is rejected."""
    # Only a single key is sent; the test expects this to fail validation.
    incomplete_payload = dict(project_id="project_001")

    result = client.post("/api/v1/delivery/generate-report", json=incomplete_payload)

    # 422 is the status code this test expects for a validation error.
    assert result.status_code == 422


def test_report_generation_empty_inventory():
    """Verify that a request with an empty data inventory still succeeds.

    The inventory reports zero tables, zero fields, no storage distribution
    and no identified assets; no ``options`` section is sent.
    """
    project_info = {
        "project_name": "数据资产盘点项目",
        "industry": "retail-fresh",
        "company_name": "某连锁生鲜零售企业",
    }
    empty_inventory = {
        "total_tables": 0,
        "total_fields": 0,
        "total_data_volume": "0TB",
        "storage_distribution": [],
        "data_source_structure": {"structured": 50, "semi_structured": 50},
        "identified_assets": [],
    }
    context_data = {
        "enterprise_background": "某连锁生鲜零售企业",
        "informatization_status": "信息化建设处于初期阶段",
        "business_flow": "采购-仓储-销售-配送",
    }
    payload = {
        "project_info": project_info,
        "inventory_data": empty_inventory,
        "context_data": context_data,
        "value_data": {"selected_scenarios": []},
    }

    with patch('app.services.report_generation_service.llm_client.call') as llm_call:
        # Canned replies, consumed one per LLM call in this order.
        canned_replies = [
            create_mock_llm_response_1_2(50, 50),
            create_mock_llm_response_3(),
            create_mock_llm_response_4(),
        ]
        llm_call.side_effect = canned_replies

        result = client.post("/api/v1/delivery/generate-report", json=payload)

        # The endpoint is expected to respond with 200.
        assert result.status_code == 200


def test_report_generation_with_options():
    """Verify that a request carrying an explicit ``options`` section succeeds.

    The options request the ``detailed`` detail level in ``full`` generation
    mode with the ``zh-CN`` language. The LLM client call is patched and
    primed with three canned replies, so no real model is contacted.
    """
    request_data = {
        "project_info": {
            "project_name": "数据资产盘点项目",
            "industry": "retail-fresh",
            "company_name": "某连锁生鲜零售企业"
        },
        "inventory_data": {
            "total_tables": 10,
            "total_fields": 50,
            "total_data_volume": "10TB",
            "storage_distribution": [],
            "data_source_structure": {
                "structured": 80,
                "semi_structured": 20
            },
            "identified_assets": []
        },
        "context_data": {
            "enterprise_background": "某连锁生鲜零售企业",
            "informatization_status": "信息化建设处于中期阶段",
            "business_flow": "采购-仓储-销售-配送"
        },
        "value_data": {
            "selected_scenarios": []
        },
        "options": {
            "language": "zh-CN",
            "detail_level": "detailed",
            "generation_mode": "full"
        }
    }

    with patch('app.services.report_generation_service.llm_client.call') as mock_call:
        # Canned replies, consumed one per LLM call in this order.
        mock_call.side_effect = [
            create_mock_llm_response_1_2(80, 20),
            create_mock_llm_response_3(),
            create_mock_llm_response_4()
        ]

        response = client.post(
            "/api/v1/delivery/generate-report",
            json=request_data
        )

        assert response.status_code == 200


def test_report_generation_chapter_structure():
    """Verify that the generated report exposes ``section1`` .. ``section4``."""
    project_info = {
        "project_name": "数据资产盘点项目",
        "industry": "retail-fresh",
        "company_name": "某连锁生鲜零售企业",
    }
    inventory_data = {
        "total_tables": 10,
        "total_fields": 50,
        "total_data_volume": "10TB",
        "storage_distribution": [],
        "data_source_structure": {"structured": 80, "semi_structured": 20},
        "identified_assets": [],
    }
    context_data = {
        "enterprise_background": "某连锁生鲜零售企业",
        "informatization_status": "信息化建设处于中期阶段",
        "business_flow": "采购-仓储-销售-配送",
    }
    payload = {
        "project_info": project_info,
        "inventory_data": inventory_data,
        "context_data": context_data,
        "value_data": {"selected_scenarios": []},
    }

    with patch('app.services.report_generation_service.llm_client.call') as llm_call:
        # Simulate the successive LLM calls; each reply must carry the
        # data structure the service expects.
        llm_call.side_effect = [
            create_mock_llm_response_1_2(80, 20),
            create_mock_llm_response_3(),
            create_mock_llm_response_4(),
        ]

        result = client.post("/api/v1/delivery/generate-report", json=payload)

        assert result.status_code == 200
        report_data = result.json()["data"]

        # The report must contain all four sections (section1-4).
        for section_key in ("section1", "section2", "section3", "section4"):
            assert section_key in report_data


# Allow running this module directly as a script: hand this file to pytest
# with verbose output enabled.
if __name__ == "__main__":
    pytest.main([__file__, "-v"])