增强调试信息，添加对AI返回结果和字段映射的打印，改进字段名清理逻辑以避免空字段名的处理错误，确保数据提取的准确性和完整性。

2025-12-09 12:45:11 +08:00 · 2025-12-09 12:45:11 +08:00 · b8d89c28ec
commit b8d89c28ec
parent e1d8d27dc4
2 changed files with 52 additions and 14 deletions
--- a/app.py
+++ b/app.py
@@ -254,6 +254,12 @@ def extract():
        if not ai_result:
            return error_response(2002, "AI解析失败，请检查输入文本质量")
        # 调试：打印AI返回的结果
        print(f"[API] AI返回结果包含 {len(ai_result)} 个字段")
        for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
            if key in ai_result:
                print(f"[API] AI返回 {key} = '{ai_result[key]}'")
        # 构建返回数据（按照outputData中的字段顺序返回）
        out_data = []
        # 创建一个字段编码到字段信息的映射
@@ -264,6 +270,9 @@ def extract():
        # 默认值信息在文档中说明，由前端根据业务需求决定是否应用
        for field_code in output_field_codes:
            field_value = ai_result.get(field_code, '')
            # 调试：打印关键字段的映射
            if field_code in ['target_name', 'target_gender', 'target_age']:
                print(f"[API] 构建返回数据: {field_code} = '{field_value}' (从ai_result获取)")
            out_data.append({
                'fieldCode': field_code,
                'fieldValue': field_value
--- a/services/ai_service.py
+++ b/services/ai_service.py
@@ -295,8 +295,16 @@ class AIService:
                # 规范化字段名并映射到正确的字段编码
                normalized_data = self._normalize_field_names(extracted_data, output_fields)
                print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
                # 打印关键字段的值用于调试
                for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
                    if key in normalized_data:
                        print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
                # 规范化日期格式
                normalized_data = self._normalize_date_formats(normalized_data, output_fields)
                # 再次打印关键字段的值用于调试
                for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
                    if key in normalized_data:
                        print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
                return normalized_data
            # 如果无法提取JSON，记录错误
@@ -569,6 +577,11 @@ class AIService:
            if key.startswith('@'):
                continue
            # 跳过空字段名
            if not key or not key.strip():
                print(f"[AI服务] 跳过空字段名，值为: '{value}'")
                continue
            # 处理嵌套对象（如 description: {violationOfFamilyPlanningPolicies: "..."}）
            if isinstance(value, dict):
                # 尝试从嵌套对象中提取值
@@ -583,6 +596,11 @@ class AIService:
            # 清理字段名：去掉前导点、空格等
            cleaned_key = key.strip().lstrip('.').rstrip()
            # 如果清理后字段名为空，跳过
            if not cleaned_key:
                print(f"[AI服务] 跳过清理后为空字段名，原始key: '{key}', 值为: '{value}'")
                continue
            # 尝试直接匹配
            if cleaned_key in name_to_code_map:
                correct_code = name_to_code_map[cleaned_key]
@@ -622,23 +640,34 @@ class AIService:
            if not matched:
                # 如果找不到匹配，尝试模糊匹配
-                # 检查是否包含字段编码的关键部分
+                # 但跳过空字段名，避免错误匹配
-                for field_code in field_code_map.keys():
+                if cleaned_key:
-                    # 如果清理后的key包含字段编码的关键部分，或者字段编码包含key的关键部分
+                    # 检查是否包含字段编码的关键部分
-                    key_parts = cleaned_key.lower().replace('_', '').replace('-', '').replace('targets', 'target')
+                    for field_code in field_code_map.keys():
-                    code_parts = field_code.lower().replace('_', '').replace('-', '')
+                        # 如果清理后的key包含字段编码的关键部分，或者字段编码包含key的关键部分
                        key_parts = cleaned_key.lower().replace('_', '').replace('-', '').replace('targets', 'target')
                        code_parts = field_code.lower().replace('_', '').replace('-', '')
-                    # 检查相似度（简单匹配）
+                        # 检查相似度（简单匹配），但要求key_parts不为空
-                    if key_parts in code_parts or code_parts in key_parts:
+                        if key_parts and (key_parts in code_parts or code_parts in key_parts):
-                        normalized_data[field_code] = value
+                            # 如果该字段已经有值，且新值为空，则不覆盖
-                        matched = True
+                            if field_code in normalized_data and normalized_data[field_code] and not value:
-                        print(f"[AI服务] 模糊匹配: '{cleaned_key}' -> '{field_code}'")
+                                print(f"[AI服务] 跳过模糊匹配（已有非空值）: '{cleaned_key}' -> '{field_code}' (已有值: '{normalized_data[field_code]}')")
-                        break
+                                matched = True
                                break
                            normalized_data[field_code] = value
                            matched = True
                            print(f"[AI服务] 模糊匹配: '{cleaned_key}' -> '{field_code}'")
                            break
                if not matched:
                    # 如果仍然找不到匹配，保留原字段名（可能模型返回了意外的字段）
-                    print(f"[AI服务] 警告：无法匹配字段名 '{cleaned_key}'，保留原字段名")
+                    # 但跳过空字段名
-                    normalized_data[cleaned_key] = value
+                    if cleaned_key:
                        print(f"[AI服务] 警告：无法匹配字段名 '{cleaned_key}'，保留原字段名")
                        normalized_data[cleaned_key] = value
                    else:
                        print(f"[AI服务] 跳过空字段名，无法匹配")
        # 确保所有输出字段都有对应的值（即使为空字符串）
        for field_code in field_code_map.keys():