增强调试信息,添加对AI返回结果和字段映射的打印,改进字段名清理逻辑以避免空字段名的处理错误,确保数据提取的准确性和完整性。
This commit is contained in:
parent
e1d8d27dc4
commit
b8d89c28ec
9
app.py
9
app.py
@ -254,6 +254,12 @@ def extract():
|
||||
if not ai_result:
|
||||
return error_response(2002, "AI解析失败,请检查输入文本质量")
|
||||
|
||||
# 调试:打印AI返回的结果
|
||||
print(f"[API] AI返回结果包含 {len(ai_result)} 个字段")
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in ai_result:
|
||||
print(f"[API] AI返回 {key} = '{ai_result[key]}'")
|
||||
|
||||
# 构建返回数据(按照outputData中的字段顺序返回)
|
||||
out_data = []
|
||||
# 创建一个字段编码到字段信息的映射
|
||||
@ -264,6 +270,9 @@ def extract():
|
||||
# 默认值信息在文档中说明,由前端根据业务需求决定是否应用
|
||||
for field_code in output_field_codes:
|
||||
field_value = ai_result.get(field_code, '')
|
||||
# 调试:打印关键字段的映射
|
||||
if field_code in ['target_name', 'target_gender', 'target_age']:
|
||||
print(f"[API] 构建返回数据: {field_code} = '{field_value}' (从ai_result获取)")
|
||||
out_data.append({
|
||||
'fieldCode': field_code,
|
||||
'fieldValue': field_value
|
||||
|
||||
@ -295,8 +295,16 @@ class AIService:
|
||||
# 规范化字段名并映射到正确的字段编码
|
||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
||||
# 打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
||||
# 规范化日期格式
|
||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||
# 再次打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
||||
return normalized_data
|
||||
|
||||
# 如果无法提取JSON,记录错误
|
||||
@ -569,6 +577,11 @@ class AIService:
|
||||
if key.startswith('@'):
|
||||
continue
|
||||
|
||||
# 跳过空字段名
|
||||
if not key or not key.strip():
|
||||
print(f"[AI服务] 跳过空字段名,值为: '{value}'")
|
||||
continue
|
||||
|
||||
# 处理嵌套对象(如 description: {violationOfFamilyPlanningPolicies: "..."})
|
||||
if isinstance(value, dict):
|
||||
# 尝试从嵌套对象中提取值
|
||||
@ -583,6 +596,11 @@ class AIService:
|
||||
# 清理字段名:去掉前导点、空格等
|
||||
cleaned_key = key.strip().lstrip('.').rstrip()
|
||||
|
||||
# 如果清理后字段名为空,跳过
|
||||
if not cleaned_key:
|
||||
print(f"[AI服务] 跳过清理后为空字段名,原始key: '{key}', 值为: '{value}'")
|
||||
continue
|
||||
|
||||
# 尝试直接匹配
|
||||
if cleaned_key in name_to_code_map:
|
||||
correct_code = name_to_code_map[cleaned_key]
|
||||
@ -622,14 +640,21 @@ class AIService:
|
||||
|
||||
if not matched:
|
||||
# 如果找不到匹配,尝试模糊匹配
|
||||
# 但跳过空字段名,避免错误匹配
|
||||
if cleaned_key:
|
||||
# 检查是否包含字段编码的关键部分
|
||||
for field_code in field_code_map.keys():
|
||||
# 如果清理后的key包含字段编码的关键部分,或者字段编码包含key的关键部分
|
||||
key_parts = cleaned_key.lower().replace('_', '').replace('-', '').replace('targets', 'target')
|
||||
code_parts = field_code.lower().replace('_', '').replace('-', '')
|
||||
|
||||
# 检查相似度(简单匹配)
|
||||
if key_parts in code_parts or code_parts in key_parts:
|
||||
# 检查相似度(简单匹配),但要求key_parts不为空
|
||||
if key_parts and (key_parts in code_parts or code_parts in key_parts):
|
||||
# 如果该字段已经有值,且新值为空,则不覆盖
|
||||
if field_code in normalized_data and normalized_data[field_code] and not value:
|
||||
print(f"[AI服务] 跳过模糊匹配(已有非空值): '{cleaned_key}' -> '{field_code}' (已有值: '{normalized_data[field_code]}')")
|
||||
matched = True
|
||||
break
|
||||
normalized_data[field_code] = value
|
||||
matched = True
|
||||
print(f"[AI服务] 模糊匹配: '{cleaned_key}' -> '{field_code}'")
|
||||
@ -637,8 +662,12 @@ class AIService:
|
||||
|
||||
if not matched:
|
||||
# 如果仍然找不到匹配,保留原字段名(可能模型返回了意外的字段)
|
||||
# 但跳过空字段名
|
||||
if cleaned_key:
|
||||
print(f"[AI服务] 警告:无法匹配字段名 '{cleaned_key}',保留原字段名")
|
||||
normalized_data[cleaned_key] = value
|
||||
else:
|
||||
print(f"[AI服务] 跳过空字段名,无法匹配")
|
||||
|
||||
# 确保所有输出字段都有对应的值(即使为空字符串)
|
||||
for field_code in field_code_map.keys():
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user