增强调试信息,添加对AI返回结果和字段映射的打印,改进字段名清理逻辑以避免空字段名的处理错误,确保数据提取的准确性和完整性。
This commit is contained in:
parent
e1d8d27dc4
commit
b8d89c28ec
9
app.py
9
app.py
@@ -254,6 +254,12 @@ def extract():
|
|||||||
if not ai_result:
|
if not ai_result:
|
||||||
return error_response(2002, "AI解析失败,请检查输入文本质量")
|
return error_response(2002, "AI解析失败,请检查输入文本质量")
|
||||||
|
|
||||||
|
# 调试:打印AI返回的结果
|
||||||
|
print(f"[API] AI返回结果包含 {len(ai_result)} 个字段")
|
||||||
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||||
|
if key in ai_result:
|
||||||
|
print(f"[API] AI返回 {key} = '{ai_result[key]}'")
|
||||||
|
|
||||||
# 构建返回数据(按照outputData中的字段顺序返回)
|
# 构建返回数据(按照outputData中的字段顺序返回)
|
||||||
out_data = []
|
out_data = []
|
||||||
# 创建一个字段编码到字段信息的映射
|
# 创建一个字段编码到字段信息的映射
|
||||||
@@ -264,6 +270,9 @@ def extract():
|
|||||||
# 默认值信息在文档中说明,由前端根据业务需求决定是否应用
|
# 默认值信息在文档中说明,由前端根据业务需求决定是否应用
|
||||||
for field_code in output_field_codes:
|
for field_code in output_field_codes:
|
||||||
field_value = ai_result.get(field_code, '')
|
field_value = ai_result.get(field_code, '')
|
||||||
|
# 调试:打印关键字段的映射
|
||||||
|
if field_code in ['target_name', 'target_gender', 'target_age']:
|
||||||
|
print(f"[API] 构建返回数据: {field_code} = '{field_value}' (从ai_result获取)")
|
||||||
out_data.append({
|
out_data.append({
|
||||||
'fieldCode': field_code,
|
'fieldCode': field_code,
|
||||||
'fieldValue': field_value
|
'fieldValue': field_value
|
||||||
|
|||||||
@@ -295,8 +295,16 @@ class AIService:
|
|||||||
# 规范化字段名并映射到正确的字段编码
|
# 规范化字段名并映射到正确的字段编码
|
||||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||||
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
||||||
|
# 打印关键字段的值用于调试
|
||||||
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||||
|
if key in normalized_data:
|
||||||
|
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
||||||
# 规范化日期格式
|
# 规范化日期格式
|
||||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||||
|
# 再次打印关键字段的值用于调试
|
||||||
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||||
|
if key in normalized_data:
|
||||||
|
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
||||||
return normalized_data
|
return normalized_data
|
||||||
|
|
||||||
# 如果无法提取JSON,记录错误
|
# 如果无法提取JSON,记录错误
|
||||||
@@ -569,6 +577,11 @@ class AIService:
|
|||||||
if key.startswith('@'):
|
if key.startswith('@'):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
|
# 跳过空字段名
|
||||||
|
if not key or not key.strip():
|
||||||
|
print(f"[AI服务] 跳过空字段名,值为: '{value}'")
|
||||||
|
continue
|
||||||
|
|
||||||
# 处理嵌套对象(如 description: {violationOfFamilyPlanningPolicies: "..."})
|
# 处理嵌套对象(如 description: {violationOfFamilyPlanningPolicies: "..."})
|
||||||
if isinstance(value, dict):
|
if isinstance(value, dict):
|
||||||
# 尝试从嵌套对象中提取值
|
# 尝试从嵌套对象中提取值
|
||||||
@@ -583,6 +596,11 @@ class AIService:
|
|||||||
# 清理字段名:去掉前导点、空格等
|
# 清理字段名:去掉前导点、空格等
|
||||||
cleaned_key = key.strip().lstrip('.').rstrip()
|
cleaned_key = key.strip().lstrip('.').rstrip()
|
||||||
|
|
||||||
|
# 如果清理后字段名为空,跳过
|
||||||
|
if not cleaned_key:
|
||||||
|
print(f"[AI服务] 跳过清理后为空字段名,原始key: '{key}', 值为: '{value}'")
|
||||||
|
continue
|
||||||
|
|
||||||
# 尝试直接匹配
|
# 尝试直接匹配
|
||||||
if cleaned_key in name_to_code_map:
|
if cleaned_key in name_to_code_map:
|
||||||
correct_code = name_to_code_map[cleaned_key]
|
correct_code = name_to_code_map[cleaned_key]
|
||||||
@@ -622,23 +640,34 @@ class AIService:
|
|||||||
|
|
||||||
if not matched:
|
if not matched:
|
||||||
# 如果找不到匹配,尝试模糊匹配
|
# 如果找不到匹配,尝试模糊匹配
|
||||||
# 检查是否包含字段编码的关键部分
|
# 但跳过空字段名,避免错误匹配
|
||||||
for field_code in field_code_map.keys():
|
if cleaned_key:
|
||||||
# 如果清理后的key包含字段编码的关键部分,或者字段编码包含key的关键部分
|
# 检查是否包含字段编码的关键部分
|
||||||
key_parts = cleaned_key.lower().replace('_', '').replace('-', '').replace('targets', 'target')
|
for field_code in field_code_map.keys():
|
||||||
code_parts = field_code.lower().replace('_', '').replace('-', '')
|
# 如果清理后的key包含字段编码的关键部分,或者字段编码包含key的关键部分
|
||||||
|
key_parts = cleaned_key.lower().replace('_', '').replace('-', '').replace('targets', 'target')
|
||||||
|
code_parts = field_code.lower().replace('_', '').replace('-', '')
|
||||||
|
|
||||||
# 检查相似度(简单匹配)
|
# 检查相似度(简单匹配),但要求key_parts不为空
|
||||||
if key_parts in code_parts or code_parts in key_parts:
|
if key_parts and (key_parts in code_parts or code_parts in key_parts):
|
||||||
normalized_data[field_code] = value
|
# 如果该字段已经有值,且新值为空,则不覆盖
|
||||||
matched = True
|
if field_code in normalized_data and normalized_data[field_code] and not value:
|
||||||
print(f"[AI服务] 模糊匹配: '{cleaned_key}' -> '{field_code}'")
|
print(f"[AI服务] 跳过模糊匹配(已有非空值): '{cleaned_key}' -> '{field_code}' (已有值: '{normalized_data[field_code]}')")
|
||||||
break
|
matched = True
|
||||||
|
break
|
||||||
|
normalized_data[field_code] = value
|
||||||
|
matched = True
|
||||||
|
print(f"[AI服务] 模糊匹配: '{cleaned_key}' -> '{field_code}'")
|
||||||
|
break
|
||||||
|
|
||||||
if not matched:
|
if not matched:
|
||||||
# 如果仍然找不到匹配,保留原字段名(可能模型返回了意外的字段)
|
# 如果仍然找不到匹配,保留原字段名(可能模型返回了意外的字段)
|
||||||
print(f"[AI服务] 警告:无法匹配字段名 '{cleaned_key}',保留原字段名")
|
# 但跳过空字段名
|
||||||
normalized_data[cleaned_key] = value
|
if cleaned_key:
|
||||||
|
print(f"[AI服务] 警告:无法匹配字段名 '{cleaned_key}',保留原字段名")
|
||||||
|
normalized_data[cleaned_key] = value
|
||||||
|
else:
|
||||||
|
print(f"[AI服务] 跳过空字段名,无法匹配")
|
||||||
|
|
||||||
# 确保所有输出字段都有对应的值(即使为空字符串)
|
# 确保所有输出字段都有对应的值(即使为空字符串)
|
||||||
for field_code in field_code_map.keys():
|
for field_code in field_code_map.keys():
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user