优化AI服务的内容提取逻辑,增强对API返回结果的处理能力,改进JSON解析和错误处理机制,确保在提取数据失败时能够返回空结果而不抛出异常,同时记录详细的调试信息以提高容错性和可维护性。
This commit is contained in:
parent
315301fc0b
commit
9bf1dd1210
Binary file not shown.
@ -309,165 +309,179 @@ class AIService:
|
||||
raise Exception(error_message)
|
||||
|
||||
result = response.json()
|
||||
|
||||
# 提取AI返回的内容
|
||||
if 'choices' in result and len(result['choices']) > 0:
|
||||
raw_content = result['choices'][0]['message']['content']
|
||||
|
||||
# 调试:打印原始返回内容(前500字符)
|
||||
print(f"[AI服务] API返回的原始内容(前500字符): {raw_content[:500]}")
|
||||
|
||||
# 处理思考过程标签(支持多种可能的标签格式)
|
||||
content = raw_content
|
||||
|
||||
# 处理 </think> 标签(DeepSeek-R1常用格式)
|
||||
if '</think>' in content:
|
||||
parts = content.split('</think>')
|
||||
if len(parts) > 1:
|
||||
content = parts[-1].strip()
|
||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||
|
||||
# 处理 </think> 标签
|
||||
elif '</think>' in content:
|
||||
parts = content.split('</think>')
|
||||
if len(parts) > 1:
|
||||
content = parts[-1].strip()
|
||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||
|
||||
# 处理 <reasoning>...</reasoning> 标签
|
||||
elif '<reasoning>' in content and '</reasoning>' in content:
|
||||
reasoning_start = content.find('</reasoning>')
|
||||
if reasoning_start != -1:
|
||||
content = content[reasoning_start + 11:].strip()
|
||||
print(f"[AI服务] 检测到 <reasoning> 标签,提取标签后的内容")
|
||||
|
||||
# 清理后的内容(前500字符)
|
||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||
|
||||
# 尝试解析JSON(使用增强的修复机制)
|
||||
extracted_data = self._extract_json_from_text(content)
|
||||
if extracted_data:
|
||||
print(f"[AI服务] JSON解析成功,提取到 {len(extracted_data)} 个字段")
|
||||
print(f"[AI服务] 原始字段名: {list(extracted_data.keys())}")
|
||||
# 规范化字段名并映射到正确的字段编码
|
||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
||||
# 打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
||||
# 规范化日期格式
|
||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||
# 再次打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
||||
# 后处理:从已有信息推断缺失字段
|
||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||
# 打印后处理后的关键字段
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth', 'target_organization', 'target_position']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 后处理后 {key} = '{normalized_data[key]}'")
|
||||
# 即使提取的字段不完整,也返回结果(更宽容的处理)
|
||||
if any(v for v in normalized_data.values() if v): # 至少有一个非空字段
|
||||
print(f"[AI服务] 返回提取的数据(包含 {sum(1 for v in normalized_data.values() if v)} 个非空字段)")
|
||||
# 记录成功的对话
|
||||
# 提取AI返回的内容
|
||||
if 'choices' in result and len(result['choices']) > 0:
|
||||
raw_content = result['choices'][0]['message']['content']
|
||||
|
||||
# 调试:打印原始返回内容(前500字符)
|
||||
print(f"[AI服务] API返回的原始内容(前500字符): {raw_content[:500]}")
|
||||
|
||||
# 处理思考过程标签(支持多种可能的标签格式)
|
||||
content = raw_content
|
||||
|
||||
# 处理 </think> 标签(DeepSeek-R1常用格式)
|
||||
if '</think>' in content:
|
||||
parts = content.split('</think>')
|
||||
if len(parts) > 1:
|
||||
content = parts[-1].strip()
|
||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||
|
||||
# 处理 </think> 标签
|
||||
elif '</think>' in content:
|
||||
parts = content.split('</think>')
|
||||
if len(parts) > 1:
|
||||
content = parts[-1].strip()
|
||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||
|
||||
# 处理 <reasoning>...</reasoning> 标签
|
||||
elif '<reasoning>' in content and '</reasoning>' in content:
|
||||
reasoning_start = content.find('</reasoning>')
|
||||
if reasoning_start != -1:
|
||||
content = content[reasoning_start + 11:].strip()
|
||||
print(f"[AI服务] 检测到 <reasoning> 标签,提取标签后的内容")
|
||||
|
||||
# 清理后的内容(前500字符)
|
||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||
|
||||
# 尝试解析JSON(使用增强的修复机制)
|
||||
extracted_data = self._extract_json_from_text(content)
|
||||
if extracted_data:
|
||||
print(f"[AI服务] JSON解析成功,提取到 {len(extracted_data)} 个字段")
|
||||
print(f"[AI服务] 原始字段名: {list(extracted_data.keys())}")
|
||||
# 规范化字段名并映射到正确的字段编码
|
||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
||||
# 打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
||||
# 规范化日期格式
|
||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||
# 再次打印关键字段的值用于调试
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
||||
# 后处理:从已有信息推断缺失字段
|
||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||
# 打印后处理后的关键字段
|
||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth', 'target_organization', 'target_position']:
|
||||
if key in normalized_data:
|
||||
print(f"[AI服务] 后处理后 {key} = '{normalized_data[key]}'")
|
||||
# 即使提取的字段不完整,也返回结果(更宽容的处理)
|
||||
if any(v for v in normalized_data.values() if v): # 至少有一个非空字段
|
||||
print(f"[AI服务] 返回提取的数据(包含 {sum(1 for v in normalized_data.values() if v)} 个非空字段)")
|
||||
# 记录成功的对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
error=None,
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
else:
|
||||
print(f"[AI服务] 警告:提取的数据全部为空,但继续返回(允许部分字段为空)")
|
||||
# 记录对话(即使数据为空)
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
error="提取的数据全部为空",
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
|
||||
# 如果无法提取JSON,记录错误但尝试更宽容的处理
|
||||
print(f"[AI服务] 警告:无法从内容中提取完整JSON,尝试备用解析方法")
|
||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||
|
||||
# 尝试从文本中提取
|
||||
parsed_data = self._parse_text_response(content, output_fields)
|
||||
if parsed_data and any(v for v in parsed_data.values() if v): # 至少有一个非空字段
|
||||
print(f"[AI服务] 使用备用方法解析成功,提取到 {len(parsed_data)} 个字段")
|
||||
# 记录对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
extracted_data=parsed_data,
|
||||
error=None,
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
else:
|
||||
print(f"[AI服务] 警告:提取的数据全部为空,但继续返回(允许部分字段为空)")
|
||||
# 记录对话(即使数据为空)
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
error="提取的数据全部为空",
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
|
||||
# 如果无法提取JSON,记录错误但尝试更宽容的处理
|
||||
print(f"[AI服务] 警告:无法从内容中提取完整JSON,尝试备用解析方法")
|
||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||
|
||||
# 尝试从文本中提取
|
||||
parsed_data = self._parse_text_response(content, output_fields)
|
||||
if parsed_data and any(v for v in parsed_data.values() if v): # 至少有一个非空字段
|
||||
print(f"[AI服务] 使用备用方法解析成功,提取到 {len(parsed_data)} 个字段")
|
||||
# 记录对话
|
||||
return parsed_data
|
||||
|
||||
# 如果所有方法都失败,尝试最后一次修复尝试
|
||||
print(f"[AI服务] 所有解析方法都失败,尝试最后一次修复...")
|
||||
# 尝试使用jsonrepair(如果可用)进行最后修复
|
||||
if JSONREPAIR_AVAILABLE:
|
||||
try:
|
||||
repaired_content = repair_json(content)
|
||||
if repaired_content:
|
||||
try:
|
||||
extracted_data = json.loads(repaired_content)
|
||||
if extracted_data and isinstance(extracted_data, dict):
|
||||
print(f"[AI服务] 使用jsonrepair最后修复成功,提取到 {len(extracted_data)} 个字段")
|
||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||
# 记录对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
error=None,
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(f"[AI服务] jsonrepair最后修复也失败: {e}")
|
||||
|
||||
# 如果所有方法都失败,返回空字典而不是抛出异常(更宽容)
|
||||
# 这样至少不会导致整个调用失败,前端可以显示部分结果
|
||||
error_msg = f"无法从API返回内容中提取JSON数据。原始内容长度: {len(raw_content)}, 清理后内容长度: {len(content)}"
|
||||
print(f"[AI服务] 警告:{error_msg}")
|
||||
print(f"[AI服务] 完整内容: {content}")
|
||||
# 返回一个包含所有输出字段的空字典,而不是抛出异常
|
||||
empty_result = {field['field_code']: '' for field in output_fields}
|
||||
print(f"[AI服务] 返回空结果(包含 {len(empty_result)} 个字段,全部为空)")
|
||||
# 记录失败的对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=parsed_data,
|
||||
error=None,
|
||||
extracted_data=empty_result,
|
||||
error=error_msg,
|
||||
session_id=session_id
|
||||
)
|
||||
return parsed_data
|
||||
|
||||
# 如果所有方法都失败,尝试最后一次修复尝试
|
||||
print(f"[AI服务] 所有解析方法都失败,尝试最后一次修复...")
|
||||
# 尝试使用jsonrepair(如果可用)进行最后修复
|
||||
if JSONREPAIR_AVAILABLE:
|
||||
try:
|
||||
repaired_content = repair_json(content)
|
||||
if repaired_content:
|
||||
try:
|
||||
extracted_data = json.loads(repaired_content)
|
||||
if extracted_data and isinstance(extracted_data, dict):
|
||||
print(f"[AI服务] 使用jsonrepair最后修复成功,提取到 {len(extracted_data)} 个字段")
|
||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||
# 记录对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=normalized_data,
|
||||
error=None,
|
||||
session_id=session_id
|
||||
)
|
||||
return normalized_data
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
except Exception as e:
|
||||
print(f"[AI服务] jsonrepair最后修复也失败: {e}")
|
||||
|
||||
# 如果所有方法都失败,返回空字典而不是抛出异常(更宽容)
|
||||
# 这样至少不会导致整个调用失败,前端可以显示部分结果
|
||||
error_msg = f"无法从API返回内容中提取JSON数据。原始内容长度: {len(raw_content)}, 清理后内容长度: {len(content)}"
|
||||
print(f"[AI服务] 警告:{error_msg}")
|
||||
print(f"[AI服务] 完整内容: {content}")
|
||||
# 返回一个包含所有输出字段的空字典,而不是抛出异常
|
||||
empty_result = {field['field_code']: '' for field in output_fields}
|
||||
print(f"[AI服务] 返回空结果(包含 {len(empty_result)} 个字段,全部为空)")
|
||||
# 记录失败的对话
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=empty_result,
|
||||
error=error_msg,
|
||||
session_id=session_id
|
||||
)
|
||||
return empty_result
|
||||
else:
|
||||
error_msg = "API返回格式异常:未找到choices字段或choices为空"
|
||||
# 记录错误
|
||||
return empty_result
|
||||
else:
|
||||
error_msg = "API返回格式异常:未找到choices字段或choices为空"
|
||||
# 记录错误
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
api_request=api_request_info,
|
||||
api_response=result,
|
||||
extracted_data=None,
|
||||
error=error_msg,
|
||||
session_id=session_id
|
||||
)
|
||||
raise Exception(error_msg)
|
||||
|
||||
except Exception as e:
|
||||
# 如果发生异常,记录错误日志
|
||||
error_msg = str(e)
|
||||
if self.ai_logger:
|
||||
self.ai_logger.log_conversation(
|
||||
prompt=prompt,
|
||||
@ -477,7 +491,8 @@ class AIService:
|
||||
error=error_msg,
|
||||
session_id=session_id
|
||||
)
|
||||
raise Exception(error_msg)
|
||||
# 重新抛出异常,让上层处理
|
||||
raise
|
||||
|
||||
def _extract_json_from_text(self, text: str) -> Optional[Dict]:
|
||||
"""
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user