优化AI服务的内容提取逻辑,增强对API返回结果的处理能力,改进JSON解析和错误处理机制,确保在提取数据失败时能够返回空结果而不抛出异常,同时记录详细的调试信息以提高容错性和可维护性。
This commit is contained in:
parent
315301fc0b
commit
9bf1dd1210
Binary file not shown.
@ -309,165 +309,179 @@ class AIService:
|
|||||||
raise Exception(error_message)
|
raise Exception(error_message)
|
||||||
|
|
||||||
result = response.json()
|
result = response.json()
|
||||||
|
|
||||||
# 提取AI返回的内容
|
|
||||||
if 'choices' in result and len(result['choices']) > 0:
|
|
||||||
raw_content = result['choices'][0]['message']['content']
|
|
||||||
|
|
||||||
# 调试:打印原始返回内容(前500字符)
|
# 提取AI返回的内容
|
||||||
print(f"[AI服务] API返回的原始内容(前500字符): {raw_content[:500]}")
|
if 'choices' in result and len(result['choices']) > 0:
|
||||||
|
raw_content = result['choices'][0]['message']['content']
|
||||||
# 处理思考过程标签(支持多种可能的标签格式)
|
|
||||||
content = raw_content
|
# 调试:打印原始返回内容(前500字符)
|
||||||
|
print(f"[AI服务] API返回的原始内容(前500字符): {raw_content[:500]}")
|
||||||
# 处理 </think> 标签(DeepSeek-R1常用格式)
|
|
||||||
if '</think>' in content:
|
# 处理思考过程标签(支持多种可能的标签格式)
|
||||||
parts = content.split('</think>')
|
content = raw_content
|
||||||
if len(parts) > 1:
|
|
||||||
content = parts[-1].strip()
|
# 处理 </think> 标签(DeepSeek-R1常用格式)
|
||||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
if '</think>' in content:
|
||||||
|
parts = content.split('</think>')
|
||||||
# 处理 </think> 标签
|
if len(parts) > 1:
|
||||||
elif '</think>' in content:
|
content = parts[-1].strip()
|
||||||
parts = content.split('</think>')
|
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||||
if len(parts) > 1:
|
|
||||||
content = parts[-1].strip()
|
# 处理 </think> 标签
|
||||||
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
elif '</think>' in content:
|
||||||
|
parts = content.split('</think>')
|
||||||
# 处理 <reasoning>...</reasoning> 标签
|
if len(parts) > 1:
|
||||||
elif '<reasoning>' in content and '</reasoning>' in content:
|
content = parts[-1].strip()
|
||||||
reasoning_start = content.find('</reasoning>')
|
print(f"[AI服务] 检测到 </think> 标签,提取标签后的内容")
|
||||||
if reasoning_start != -1:
|
|
||||||
content = content[reasoning_start + 11:].strip()
|
# 处理 <reasoning>...</reasoning> 标签
|
||||||
print(f"[AI服务] 检测到 <reasoning> 标签,提取标签后的内容")
|
elif '<reasoning>' in content and '</reasoning>' in content:
|
||||||
|
reasoning_start = content.find('</reasoning>')
|
||||||
# 清理后的内容(前500字符)
|
if reasoning_start != -1:
|
||||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
content = content[reasoning_start + 11:].strip()
|
||||||
|
print(f"[AI服务] 检测到 <reasoning> 标签,提取标签后的内容")
|
||||||
# 尝试解析JSON(使用增强的修复机制)
|
|
||||||
extracted_data = self._extract_json_from_text(content)
|
# 清理后的内容(前500字符)
|
||||||
if extracted_data:
|
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||||
print(f"[AI服务] JSON解析成功,提取到 {len(extracted_data)} 个字段")
|
|
||||||
print(f"[AI服务] 原始字段名: {list(extracted_data.keys())}")
|
# 尝试解析JSON(使用增强的修复机制)
|
||||||
# 规范化字段名并映射到正确的字段编码
|
extracted_data = self._extract_json_from_text(content)
|
||||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
if extracted_data:
|
||||||
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
print(f"[AI服务] JSON解析成功,提取到 {len(extracted_data)} 个字段")
|
||||||
# 打印关键字段的值用于调试
|
print(f"[AI服务] 原始字段名: {list(extracted_data.keys())}")
|
||||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
# 规范化字段名并映射到正确的字段编码
|
||||||
if key in normalized_data:
|
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||||
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
print(f"[AI服务] 规范化后的字段名: {list(normalized_data.keys())}")
|
||||||
# 规范化日期格式
|
# 打印关键字段的值用于调试
|
||||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||||
# 再次打印关键字段的值用于调试
|
if key in normalized_data:
|
||||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
print(f"[AI服务] 规范化后 {key} = '{normalized_data[key]}'")
|
||||||
if key in normalized_data:
|
# 规范化日期格式
|
||||||
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||||
# 后处理:从已有信息推断缺失字段
|
# 再次打印关键字段的值用于调试
|
||||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth']:
|
||||||
# 打印后处理后的关键字段
|
if key in normalized_data:
|
||||||
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth', 'target_organization', 'target_position']:
|
print(f"[AI服务] 日期格式化后 {key} = '{normalized_data[key]}'")
|
||||||
if key in normalized_data:
|
# 后处理:从已有信息推断缺失字段
|
||||||
print(f"[AI服务] 后处理后 {key} = '{normalized_data[key]}'")
|
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||||
# 即使提取的字段不完整,也返回结果(更宽容的处理)
|
# 打印后处理后的关键字段
|
||||||
if any(v for v in normalized_data.values() if v): # 至少有一个非空字段
|
for key in ['target_name', 'target_gender', 'target_age', 'target_date_of_birth', 'target_organization', 'target_position']:
|
||||||
print(f"[AI服务] 返回提取的数据(包含 {sum(1 for v in normalized_data.values() if v)} 个非空字段)")
|
if key in normalized_data:
|
||||||
# 记录成功的对话
|
print(f"[AI服务] 后处理后 {key} = '{normalized_data[key]}'")
|
||||||
|
# 即使提取的字段不完整,也返回结果(更宽容的处理)
|
||||||
|
if any(v for v in normalized_data.values() if v): # 至少有一个非空字段
|
||||||
|
print(f"[AI服务] 返回提取的数据(包含 {sum(1 for v in normalized_data.values() if v)} 个非空字段)")
|
||||||
|
# 记录成功的对话
|
||||||
|
if self.ai_logger:
|
||||||
|
self.ai_logger.log_conversation(
|
||||||
|
prompt=prompt,
|
||||||
|
api_request=api_request_info,
|
||||||
|
api_response=result,
|
||||||
|
extracted_data=normalized_data,
|
||||||
|
error=None,
|
||||||
|
session_id=session_id
|
||||||
|
)
|
||||||
|
return normalized_data
|
||||||
|
else:
|
||||||
|
print(f"[AI服务] 警告:提取的数据全部为空,但继续返回(允许部分字段为空)")
|
||||||
|
# 记录对话(即使数据为空)
|
||||||
|
if self.ai_logger:
|
||||||
|
self.ai_logger.log_conversation(
|
||||||
|
prompt=prompt,
|
||||||
|
api_request=api_request_info,
|
||||||
|
api_response=result,
|
||||||
|
extracted_data=normalized_data,
|
||||||
|
error="提取的数据全部为空",
|
||||||
|
session_id=session_id
|
||||||
|
)
|
||||||
|
return normalized_data
|
||||||
|
|
||||||
|
# 如果无法提取JSON,记录错误但尝试更宽容的处理
|
||||||
|
print(f"[AI服务] 警告:无法从内容中提取完整JSON,尝试备用解析方法")
|
||||||
|
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
||||||
|
|
||||||
|
# 尝试从文本中提取
|
||||||
|
parsed_data = self._parse_text_response(content, output_fields)
|
||||||
|
if parsed_data and any(v for v in parsed_data.values() if v): # 至少有一个非空字段
|
||||||
|
print(f"[AI服务] 使用备用方法解析成功,提取到 {len(parsed_data)} 个字段")
|
||||||
|
# 记录对话
|
||||||
if self.ai_logger:
|
if self.ai_logger:
|
||||||
self.ai_logger.log_conversation(
|
self.ai_logger.log_conversation(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
api_request=api_request_info,
|
api_request=api_request_info,
|
||||||
api_response=result,
|
api_response=result,
|
||||||
extracted_data=normalized_data,
|
extracted_data=parsed_data,
|
||||||
error=None,
|
error=None,
|
||||||
session_id=session_id
|
session_id=session_id
|
||||||
)
|
)
|
||||||
return normalized_data
|
return parsed_data
|
||||||
else:
|
|
||||||
print(f"[AI服务] 警告:提取的数据全部为空,但继续返回(允许部分字段为空)")
|
# 如果所有方法都失败,尝试最后一次修复尝试
|
||||||
# 记录对话(即使数据为空)
|
print(f"[AI服务] 所有解析方法都失败,尝试最后一次修复...")
|
||||||
if self.ai_logger:
|
# 尝试使用jsonrepair(如果可用)进行最后修复
|
||||||
self.ai_logger.log_conversation(
|
if JSONREPAIR_AVAILABLE:
|
||||||
prompt=prompt,
|
try:
|
||||||
api_request=api_request_info,
|
repaired_content = repair_json(content)
|
||||||
api_response=result,
|
if repaired_content:
|
||||||
extracted_data=normalized_data,
|
try:
|
||||||
error="提取的数据全部为空",
|
extracted_data = json.loads(repaired_content)
|
||||||
session_id=session_id
|
if extracted_data and isinstance(extracted_data, dict):
|
||||||
)
|
print(f"[AI服务] 使用jsonrepair最后修复成功,提取到 {len(extracted_data)} 个字段")
|
||||||
return normalized_data
|
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
||||||
|
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
||||||
# 如果无法提取JSON,记录错误但尝试更宽容的处理
|
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
||||||
print(f"[AI服务] 警告:无法从内容中提取完整JSON,尝试备用解析方法")
|
# 记录对话
|
||||||
print(f"[AI服务] 清理后的内容(前500字符): {content[:500]}")
|
if self.ai_logger:
|
||||||
|
self.ai_logger.log_conversation(
|
||||||
# 尝试从文本中提取
|
prompt=prompt,
|
||||||
parsed_data = self._parse_text_response(content, output_fields)
|
api_request=api_request_info,
|
||||||
if parsed_data and any(v for v in parsed_data.values() if v): # 至少有一个非空字段
|
api_response=result,
|
||||||
print(f"[AI服务] 使用备用方法解析成功,提取到 {len(parsed_data)} 个字段")
|
extracted_data=normalized_data,
|
||||||
# 记录对话
|
error=None,
|
||||||
|
session_id=session_id
|
||||||
|
)
|
||||||
|
return normalized_data
|
||||||
|
except json.JSONDecodeError:
|
||||||
|
pass
|
||||||
|
except Exception as e:
|
||||||
|
print(f"[AI服务] jsonrepair最后修复也失败: {e}")
|
||||||
|
|
||||||
|
# 如果所有方法都失败,返回空字典而不是抛出异常(更宽容)
|
||||||
|
# 这样至少不会导致整个调用失败,前端可以显示部分结果
|
||||||
|
error_msg = f"无法从API返回内容中提取JSON数据。原始内容长度: {len(raw_content)}, 清理后内容长度: {len(content)}"
|
||||||
|
print(f"[AI服务] 警告:{error_msg}")
|
||||||
|
print(f"[AI服务] 完整内容: {content}")
|
||||||
|
# 返回一个包含所有输出字段的空字典,而不是抛出异常
|
||||||
|
empty_result = {field['field_code']: '' for field in output_fields}
|
||||||
|
print(f"[AI服务] 返回空结果(包含 {len(empty_result)} 个字段,全部为空)")
|
||||||
|
# 记录失败的对话
|
||||||
if self.ai_logger:
|
if self.ai_logger:
|
||||||
self.ai_logger.log_conversation(
|
self.ai_logger.log_conversation(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
api_request=api_request_info,
|
api_request=api_request_info,
|
||||||
api_response=result,
|
api_response=result,
|
||||||
extracted_data=parsed_data,
|
extracted_data=empty_result,
|
||||||
error=None,
|
error=error_msg,
|
||||||
session_id=session_id
|
session_id=session_id
|
||||||
)
|
)
|
||||||
return parsed_data
|
return empty_result
|
||||||
|
else:
|
||||||
# 如果所有方法都失败,尝试最后一次修复尝试
|
error_msg = "API返回格式异常:未找到choices字段或choices为空"
|
||||||
print(f"[AI服务] 所有解析方法都失败,尝试最后一次修复...")
|
# 记录错误
|
||||||
# 尝试使用jsonrepair(如果可用)进行最后修复
|
if self.ai_logger:
|
||||||
if JSONREPAIR_AVAILABLE:
|
self.ai_logger.log_conversation(
|
||||||
try:
|
prompt=prompt,
|
||||||
repaired_content = repair_json(content)
|
api_request=api_request_info,
|
||||||
if repaired_content:
|
api_response=result,
|
||||||
try:
|
extracted_data=None,
|
||||||
extracted_data = json.loads(repaired_content)
|
error=error_msg,
|
||||||
if extracted_data and isinstance(extracted_data, dict):
|
session_id=session_id
|
||||||
print(f"[AI服务] 使用jsonrepair最后修复成功,提取到 {len(extracted_data)} 个字段")
|
)
|
||||||
normalized_data = self._normalize_field_names(extracted_data, output_fields)
|
raise Exception(error_msg)
|
||||||
normalized_data = self._normalize_date_formats(normalized_data, output_fields)
|
|
||||||
normalized_data = self._post_process_inferred_fields(normalized_data, output_fields)
|
except Exception as e:
|
||||||
# 记录对话
|
# 如果发生异常,记录错误日志
|
||||||
if self.ai_logger:
|
error_msg = str(e)
|
||||||
self.ai_logger.log_conversation(
|
|
||||||
prompt=prompt,
|
|
||||||
api_request=api_request_info,
|
|
||||||
api_response=result,
|
|
||||||
extracted_data=normalized_data,
|
|
||||||
error=None,
|
|
||||||
session_id=session_id
|
|
||||||
)
|
|
||||||
return normalized_data
|
|
||||||
except json.JSONDecodeError:
|
|
||||||
pass
|
|
||||||
except Exception as e:
|
|
||||||
print(f"[AI服务] jsonrepair最后修复也失败: {e}")
|
|
||||||
|
|
||||||
# 如果所有方法都失败,返回空字典而不是抛出异常(更宽容)
|
|
||||||
# 这样至少不会导致整个调用失败,前端可以显示部分结果
|
|
||||||
error_msg = f"无法从API返回内容中提取JSON数据。原始内容长度: {len(raw_content)}, 清理后内容长度: {len(content)}"
|
|
||||||
print(f"[AI服务] 警告:{error_msg}")
|
|
||||||
print(f"[AI服务] 完整内容: {content}")
|
|
||||||
# 返回一个包含所有输出字段的空字典,而不是抛出异常
|
|
||||||
empty_result = {field['field_code']: '' for field in output_fields}
|
|
||||||
print(f"[AI服务] 返回空结果(包含 {len(empty_result)} 个字段,全部为空)")
|
|
||||||
# 记录失败的对话
|
|
||||||
if self.ai_logger:
|
|
||||||
self.ai_logger.log_conversation(
|
|
||||||
prompt=prompt,
|
|
||||||
api_request=api_request_info,
|
|
||||||
api_response=result,
|
|
||||||
extracted_data=empty_result,
|
|
||||||
error=error_msg,
|
|
||||||
session_id=session_id
|
|
||||||
)
|
|
||||||
return empty_result
|
|
||||||
else:
|
|
||||||
error_msg = "API返回格式异常:未找到choices字段或choices为空"
|
|
||||||
# 记录错误
|
|
||||||
if self.ai_logger:
|
if self.ai_logger:
|
||||||
self.ai_logger.log_conversation(
|
self.ai_logger.log_conversation(
|
||||||
prompt=prompt,
|
prompt=prompt,
|
||||||
@ -477,7 +491,8 @@ class AIService:
|
|||||||
error=error_msg,
|
error=error_msg,
|
||||||
session_id=session_id
|
session_id=session_id
|
||||||
)
|
)
|
||||||
raise Exception(error_msg)
|
# 重新抛出异常,让上层处理
|
||||||
|
raise
|
||||||
|
|
||||||
def _extract_json_from_text(self, text: str) -> Optional[Dict]:
|
def _extract_json_from_text(self, text: str) -> Optional[Dict]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user