264 lines
9.9 KiB
Python
264 lines
9.9 KiB
Python
"""
|
||
字段服务 - 从数据库获取字段配置
|
||
"""
|
||
import pymysql
|
||
import os
|
||
import json
|
||
from typing import List, Dict, Optional
|
||
from pathlib import Path
|
||
|
||
|
||
class FieldService:
|
||
"""字段服务类"""
|
||
|
||
def __init__(self):
|
||
self.db_config = {
|
||
'host': os.getenv('DB_HOST', '152.136.177.240'),
|
||
'port': int(os.getenv('DB_PORT', 5012)),
|
||
'user': os.getenv('DB_USER', 'finyx'),
|
||
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
|
||
'database': os.getenv('DB_NAME', 'finyx'),
|
||
'charset': 'utf8mb4'
|
||
}
|
||
self.tenant_id = 615873064429507639
|
||
|
||
# 加载提示词配置文件
|
||
self.prompt_config = self._load_prompt_config()
|
||
|
||
def _load_prompt_config(self) -> Dict:
|
||
"""
|
||
加载提示词配置文件
|
||
|
||
Returns:
|
||
配置字典
|
||
"""
|
||
# 获取项目根目录
|
||
current_dir = Path(__file__).parent
|
||
project_root = current_dir.parent
|
||
config_path = project_root / 'config' / 'prompt_config.json'
|
||
|
||
try:
|
||
with open(config_path, 'r', encoding='utf-8') as f:
|
||
config = json.load(f)
|
||
return config
|
||
except FileNotFoundError:
|
||
# 如果配置文件不存在,使用默认配置
|
||
print(f"警告: 配置文件 {config_path} 不存在,使用默认配置")
|
||
return self._get_default_config()
|
||
except json.JSONDecodeError as e:
|
||
print(f"错误: 配置文件 {config_path} JSON格式错误: {e}")
|
||
return self._get_default_config()
|
||
|
||
def _get_default_config(self) -> Dict:
|
||
"""获取默认配置(作为后备方案)"""
|
||
return {
|
||
"prompt_template": {
|
||
"intro": "请从以下输入文本中提取结构化信息。",
|
||
"input_text_label": "输入文本:",
|
||
"output_fields_label": "需要提取的字段:",
|
||
"json_format_label": "请严格按照以下JSON格式返回结果,只返回JSON,不要包含其他文字说明:",
|
||
"requirements_label": "要求:",
|
||
"requirements": [
|
||
"仔细分析输入文本,准确提取每个字段的值",
|
||
"如果某个字段在输入文本中找不到对应信息,该字段值设为空字符串\"\"",
|
||
"日期格式统一为YYYYMM(如:198005表示1980年5月)",
|
||
"性别统一为\"男\"或\"女\"",
|
||
"政治面貌使用标准表述(如:中共党员、群众等)",
|
||
"只返回JSON对象,不要包含markdown代码块标记"
|
||
]
|
||
},
|
||
"field_formatting": {
|
||
"input_field_format": "{field_code}: {field_value}",
|
||
"output_field_format": "- {field_name} (字段编码: {field_code})"
|
||
},
|
||
"business_type_rules": {
|
||
"INVESTIGATION": {
|
||
"description": "调查核实业务类型的特殊规则",
|
||
"additional_requirements": []
|
||
}
|
||
}
|
||
}
|
||
|
||
def get_connection(self):
|
||
"""获取数据库连接"""
|
||
return pymysql.connect(**self.db_config)
|
||
|
||
def get_output_fields_by_business_type(self, business_type: str) -> List[Dict]:
|
||
"""
|
||
根据业务类型获取输出字段列表
|
||
|
||
Args:
|
||
business_type: 业务类型,如 'INVESTIGATION'
|
||
|
||
Returns:
|
||
字段列表,每个字段包含: id, name, field_code, field_type
|
||
"""
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
try:
|
||
# 查询"初步核实审批表"相关的输出字段(field_type=2)
|
||
# 目前只支持初步核实审批表,后续可以根据business_type扩展
|
||
sql = """
|
||
SELECT f.id, f.name, f.filed_code as field_code, f.field_type
|
||
FROM f_polic_field f
|
||
INNER JOIN f_polic_file_field ff ON f.id = ff.filed_id
|
||
INNER JOIN f_polic_file_config fc ON ff.file_id = fc.id
|
||
WHERE f.tenant_id = %s
|
||
AND f.field_type = 2
|
||
AND fc.name = '初步核实审批表'
|
||
ORDER BY f.id
|
||
"""
|
||
cursor.execute(sql, (self.tenant_id,))
|
||
fields = cursor.fetchall()
|
||
|
||
# 转换为字典列表
|
||
result = []
|
||
for field in fields:
|
||
result.append({
|
||
'id': field['id'],
|
||
'name': field['name'],
|
||
'field_code': field['field_code'],
|
||
'field_type': field['field_type']
|
||
})
|
||
|
||
return result
|
||
|
||
finally:
|
||
cursor.close()
|
||
conn.close()
|
||
|
||
def get_fields_by_business_type(self, business_type: str) -> Dict:
|
||
"""
|
||
获取业务类型的所有字段(包括输入和输出字段)
|
||
用于测试页面展示
|
||
"""
|
||
conn = self.get_connection()
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
|
||
try:
|
||
# 获取输入字段(field_type=1)
|
||
sql_input = """
|
||
SELECT f.id, f.name, f.filed_code as field_code, f.field_type
|
||
FROM f_polic_field f
|
||
WHERE f.tenant_id = %s
|
||
AND f.field_type = 1
|
||
AND (f.filed_code = 'clue_info' OR f.filed_code = 'target_basic_info_clue')
|
||
ORDER BY f.id
|
||
"""
|
||
cursor.execute(sql_input, (self.tenant_id,))
|
||
input_fields = cursor.fetchall()
|
||
|
||
# 获取输出字段(field_type=2)
|
||
sql_output = """
|
||
SELECT f.id, f.name, f.filed_code as field_code, f.field_type
|
||
FROM f_polic_field f
|
||
INNER JOIN f_polic_file_field ff ON f.id = ff.filed_id
|
||
INNER JOIN f_polic_file_config fc ON ff.file_id = fc.id
|
||
WHERE f.tenant_id = %s
|
||
AND f.field_type = 2
|
||
AND fc.name = '初步核实审批表'
|
||
ORDER BY f.id
|
||
"""
|
||
cursor.execute(sql_output, (self.tenant_id,))
|
||
output_fields = cursor.fetchall()
|
||
|
||
return {
|
||
'input_fields': [
|
||
{
|
||
'id': f['id'],
|
||
'name': f['name'],
|
||
'field_code': f['field_code'],
|
||
'field_type': f['field_type']
|
||
}
|
||
for f in input_fields
|
||
],
|
||
'output_fields': [
|
||
{
|
||
'id': f['id'],
|
||
'name': f['name'],
|
||
'field_code': f['field_code'],
|
||
'field_type': f['field_type']
|
||
}
|
||
for f in output_fields
|
||
]
|
||
}
|
||
|
||
finally:
|
||
cursor.close()
|
||
conn.close()
|
||
|
||
def build_extract_prompt(self, input_data: List[Dict], output_fields: List[Dict], business_type: str = 'INVESTIGATION') -> str:
|
||
"""
|
||
构建AI提取提示词
|
||
|
||
Args:
|
||
input_data: 输入数据列表,格式: [{'fieldCode': 'xxx', 'fieldValue': 'xxx'}]
|
||
output_fields: 输出字段列表
|
||
business_type: 业务类型,用于获取特定规则
|
||
|
||
Returns:
|
||
构建好的提示词
|
||
"""
|
||
# 获取配置
|
||
template = self.prompt_config.get('prompt_template', {})
|
||
formatting = self.prompt_config.get('field_formatting', {})
|
||
business_rules = self.prompt_config.get('business_type_rules', {}).get(business_type, {})
|
||
|
||
# 构建输入文本
|
||
input_field_format = formatting.get('input_field_format', '{field_code}: {field_value}')
|
||
input_text = ""
|
||
for item in input_data:
|
||
field_code = item.get('fieldCode', '')
|
||
field_value = item.get('fieldValue', '')
|
||
input_text += input_field_format.format(
|
||
field_code=field_code,
|
||
field_value=field_value
|
||
) + "\n"
|
||
|
||
# 构建输出字段说明
|
||
output_field_format = formatting.get('output_field_format', '- {field_name} (字段编码: {field_code})')
|
||
output_fields_desc = ""
|
||
for field in output_fields:
|
||
field_name = field['name']
|
||
field_code = field['field_code']
|
||
output_fields_desc += output_field_format.format(
|
||
field_name=field_name,
|
||
field_code=field_code
|
||
) + "\n"
|
||
|
||
# 构建JSON格式示例
|
||
json_example = {}
|
||
for field in output_fields:
|
||
json_example[field['field_code']] = ""
|
||
|
||
# 获取要求列表
|
||
requirements = template.get('requirements', [])
|
||
# 添加业务类型特定的要求
|
||
additional_requirements = business_rules.get('additional_requirements', [])
|
||
all_requirements = requirements + additional_requirements
|
||
|
||
# 构建要求文本
|
||
requirements_text = ""
|
||
for i, req in enumerate(all_requirements, 1):
|
||
requirements_text += f"{i}. {req}\n"
|
||
|
||
# 构建完整提示词
|
||
prompt = f"""{template.get('intro', '请从以下输入文本中提取结构化信息。')}
|
||
|
||
{template.get('input_text_label', '输入文本:')}
|
||
{input_text.strip()}
|
||
|
||
{template.get('output_fields_label', '需要提取的字段:')}
|
||
{output_fields_desc.strip()}
|
||
|
||
{template.get('json_format_label', '请严格按照以下JSON格式返回结果,只返回JSON,不要包含其他文字说明:')}
|
||
{json.dumps(json_example, ensure_ascii=False, indent=2)}
|
||
|
||
{template.get('requirements_label', '要求:')}
|
||
{requirements_text.strip()}
|
||
"""
|
||
|
||
return prompt
|
||
|