"""
Field service - loads field configuration from the database.
"""
import pymysql
import os
import json
from contextlib import contextmanager
from typing import List, Dict, Optional
from pathlib import Path


class FieldService:
    """Look up field definitions in MySQL and build AI extraction prompts.

    On construction the service reads the database settings from the
    environment and loads two JSON configuration files
    (``config/prompt_config.json`` and ``config/field_defaults.json``,
    resolved relative to the parent of this file's directory).
    """

    def __init__(self):
        """Read the DB settings from the environment and load the JSON configs.

        Raises:
            ValueError: if any of DB_HOST, DB_PORT, DB_USER, DB_PASSWORD or
                DB_NAME is unset or empty (also raised by ``int()`` when
                DB_PORT is not numeric).
        """
        # No defaults are applied on purpose: every value must be configured.
        db_host = os.getenv('DB_HOST')
        db_port = os.getenv('DB_PORT')
        db_user = os.getenv('DB_USER')
        db_password = os.getenv('DB_PASSWORD')
        db_name = os.getenv('DB_NAME')

        if not all([db_host, db_port, db_user, db_password, db_name]):
            raise ValueError(
                "数据库配置不完整,请在.env文件中配置以下环境变量:\n"
                "DB_HOST, DB_PORT, DB_USER, DB_PASSWORD, DB_NAME"
            )

        self.db_config = {
            'host': db_host,
            'port': int(db_port),
            'user': db_user,
            'password': db_password,
            'database': db_name,
            'charset': 'utf8mb4'
        }

        # Prompt template configuration.
        self.prompt_config = self._load_prompt_config()

        # Per-field default values.
        self.field_defaults = self._load_field_defaults()

    @staticmethod
    def _config_path(filename: str) -> Path:
        """Return ``<project root>/config/<filename>``.

        The project root is taken to be the parent of the directory that
        contains this file.
        """
        return Path(__file__).parent.parent / 'config' / filename

    @staticmethod
    def _to_field_dict(row: Dict) -> Dict:
        """Copy the four public field keys out of a DB row."""
        return {
            'id': row['id'],
            'name': row['name'],
            'field_code': row['field_code'],
            'field_type': row['field_type']
        }

    @contextmanager
    def _dict_cursor(self):
        """Yield a ``DictCursor`` on a fresh connection; always close both.

        The cursor is created inside the outer ``try`` so the connection is
        released even when cursor creation itself fails.
        """
        conn = self.get_connection()
        try:
            cursor = conn.cursor(pymysql.cursors.DictCursor)
            try:
                yield cursor
            finally:
                cursor.close()
        finally:
            conn.close()

    def _load_prompt_config(self) -> Dict:
        """Load the prompt configuration file.

        Returns:
            The parsed configuration dict, or the built-in default
            configuration when the file is missing or is not valid JSON
            (a message is printed in both cases).
        """
        config_path = self._config_path('prompt_config.json')

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                return json.load(f)
        except FileNotFoundError:
            # Missing file: fall back to the built-in configuration.
            print(f"警告: 配置文件 {config_path} 不存在,使用默认配置")
            return self._get_default_config()
        except json.JSONDecodeError as e:
            print(f"错误: 配置文件 {config_path} JSON格式错误: {e}")
            return self._get_default_config()

    def _get_default_config(self) -> Dict:
        """Return the built-in prompt configuration used as a fallback."""
        return {
            "prompt_template": {
                "intro": "请从以下输入文本中提取结构化信息。",
                "input_text_label": "输入文本:",
                "output_fields_label": "需要提取的字段:",
                "json_format_label": "请严格按照以下JSON格式返回结果,只返回JSON,不要包含其他文字说明:",
                "requirements_label": "要求:",
                "requirements": [
                    "仔细分析输入文本,准确提取每个字段的值",
                    "如果某个字段在输入文本中找不到对应信息,该字段值设为空字符串\"\"",
                    "日期格式统一为YYYYMM(如:198005表示1980年5月)",
                    "性别统一为\"男\"或\"女\"",
                    "政治面貌使用标准表述(如:中共党员、群众等)",
                    "只返回JSON对象,不要包含markdown代码块标记"
                ]
            },
            "field_formatting": {
                "input_field_format": "{field_code}: {field_value}",
                "output_field_format": "- {field_name} (字段编码: {field_code})"
            },
            "business_type_rules": {
                "INVESTIGATION": {
                    "description": "调查核实业务类型的特殊规则",
                    "additional_requirements": []
                }
            }
        }

    def _load_field_defaults(self) -> Dict:
        """Load the field default-value configuration file.

        Returns:
            The ``field_defaults`` mapping from the file, or an empty dict
            when the key is absent, the file is missing, or the file is not
            valid JSON (a message is printed in the last two cases).
        """
        config_path = self._config_path('field_defaults.json')

        try:
            with open(config_path, 'r', encoding='utf-8') as f:
                config = json.load(f)
            return config.get('field_defaults', {})
        except FileNotFoundError:
            print(f"警告: 默认值配置文件 {config_path} 不存在,使用空默认值")
            return {}
        except json.JSONDecodeError as e:
            print(f"错误: 默认值配置文件 {config_path} JSON格式错误: {e}")
            return {}

    def get_field_default_value(self, field_code: str) -> Optional[str]:
        """Return the configured default value of a field.

        Args:
            field_code: field code to look up.

        Returns:
            The configured default, or None when the field has none.
        """
        return self.field_defaults.get(field_code)

    def get_connection(self):
        """Open and return a new database connection."""
        return pymysql.connect(**self.db_config)

    def get_output_fields_by_field_codes(self, field_codes: List[str]) -> List[Dict]:
        """Return the output fields (field_type = 2) matching the given codes.

        Args:
            field_codes: list of field codes, e.g. ['userName', 'userAge'].

        Returns:
            A list of dicts with the keys id, name, field_code and
            field_type, ordered by id. Empty when ``field_codes`` is empty.
        """
        if not field_codes:
            return []

        # One %s placeholder per code; the values themselves are bound by the
        # driver (the query is not restricted by tenant_id).
        placeholders = ','.join(['%s'] * len(field_codes))
        sql = f"""
            SELECT f.id, f.name, f.filed_code as field_code, f.field_type
            FROM f_polic_field f
            WHERE f.filed_code IN ({placeholders})
            AND f.field_type = 2
            ORDER BY f.id
        """

        with self._dict_cursor() as cursor:
            cursor.execute(sql, field_codes)
            return [self._to_field_dict(row) for row in cursor.fetchall()]

    def get_input_field_by_field_code(self, field_code: str) -> Optional[Dict]:
        """Return the input field (field_type = 1) with the given code.

        Args:
            field_code: field code to look up.

        Returns:
            A dict with the keys id, name, field_code and field_type, or
            None when no such field exists.
        """
        sql = """
            SELECT f.id, f.name, f.filed_code as field_code, f.field_type
            FROM f_polic_field f
            WHERE f.filed_code = %s
            AND f.field_type = 1
            LIMIT 1
        """

        with self._dict_cursor() as cursor:
            cursor.execute(sql, (field_code,))
            row = cursor.fetchone()

        return self._to_field_dict(row) if row else None

    @staticmethod
    def _matches_business_type(raw_input_data, business_type: str) -> bool:
        """Tell whether a file config's ``input_data`` JSON names the business type.

        Empty ``input_data`` is treated as an empty object. Data that cannot
        be parsed, or that parses to something other than a JSON object,
        never matches.
        """
        try:
            parsed = json.loads(raw_input_data) if raw_input_data else {}
        except (ValueError, TypeError):
            # json.JSONDecodeError is a ValueError subclass.
            return False
        if not isinstance(parsed, dict):
            # Valid JSON that is a list/string/number has no business_type.
            return False
        return parsed.get('business_type') == business_type

    def get_fields_by_business_type(self, business_type: str) -> Dict:
        """Return all input and output fields for a business type.

        Input fields are the field_type = 1 rows whose code is 'clue_info'
        or 'target_basic_info_clue'. Output fields are the field_type = 2
        rows linked to an enabled (state = 1) file config whose
        ``input_data`` JSON has a matching ``business_type``.

        Args:
            business_type: business type, e.g. 'INVESTIGATION'.

        Returns:
            A dict with the keys ``input_fields`` and ``output_fields``,
            each a list of dicts with id, name, field_code and field_type.
        """
        sql_input = """
            SELECT f.id, f.name, f.filed_code as field_code, f.field_type
            FROM f_polic_field f
            WHERE f.field_type = 1
            AND (f.filed_code = 'clue_info' OR f.filed_code = 'target_basic_info_clue')
            ORDER BY f.id
        """
        sql_file_configs = """
            SELECT id, name, input_data
            FROM f_polic_file_config
            WHERE state = 1
        """

        with self._dict_cursor() as cursor:
            # Input fields (field_type = 1).
            cursor.execute(sql_input)
            input_fields = cursor.fetchall()

            # The business type lives inside each config's input_data JSON,
            # so the matching is done here rather than in SQL.
            cursor.execute(sql_file_configs)
            matching_file_ids = [
                fc['id']
                for fc in cursor.fetchall()
                if self._matches_business_type(fc['input_data'], business_type)
            ]

            # Output fields (field_type = 2) linked to the matching configs.
            output_fields = []
            if matching_file_ids:
                placeholders = ','.join(['%s'] * len(matching_file_ids))
                sql_filtered = f"""
                    SELECT DISTINCT f.id, f.name, f.filed_code as field_code, f.field_type
                    FROM f_polic_field f
                    INNER JOIN f_polic_file_field ff ON f.id = ff.filed_id
                    WHERE f.field_type = 2
                    AND ff.file_id IN ({placeholders})
                    ORDER BY f.id
                """
                cursor.execute(sql_filtered, matching_file_ids)
                output_fields = cursor.fetchall()

        return {
            'input_fields': [self._to_field_dict(row) for row in input_fields],
            'output_fields': [self._to_field_dict(row) for row in output_fields]
        }

    def build_extract_prompt(self, input_data: List[Dict], output_fields: List[Dict]) -> str:
        """Build the AI extraction prompt.

        Args:
            input_data: input items, each like
                {'fieldCode': 'xxx', 'fieldValue': 'xxx'}. None is treated
                as an empty list.
            output_fields: output field dicts; each must have the keys
                'name' and 'field_code'. None is treated as an empty list.

        Returns:
            The assembled prompt text.
        """
        input_data = input_data or []
        output_fields = output_fields or []

        template = self.prompt_config.get('prompt_template', {})
        formatting = self.prompt_config.get('field_formatting', {})
        field_specific_rules = self.prompt_config.get('field_specific_rules', {})

        # Input text: one formatted line per input item.
        input_field_format = formatting.get('input_field_format', '{field_code}: {field_value}')
        input_text = "\n".join(
            input_field_format.format(
                field_code=item.get('fieldCode', ''),
                field_value=item.get('fieldValue', '')
            )
            for item in input_data
        )

        # Output field descriptions, each followed by its field-specific
        # rules when the configuration defines any.
        # NOTE(review): the spacing after "\n" in the rule strings below is
        # reproduced as seen in the reviewed source; confirm the intended
        # indentation.
        output_field_format = formatting.get('output_field_format', '- {field_name} (字段编码: {field_code})')
        field_descriptions = []
        for field in output_fields:
            field_name = field['name']
            field_code = field['field_code']
            parts = [output_field_format.format(
                field_name=field_name,
                field_code=field_code
            )]

            if field_code in field_specific_rules:
                field_rule = field_specific_rules[field_code]
                parts.append(f"\n 说明:{field_rule.get('description', '')}")
                rules = field_rule.get('rules')
                if rules:
                    parts.append("\n 特殊要求:")
                    parts.extend(f"\n - {rule}" for rule in rules)

            field_descriptions.append("".join(parts))
        output_fields_desc = "\n".join(field_descriptions)

        # JSON skeleton: every output field code mapped to an empty string.
        json_example = {field['field_code']: "" for field in output_fields}

        # Numbered requirement list.
        requirements = template.get('requirements', [])
        requirements_text = "\n".join(
            f"{i}. {req}" for i, req in enumerate(requirements, 1)
        )

        # NOTE(review): the line breaks between the prompt sections were
        # reconstructed (one blank line between sections); confirm against
        # the prompt layout in use.
        prompt = f"""{template.get('intro', '请从以下输入文本中提取结构化信息。')}

{template.get('input_text_label', '输入文本:')}
{input_text.strip()}

{template.get('output_fields_label', '需要提取的字段:')}
{output_fields_desc.strip()}

{template.get('json_format_label', '请严格按照以下JSON格式返回结果,只返回JSON,不要包含其他文字说明:')}
{json.dumps(json_example, ensure_ascii=False, indent=2)}

{template.get('requirements_label', '要求:')}
{requirements_text.strip()}
"""
        return prompt