""" 测试模板路径更新和占位符识别 1. 扫描本地模板文件 2. 检查数据库中的模板记录 3. 测试文档生成接口 4. 扫描模板中的占位符 """ import os import pymysql from pathlib import Path from typing import Dict, List, Set from dotenv import load_dotenv import re from docx import Document # 加载环境变量 load_dotenv() # 数据库配置 DB_CONFIG = { 'host': os.getenv('DB_HOST', '152.136.177.240'), 'port': int(os.getenv('DB_PORT', 5012)), 'user': os.getenv('DB_USER', 'finyx'), 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), 'database': os.getenv('DB_NAME', 'finyx'), 'charset': 'utf8mb4' } TENANT_ID = 615873064429507639 # 项目根目录 PROJECT_ROOT = Path(__file__).parent TEMPLATES_DIR = PROJECT_ROOT / "template_finish" def print_section(title): """打印章节标题""" print("\n" + "="*70) print(f" {title}") print("="*70) def print_result(success, message): """打印结果""" status = "[OK]" if success else "[FAIL]" print(f"{status} {message}") def scan_local_templates(base_dir: Path) -> Dict[str, Path]: """ 扫描本地模板文件 Returns: 字典,key为相对路径,value为Path对象 """ templates = {} if not base_dir.exists(): print_result(False, f"模板目录不存在: {base_dir}") return templates # 遍历所有文件 for file_path in base_dir.rglob('*'): if file_path.is_file(): # 只处理文档文件 if file_path.suffix.lower() in ['.doc', '.docx', '.wps']: # 获取相对路径(相对于项目根目录) relative_path = file_path.relative_to(PROJECT_ROOT) # 使用正斜杠作为路径分隔符(跨平台兼容) relative_path_str = str(relative_path).replace('\\', '/') templates[relative_path_str] = file_path return templates def get_db_templates(conn) -> Dict[str, Dict]: """ 从数据库获取所有模板配置(包括已禁用和未禁用的) Returns: 字典,key为file_path,value为模板信息 """ cursor = conn.cursor(pymysql.cursors.DictCursor) try: sql = """ SELECT id, name, file_path, state FROM f_polic_file_config WHERE tenant_id = %s AND file_path IS NOT NULL AND file_path != '' """ cursor.execute(sql, (TENANT_ID,)) templates = cursor.fetchall() result = {} for template in templates: file_path = template['file_path'] if file_path: result[file_path] = { 'id': template['id'], 'name': template['name'], 'file_path': file_path, 'state': template['state'] } return result finally: cursor.close() def extract_placeholders_from_docx(file_path: Path) -> Set[str]: """ 从docx文件中提取所有占位符 Args: file_path: docx文件路径 Returns: 占位符集合,格式: {'field_code1', 'field_code2', ...} """ placeholders = set() placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') try: doc = Document(file_path) # 从段落中提取占位符 for paragraph in doc.paragraphs: text = paragraph.text matches = placeholder_pattern.findall(text) for match in matches: field_code = match.strip() if field_code: placeholders.add(field_code) # 从表格中提取占位符 for table in doc.tables: try: for row in table.rows: for cell in row.cells: for paragraph in cell.paragraphs: text = paragraph.text matches = placeholder_pattern.findall(text) for match in matches: field_code = match.strip() if field_code: placeholders.add(field_code) except Exception as e: # 某些表格结构可能导致错误,跳过 continue except Exception as e: print(f" [错误] 读取文件失败: {str(e)}") return placeholders def analyze_templates(): """分析模板和数据库记录""" print_section("模板路径更新和占位符识别测试") # 1. 扫描本地模板 print_section("1. 扫描本地模板文件") local_templates = scan_local_templates(TEMPLATES_DIR) print_result(True, f"找到 {len(local_templates)} 个本地模板文件") if not local_templates: print_result(False, "未找到本地模板文件") return # 显示前10个模板文件 print("\n前10个模板文件示例:") for i, (rel_path, file_path) in enumerate(list(local_templates.items())[:10]): print(f" {i+1}. {rel_path}") # 2. 连接数据库 print_section("2. 连接数据库") try: conn = pymysql.connect(**DB_CONFIG) print_result(True, "数据库连接成功") except Exception as e: print_result(False, f"数据库连接失败: {str(e)}") return try: # 3. 获取数据库中的模板 print_section("3. 获取数据库中的模板配置") db_templates = get_db_templates(conn) print_result(True, f"找到 {len(db_templates)} 条数据库模板记录") # 统计状态 enabled_count = sum(1 for t in db_templates.values() if t['state'] == 1) disabled_count = len(db_templates) - enabled_count print(f" - 已启用: {enabled_count} 条") print(f" - 已禁用: {disabled_count} 条") # 4. 匹配分析 print_section("4. 模板路径匹配分析") matched_count = 0 unmatched_local = [] unmatched_db = [] # 检查数据库中的模板是否在本地存在 for db_path, db_info in db_templates.items(): if db_path in local_templates: matched_count += 1 else: unmatched_db.append((db_path, db_info)) # 检查本地模板是否在数据库中存在 for local_path in local_templates.keys(): if local_path not in db_templates: unmatched_local.append(local_path) print(f" - 匹配成功: {matched_count} 条") print(f" - 数据库中有但本地不存在: {len(unmatched_db)} 条") print(f" - 本地有但数据库中不存在: {len(unmatched_local)} 条") if unmatched_db: print("\n 数据库中有但本地不存在的模板:") for db_path, db_info in unmatched_db[:5]: print(f" - ID={db_info['id']}, 名称={db_info['name']}") print(f" 路径: {db_path}") if unmatched_local: print(f"\n 本地有但数据库中不存在的模板(显示前10个):") for local_path in unmatched_local[:10]: print(f" - {local_path}") # 5. 扫描占位符 print_section("5. 扫描模板中的占位符") # 选择几个模板进行占位符扫描 sample_templates = list(local_templates.items())[:10] all_placeholders = set() template_placeholders = {} for rel_path, file_path in sample_templates: placeholders = extract_placeholders_from_docx(file_path) template_placeholders[rel_path] = placeholders all_placeholders.update(placeholders) print(f"\n 模板: {Path(rel_path).name}") print(f" 路径: {rel_path}") print(f" 占位符数量: {len(placeholders)}") if placeholders: print(f" 占位符列表: {sorted(placeholders)}") else: print(f" [警告] 未找到占位符") print(f"\n 扫描的 {len(sample_templates)} 个模板中共发现 {len(all_placeholders)} 个不同的占位符") print(f" 所有占位符: {sorted(all_placeholders)}") # 6. 生成测试建议 print_section("6. 测试建议") if matched_count > 0: # 选择一个已匹配的模板进行测试 test_template = None for db_path, db_info in db_templates.items(): if db_path in local_templates and db_info['state'] == 1: test_template = { 'id': db_info['id'], 'name': db_info['name'], 'file_path': db_path, 'local_path': local_templates[db_path] } break if test_template: print(f"\n 推荐测试模板:") print(f" - ID: {test_template['id']}") print(f" - 名称: {test_template['name']}") print(f" - 路径: {test_template['file_path']}") # 获取该模板的占位符 if test_template['file_path'] in template_placeholders: test_placeholders = template_placeholders[test_template['file_path']] else: test_placeholders = extract_placeholders_from_docx(test_template['local_path']) if test_placeholders: print(f" - 占位符: {sorted(test_placeholders)}") print(f"\n 测试API调用示例:") print(f" POST /api/document/generate") print(f" {{") print(f" \"fileId\": {test_template['id']},") print(f" \"inputData\": [") for placeholder in sorted(test_placeholders)[:5]: print(f" {{\"fieldCode\": \"{placeholder}\", \"fieldValue\": \"测试值\"}},") print(f" ...") print(f" ]") print(f" }}") else: print("\n [警告] 没有找到已匹配的模板,无法进行API测试") print(" 建议:") print(" 1. 运行 update_template_paths_to_local.py 更新数据库路径") print(" 2. 或者手动在数据库中插入模板记录") finally: conn.close() print_result(True, "数据库连接已关闭") if __name__ == "__main__": analyze_templates()