""" 更新数据库模板路径并测试文档生成接口 1. 更新数据库中的模板路径(MinIO路径改为本地路径) 2. 为本地有但数据库中没有的模板创建记录 3. 测试文档生成接口 4. 验证占位符识别 """ import os import pymysql from pathlib import Path from typing import Dict, List, Set from dotenv import load_dotenv import re from docx import Document import requests import json # 加载环境变量 load_dotenv() # 数据库配置 DB_CONFIG = { 'host': os.getenv('DB_HOST', '152.136.177.240'), 'port': int(os.getenv('DB_PORT', 5012)), 'user': os.getenv('DB_USER', 'finyx'), 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), 'database': os.getenv('DB_NAME', 'finyx'), 'charset': 'utf8mb4' } # 实际使用的 tenant_id(从数据库中发现是1) TENANT_ID = 1 CREATED_BY = 655162080928945152 UPDATED_BY = 655162080928945152 # 项目根目录 PROJECT_ROOT = Path(__file__).parent TEMPLATES_DIR = PROJECT_ROOT / "template_finish" # API配置(如果需要测试接口) API_BASE_URL = os.getenv('API_BASE_URL', 'http://localhost:5000') def print_section(title): """打印章节标题""" print("\n" + "="*70) print(f" {title}") print("="*70) def print_result(success, message): """打印结果""" status = "[OK]" if success else "[FAIL]" print(f"{status} {message}") def generate_id(): """生成ID""" import time return int(time.time() * 1000000) def scan_local_templates(base_dir: Path) -> Dict[str, Path]: """扫描本地模板文件""" templates = {} if not base_dir.exists(): return templates for file_path in base_dir.rglob('*'): if file_path.is_file() and file_path.suffix.lower() in ['.doc', '.docx', '.wps']: relative_path = file_path.relative_to(PROJECT_ROOT) relative_path_str = str(relative_path).replace('\\', '/') templates[relative_path_str] = file_path return templates def get_db_templates(conn) -> Dict[str, Dict]: """从数据库获取所有模板配置""" cursor = conn.cursor(pymysql.cursors.DictCursor) try: sql = """ SELECT id, name, file_path, state, parent_id FROM f_polic_file_config WHERE tenant_id = %s """ cursor.execute(sql, (TENANT_ID,)) templates = cursor.fetchall() result = {} for template in templates: file_path = template['file_path'] if file_path: result[file_path] = { 'id': template['id'], 'name': template['name'], 'file_path': file_path, 'state': template['state'], 'parent_id': template['parent_id'] } return result finally: cursor.close() def extract_placeholders_from_docx(file_path: Path) -> Set[str]: """从docx文件中提取所有占位符""" placeholders = set() placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}') try: doc = Document(file_path) # 从段落中提取 for paragraph in doc.paragraphs: text = paragraph.text matches = placeholder_pattern.findall(text) for match in matches: field_code = match.strip() if field_code: placeholders.add(field_code) # 从表格中提取 for table in doc.tables: try: for row in table.rows: for cell in row.cells: for paragraph in cell.paragraphs: text = paragraph.text matches = placeholder_pattern.findall(text) for match in matches: field_code = match.strip() if field_code: placeholders.add(field_code) except: continue except Exception as e: print(f" [错误] 读取文件失败: {str(e)}") return placeholders def update_minio_paths_to_local(conn): """更新MinIO路径为本地路径""" print_section("更新MinIO路径为本地路径") cursor = conn.cursor(pymysql.cursors.DictCursor) try: # 查找所有MinIO路径 sql = """ SELECT id, name, file_path FROM f_polic_file_config WHERE tenant_id = %s AND file_path LIKE '/%%TEMPLATE/%%' """ cursor.execute(sql, (TENANT_ID,)) minio_templates = cursor.fetchall() if not minio_templates: print_result(True, "没有找到MinIO路径,无需更新") return 0 print(f" 找到 {len(minio_templates)} 条MinIO路径记录") # 扫描本地模板 local_templates = scan_local_templates(TEMPLATES_DIR) updated_count = 0 for template in minio_templates: old_path = template['file_path'] # 从MinIO路径中提取文件名 file_name = Path(old_path).name # 在本地模板中查找匹配的文件 matched_path = None for local_path, local_file in local_templates.items(): if local_file.name == file_name: matched_path = local_path break if matched_path: # 更新路径 update_cursor = conn.cursor() try: update_cursor.execute(""" UPDATE f_polic_file_config SET file_path = %s WHERE id = %s """, (matched_path, template['id'])) conn.commit() print(f" [更新] ID={template['id']}, 名称={template['name']}") print(f" 旧路径: {old_path}") print(f" 新路径: {matched_path}") updated_count += 1 except Exception as e: conn.rollback() print(f" [错误] 更新失败: {str(e)}") finally: update_cursor.close() else: print(f" [未找到] ID={template['id']}, 名称={template['name']}, 文件名={file_name}") return updated_count finally: cursor.close() def create_missing_templates(conn, local_templates: Dict[str, Path], db_templates: Dict[str, Dict]): """为本地有但数据库中没有的模板创建记录""" print_section("创建缺失的模板记录") missing_templates = [] for local_path in local_templates.keys(): if local_path not in db_templates: missing_templates.append(local_path) if not missing_templates: print_result(True, "所有本地模板都已存在于数据库中") return 0 print(f" 找到 {len(missing_templates)} 个缺失的模板") created_count = 0 cursor = conn.cursor() try: for local_path in missing_templates: file_path = local_templates[local_path] file_name = file_path.stem # 不含扩展名的文件名 # 生成模板ID template_id = generate_id() # 插入记录 try: cursor.execute(""" INSERT INTO f_polic_file_config (id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state) VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1) """, ( template_id, TENANT_ID, None, # parent_id,可以根据目录结构设置 file_name, '{}', # input_data local_path, CREATED_BY, UPDATED_BY )) conn.commit() print(f" [创建] ID={template_id}, 名称={file_name}, 路径={local_path}") created_count += 1 except Exception as e: conn.rollback() print(f" [错误] 创建失败: {str(e)}") finally: cursor.close() return created_count def test_document_generation(conn, api_url: str = None): """测试文档生成接口""" print_section("测试文档生成接口") # 获取一个有本地路径的模板 cursor = conn.cursor(pymysql.cursors.DictCursor) try: sql = """ SELECT id, name, file_path FROM f_polic_file_config WHERE tenant_id = %s AND file_path IS NOT NULL AND file_path != '' AND file_path LIKE 'template_finish/%%' AND state = 1 LIMIT 1 """ cursor.execute(sql, (TENANT_ID,)) template = cursor.fetchone() if not template: print_result(False, "没有找到可测试的模板") return print(f" 测试模板:") print(f" ID: {template['id']}") print(f" 名称: {template['name']}") print(f" 路径: {template['file_path']}") # 检查本地文件是否存在 local_file = PROJECT_ROOT / template['file_path'] if not local_file.exists(): print_result(False, f"本地文件不存在: {local_file}") return # 提取占位符 placeholders = extract_placeholders_from_docx(local_file) print(f" 占位符: {sorted(placeholders)}") if not placeholders: print_result(False, "模板中没有找到占位符,无法测试") return # 如果提供了API URL,测试接口 if api_url: print(f"\n 测试API接口: {api_url}/api/document/generate") # 构建测试数据 input_data = [] for placeholder in sorted(placeholders)[:10]: # 只测试前10个占位符 input_data.append({ "fieldCode": placeholder, "fieldValue": f"测试值_{placeholder}" }) payload = { "fileId": template['id'], "inputData": input_data } try: response = requests.post( f"{api_url}/api/document/generate", json=payload, timeout=30 ) if response.status_code == 200: result = response.json() print_result(True, "API调用成功") print(f" 响应: {json.dumps(result, ensure_ascii=False, indent=2)}") else: print_result(False, f"API调用失败: {response.status_code}") print(f" 响应: {response.text}") except requests.exceptions.RequestException as e: print_result(False, f"API请求异常: {str(e)}") print(" [提示] 请确保服务已启动,或手动测试接口") else: print("\n [提示] 未提供API URL,跳过接口测试") print(" 可以手动测试以下请求:") print(f" POST {API_BASE_URL}/api/document/generate") print(f" {{") print(f" \"fileId\": {template['id']},") print(f" \"inputData\": [") for placeholder in sorted(placeholders)[:5]: print(f" {{\"fieldCode\": \"{placeholder}\", \"fieldValue\": \"测试值\"}},") print(f" ...") print(f" ]") print(f" }}") finally: cursor.close() def scan_all_placeholders(local_templates: Dict[str, Path]): """扫描所有模板的占位符""" print_section("扫描所有模板的占位符") all_placeholders = set() template_placeholders = {} templates_with_placeholders = 0 templates_without_placeholders = 0 for rel_path, file_path in local_templates.items(): placeholders = extract_placeholders_from_docx(file_path) template_placeholders[rel_path] = placeholders all_placeholders.update(placeholders) if placeholders: templates_with_placeholders += 1 else: templates_without_placeholders += 1 print(f" 扫描了 {len(local_templates)} 个模板") print(f" - 有占位符的模板: {templates_with_placeholders} 个") print(f" - 无占位符的模板: {templates_without_placeholders} 个") print(f" - 发现的占位符总数: {len(all_placeholders)} 个") print(f"\n 所有占位符列表:") for placeholder in sorted(all_placeholders): print(f" - {placeholder}") return template_placeholders, all_placeholders def main(): """主函数""" print_section("模板路径更新和接口测试") # 1. 扫描本地模板 print_section("1. 扫描本地模板文件") local_templates = scan_local_templates(TEMPLATES_DIR) print_result(True, f"找到 {len(local_templates)} 个本地模板文件") if not local_templates: print_result(False, "未找到本地模板文件") return # 2. 连接数据库 print_section("2. 连接数据库") try: conn = pymysql.connect(**DB_CONFIG) print_result(True, "数据库连接成功") except Exception as e: print_result(False, f"数据库连接失败: {str(e)}") return try: # 3. 获取数据库中的模板 print_section("3. 获取数据库中的模板配置") db_templates = get_db_templates(conn) print_result(True, f"找到 {len(db_templates)} 条数据库模板记录(有file_path的)") # 4. 更新MinIO路径 updated_count = update_minio_paths_to_local(conn) if updated_count > 0: print_result(True, f"成功更新 {updated_count} 条MinIO路径") # 5. 创建缺失的模板记录 created_count = create_missing_templates(conn, local_templates, db_templates) if created_count > 0: print_result(True, f"成功创建 {created_count} 条模板记录") # 6. 扫描所有占位符 template_placeholders, all_placeholders = scan_all_placeholders(local_templates) # 7. 测试文档生成接口 test_document_generation(conn, API_BASE_URL if API_BASE_URL != 'http://localhost:5000' else None) finally: conn.close() print_result(True, "数据库连接已关闭") print_section("完成") if __name__ == "__main__": main()