"""Directly test template reading and placeholder detection (no API service needed).

1. Check that every template file can be read.
2. Verify placeholder detection.
3. Exercise placeholder replacement through DocumentService.
"""
import os
import re
import sys
import traceback
from pathlib import Path
from typing import Dict, List, Set

from docx import Document
from dotenv import load_dotenv

# Put the project root on sys.path so the local `services` package resolves
# when this script is run directly.
PROJECT_ROOT = Path(__file__).parent
sys.path.insert(0, str(PROJECT_ROOT))

from services.document_service import DocumentService

# Load environment variables.
load_dotenv()

TEMPLATES_DIR = PROJECT_ROOT / "template_finish"

# Matches ``{{field_code}}`` placeholders; group 1 is the raw text between
# the braces. Compiled once and shared by every scan in this script.
PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def print_section(title):
    """Print *title* framed by two 70-character ``=`` rules."""
    print("\n" + "="*70)
    print(f" {title}")
    print("="*70)


def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` or ``[FAIL]`` depending on *success*."""
    status = "[OK]" if success else "[FAIL]"
    print(f"{status} {message}")


def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """Recursively collect every ``.docx`` file under *base_dir*.

    Returns:
        A mapping from the file's path relative to ``PROJECT_ROOT`` (always
        with forward slashes) to its ``Path``. Empty when *base_dir* does not
        exist.
    """
    templates: Dict[str, Path] = {}
    if not base_dir.exists():
        return templates

    for file_path in base_dir.rglob('*'):
        if file_path.is_file() and file_path.suffix.lower() in ['.docx']:
            relative_path = file_path.relative_to(PROJECT_ROOT)
            # Normalise Windows separators so keys look the same on every OS.
            relative_path_str = str(relative_path).replace('\\', '/')
            templates[relative_path_str] = file_path

    return templates


def _collect_placeholders(paragraphs, found: Set[str]) -> None:
    """Add every non-empty, stripped placeholder name in *paragraphs* to *found*."""
    for paragraph in paragraphs:
        for match in PLACEHOLDER_PATTERN.findall(paragraph.text):
            field_code = match.strip()
            if field_code:
                found.add(field_code)


def extract_placeholders_from_docx(file_path: Path) -> tuple[Set[str], bool, str]:
    """Extract all ``{{...}}`` placeholder names from a docx file.

    Both body paragraphs and paragraphs inside table cells are scanned.

    Returns:
        ``(placeholders, success, error_message)``. On failure ``success`` is
        ``False``, ``error_message`` is the exception text, and
        ``placeholders`` holds whatever was collected before the error.
    """
    placeholders: Set[str] = set()

    try:
        doc = Document(file_path)

        # Body paragraphs.
        _collect_placeholders(doc.paragraphs, placeholders)

        # Table cells.
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        _collect_placeholders(cell.paragraphs, placeholders)
            except Exception:
                # Some table structures make iteration raise; skip that
                # table rather than failing the whole file.
                continue

        return placeholders, True, ""
    except Exception as e:
        return placeholders, False, str(e)


def _remove_temp_file(path) -> None:
    """Best-effort removal of one file; a failure is reported, never raised."""
    try:
        # missing_ok: the file may already be gone (e.g. the same path was
        # handed to us twice), which is not worth a warning.
        Path(path).unlink(missing_ok=True)
    except OSError as exc:
        print(f" 清理临时文件失败: {path} ({exc})")


def _check_document_service_reading(template_results: List[dict]) -> None:
    """Step 5: read up to three placeholder-bearing templates via DocumentService."""
    print_section("5. 测试DocumentService模板读取功能")

    try:
        document_service = DocumentService()
        print_result(True, "DocumentService初始化成功")

        # Pick a few templates that were readable and contain placeholders.
        test_templates = [
            r for r in template_results
            if r['read_success'] and r['placeholders']
        ][:3]

        if test_templates:
            print(f"\n 测试 {len(test_templates)} 个模板的读取功能:")

            for template_result in test_templates:
                rel_path = template_result['path']
                placeholders = template_result['placeholders']

                print(f"\n 模板: {template_result['name']}")
                print(f" 路径: {rel_path}")
                print(f" 占位符: {sorted(placeholders)}")

                try:
                    # The original note here says download_template_from_minio
                    # now reads from the local disk.
                    # NOTE(review): the returned path is deleted below, as it
                    # always was - confirm it is a temporary copy and not the
                    # template itself.
                    temp_path = document_service.download_template_from_minio(rel_path)

                    if temp_path and Path(temp_path).exists():
                        print_result(True, f"模板读取成功: {temp_path}")

                        # Verify the file opens as a document.
                        try:
                            doc = Document(temp_path)
                            print(f" 文档段落数: {len(doc.paragraphs)}")
                            print(f" 文档表格数: {len(doc.tables)}")
                        except Exception as e:
                            print_result(False, f"验证文档内容失败: {str(e)}")
                        finally:
                            # Clean up even when verification failed.
                            _remove_temp_file(temp_path)
                    else:
                        print_result(False, "模板读取失败:文件不存在")

                except Exception as e:
                    print_result(False, f"模板读取失败: {str(e)}")
        else:
            print_result(False, "没有找到有占位符的模板进行测试")

    except Exception as e:
        print_result(False, f"DocumentService初始化失败: {str(e)}")
        traceback.print_exc()


def _check_placeholder_replacement(template_results: List[dict]) -> None:
    """Step 6: fill the first placeholder-bearing template and verify the output."""
    print_section("6. 测试占位符替换功能")

    try:
        document_service = DocumentService()

        # First template that was readable and contains placeholders.
        test_template = next(
            (r for r in template_results if r['read_success'] and r['placeholders']),
            None,
        )

        if test_template:
            rel_path = test_template['path']
            placeholders = test_template['placeholders']

            print(f" 测试模板: {test_template['name']}")
            print(f" 路径: {rel_path}")
            print(f" 占位符: {sorted(placeholders)}")

            # Test data: known fields get a realistic value, anything else a
            # generated one derived from the placeholder name.
            test_values = {
                'target_name': '测试姓名',
                'target_organization': '测试单位',
                'target_position': '测试职务',
                'target_organization_and_position': '测试单位-测试职务',
                'investigation_team_code': 'DC2025001',
                'appointment_time': '2025-12-16 14:00',
                'appointment_location': '会议室A',
            }
            field_data = {
                placeholder: test_values.get(placeholder, f'测试值_{placeholder}')
                for placeholder in placeholders
            }

            print(f" 测试数据: {field_data}")

            try:
                # Read the template.
                template_path = document_service.download_template_from_minio(rel_path)

                if template_path and Path(template_path).exists():
                    filled_doc_path = None
                    try:
                        # Fill the template.
                        filled_doc_path = document_service.fill_template(template_path, field_data)

                        if filled_doc_path and Path(filled_doc_path).exists():
                            print_result(True, f"文档生成成功: {filled_doc_path}")

                            # Look for placeholders that survived the fill,
                            # in body paragraphs AND table cells, using the
                            # same scanner that discovered them.
                            remaining, verified, error = extract_placeholders_from_docx(filled_doc_path)

                            if not verified:
                                print_result(False, f"验证生成的文档失败: {error}")
                            elif remaining:
                                print_result(False, f"仍有未替换的占位符: {sorted(remaining)}")
                            else:
                                print_result(True, "所有占位符已成功替换")
                        else:
                            print_result(False, "文档生成失败:文件不存在")
                    finally:
                        # Remove each file independently so one failure does
                        # not leave the other behind.
                        _remove_temp_file(template_path)
                        if filled_doc_path:
                            _remove_temp_file(filled_doc_path)
                else:
                    print_result(False, "模板读取失败:文件不存在")

            except Exception as e:
                print_result(False, f"占位符替换测试失败: {str(e)}")
                traceback.print_exc()
        else:
            print_result(False, "没有找到有占位符的模板进行测试")

    except Exception as e:
        print_result(False, f"占位符替换测试初始化失败: {str(e)}")
        traceback.print_exc()


def test_template_reading_and_placeholders():
    """Run the full check: scan, read, report, then exercise DocumentService.

    Everything is reported on stdout; nothing is returned and no exception is
    raised for a failing template.
    """
    print_section("测试模板读取和占位符识别")

    # 1. Scan templates.
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    print_result(True, f"找到 {len(local_templates)} 个.docx模板文件")

    if not local_templates:
        print_result(False, "未找到模板文件")
        return

    # 2. Read every template and collect its placeholders.
    print_section("2. 测试模板读取和占位符识别")
    all_placeholders: Set[str] = set()
    read_success_count = 0
    read_failed_count = 0
    with_placeholders_count = 0
    without_placeholders_count = 0

    template_results: List[dict] = []

    for i, (rel_path, file_path) in enumerate(local_templates.items(), 1):
        placeholders, success, error = extract_placeholders_from_docx(file_path)

        if success:
            read_success_count += 1
            all_placeholders.update(placeholders)

            if placeholders:
                with_placeholders_count += 1
            else:
                without_placeholders_count += 1
        else:
            read_failed_count += 1

        template_results.append({
            'path': rel_path,
            'name': file_path.name,
            'read_success': success,
            'placeholders': placeholders,
            'error': error,
        })

        # Show progress every 10 templates.
        if i % 10 == 0:
            print(f" 已处理: {i}/{len(local_templates)}")

    # 3. Summary.
    print_section("3. 测试结果统计")
    print(f" 总模板数: {len(local_templates)}")
    print(f" 读取成功: {read_success_count}")
    print(f" 读取失败: {read_failed_count}")
    print(f" 有占位符: {with_placeholders_count}")
    print(f" 无占位符: {without_placeholders_count}")
    print(f" 发现的占位符总数: {len(all_placeholders)} 个不同的占位符")

    if read_failed_count > 0:
        print("\n 读取失败的模板:")
        for result in template_results:
            if not result['read_success']:
                print(f" - {result['name']}: {result['error']}")

    # 4. List every distinct placeholder.
    print_section("4. 所有占位符列表")
    if all_placeholders:
        for placeholder in sorted(all_placeholders):
            print(f" - {placeholder}")
    else:
        print(" 未发现占位符")

    # 5. Template reading through DocumentService.
    _check_document_service_reading(template_results)

    # 6. Placeholder replacement through DocumentService.
    _check_placeholder_replacement(template_results)

    print_section("测试完成")


if __name__ == "__main__":
    test_template_reading_and_placeholders()