"""
Directly test template reading and placeholder recognition (no dependency on the API service).

1. Check that every template file can be read correctly
2. Verify placeholder recognition
3. Test placeholder replacement (using DocumentService)
"""

import os
import re
import sys
import traceback
from pathlib import Path
from typing import Dict, List, Set

from docx import Document
from dotenv import load_dotenv

# Add the directory containing this file to sys.path so that the
# project-local ``services`` package below can be imported when the file is
# run directly as a script.
PROJECT_ROOT = Path(__file__).parent
sys.path.insert(0, str(PROJECT_ROOT))

# Must come after the sys.path entry above, hence not grouped with the
# other imports at the top of the file.
from services.document_service import DocumentService  # noqa: E402

# Load environment variables
load_dotenv()

# Directory that is scanned recursively for .docx templates.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"


def print_section(title: str) -> None:
    """Print a section heading framed by two 70-character ``=`` rules.

    Args:
        title: Heading text; it is printed with a single leading space.
    """
    rule = "=" * 70
    # The leading newline puts a blank line between consecutive sections.
    print("\n" + rule)
    print(f" {title}")
    print(rule)


def print_result(success: bool, message: str) -> None:
    """Print ``message`` prefixed with an ``[OK]`` or ``[FAIL]`` status tag.

    Args:
        success: A truthy value selects ``[OK]``; a falsy value selects
            ``[FAIL]``.
        message: Text printed after the status tag.
    """
    status = "[OK]" if success else "[FAIL]"
    print(f"{status} {message}")


def scan_local_templates(base_dir: Path, root: "Path | None" = None) -> Dict[str, Path]:
    """Recursively collect the ``.docx`` files found under ``base_dir``.

    Args:
        base_dir: Directory to search. If it does not exist (or is not a
            directory) an empty mapping is returned.
        root: Directory the returned keys are made relative to. When omitted,
            the module-level ``PROJECT_ROOT`` is used, which is what existing
            callers get.

    Returns:
        A mapping from each template's ``/``-separated relative path to the
        path of the file as found on disk. Entries are inserted in sorted
        path order so that callers which pick "the first N" templates get the
        same selection on every run. A file that does not live under
        ``root`` is keyed by its path relative to ``base_dir`` instead of
        raising ``ValueError``.
    """
    templates: Dict[str, Path] = {}
    if not base_dir.is_dir():
        return templates

    if root is None:
        # Resolved lazily so an explicit ``root`` never touches the global.
        root = PROJECT_ROOT

    # rglob() order depends on the filesystem; sort for reproducible output.
    for file_path in sorted(base_dir.rglob("*")):
        if not file_path.is_file() or file_path.suffix.lower() != ".docx":
            continue
        try:
            relative_path = file_path.relative_to(root)
        except ValueError:
            # base_dir is outside root: fall back to a key relative to it.
            relative_path = file_path.relative_to(base_dir)
        templates[relative_path.as_posix()] = file_path

    return templates


# Matches ``{{ name }}``; the capture group is the text between the braces.
_PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def _placeholders_in_text(text: str) -> Set[str]:
    """Return the non-empty, whitespace-stripped placeholder names in ``text``."""
    stripped = (raw.strip() for raw in _PLACEHOLDER_PATTERN.findall(text))
    return {name for name in stripped if name}


def _iter_table_paragraphs(table):
    """Yield every paragraph in ``table``'s cells, descending into nested tables."""
    for row in table.rows:
        for cell in row.cells:
            yield from cell.paragraphs
            for nested_table in cell.tables:
                yield from _iter_table_paragraphs(nested_table)


def extract_placeholders_from_docx(file_path: Path) -> tuple[Set[str], bool, str]:
    """Extract all ``{{placeholder}}`` names from a .docx file.

    The paragraphs exposed by ``doc.paragraphs`` and the tables exposed by
    ``doc.tables`` are scanned, including tables nested inside table cells.

    Args:
        file_path: Path of the .docx file to open.

    Returns:
        A ``(placeholders, success, error)`` tuple. ``placeholders`` is the
        set of names found, with surrounding whitespace removed. ``success``
        is ``False`` when the document could not be opened or its paragraphs
        could not be read, in which case ``error`` carries the exception text
        and ``placeholders`` holds whatever was collected before the failure.
        On success ``error`` is the empty string.
    """
    placeholders: Set[str] = set()

    try:
        # python-docx documents its argument as a path string or file-like
        # object, so hand it a plain string rather than a Path.
        doc = Document(str(file_path))

        # Paragraphs
        for paragraph in doc.paragraphs:
            placeholders |= _placeholders_in_text(paragraph.text)

        # Tables
        for table in doc.tables:
            try:
                for paragraph in _iter_table_paragraphs(table):
                    placeholders |= _placeholders_in_text(paragraph.text)
            except Exception:
                # Deliberately best-effort: some table structures can make
                # the walk fail. Keep what was collected from this table so
                # far and move on to the next one.
                continue
    except Exception as exc:
        return placeholders, False, str(exc)

    return placeholders, True, ""


def _is_inside_templates_dir(path: Path) -> bool:
    """Return True when ``path`` resolves to a location under ``TEMPLATES_DIR``."""
    try:
        path.resolve().relative_to(TEMPLATES_DIR.resolve())
    except ValueError:
        return False
    return True


def _remove_temp_file(path) -> None:
    """Best-effort removal of a file produced while running the checks.

    Files located under ``TEMPLATES_DIR`` are never removed.
    NOTE(review): what ``download_template_from_minio`` returns is not visible
    in this file; if it hands back the local template itself rather than a
    temporary copy, deleting it would destroy the source template, so such
    paths are skipped.
    """
    if not path:
        return
    try:
        candidate = Path(path)
        if not _is_inside_templates_dir(candidate):
            candidate.unlink()
    except (OSError, TypeError, ValueError):
        # Cleanup must never turn a finished check into a failure (the file
        # may already be gone, or the value may not be a usable path).
        pass


def _collect_template_results(local_templates: Dict[str, Path]) -> List[dict]:
    """Run placeholder extraction on every template and return one record each.

    Each record has the keys ``path``, ``name``, ``read_success``,
    ``placeholders`` and ``error``. Progress is printed every 10 templates.
    """
    results = []
    total = len(local_templates)
    for index, (rel_path, file_path) in enumerate(local_templates.items(), 1):
        placeholders, success, error = extract_placeholders_from_docx(file_path)
        results.append({
            'path': rel_path,
            'name': file_path.name,
            'read_success': success,
            'placeholders': placeholders,
            'error': error,
        })
        if index % 10 == 0:
            print(f" 已处理: {index}/{total}")
    return results


def _print_summary(template_results: List[dict]) -> None:
    """Print the statistics section and the list of all distinct placeholders."""
    readable = [r for r in template_results if r['read_success']]
    read_success_count = len(readable)
    read_failed_count = len(template_results) - read_success_count
    with_placeholders_count = sum(1 for r in readable if r['placeholders'])
    without_placeholders_count = read_success_count - with_placeholders_count
    # Only successfully read templates contribute to the overall set.
    all_placeholders = set()
    for record in readable:
        all_placeholders |= record['placeholders']

    # 3. Statistics
    print_section("3. 测试结果统计")
    print(f" 总模板数: {len(template_results)}")
    print(f" 读取成功: {read_success_count}")
    print(f" 读取失败: {read_failed_count}")
    print(f" 有占位符: {with_placeholders_count}")
    print(f" 无占位符: {without_placeholders_count}")
    print(f" 发现的占位符总数: {len(all_placeholders)} 个不同的占位符")

    if read_failed_count > 0:
        print("\n 读取失败的模板:")
        for record in template_results:
            if not record['read_success']:
                print(f" - {record['name']}: {record['error']}")

    # 4. All placeholders
    print_section("4. 所有占位符列表")
    if all_placeholders:
        for placeholder in sorted(all_placeholders):
            print(f" - {placeholder}")
    else:
        print(" 未发现占位符")


def _check_template_reading(template_results: List[dict]) -> None:
    """Fetch up to three placeholder-bearing templates through DocumentService."""
    try:
        document_service = DocumentService()
    except Exception as exc:
        print_result(False, f"DocumentService初始化失败: {exc}")
        traceback.print_exc()
        return
    print_result(True, "DocumentService初始化成功")

    # Pick a few templates that were readable and contain placeholders.
    candidates = [
        r for r in template_results if r['read_success'] and r['placeholders']
    ][:3]
    if not candidates:
        print_result(False, "没有找到有占位符的模板进行测试")
        return

    print(f"\n 测试 {len(candidates)} 个模板的读取功能:")
    for template_result in candidates:
        rel_path = template_result['path']
        print(f"\n 模板: {template_result['name']}")
        print(f" 路径: {rel_path}")
        print(f" 占位符: {sorted(template_result['placeholders'])}")

        temp_path = None
        try:
            # Exercise download_template_from_minio (per the original note,
            # it now reads from local storage).
            temp_path = document_service.download_template_from_minio(rel_path)
            if not (temp_path and Path(temp_path).exists()):
                print_result(False, "模板读取失败:文件不存在")
                continue
            print_result(True, f"模板读取成功: {temp_path}")

            # Verify that the returned file opens as a document.
            try:
                doc = Document(temp_path)
                print(f" 文档段落数: {len(doc.paragraphs)}")
                print(f" 文档表格数: {len(doc.tables)}")
            except Exception as exc:
                print_result(False, f"验证文档内容失败: {exc}")
        except Exception as exc:
            print_result(False, f"模板读取失败: {exc}")
        finally:
            # Runs on every path so a failed verification no longer leaks
            # the fetched file.
            _remove_temp_file(temp_path)


def _check_placeholder_replacement(template_results: List[dict]) -> None:
    """Fill the first placeholder-bearing template and check nothing is left unreplaced."""
    try:
        document_service = DocumentService()
    except Exception as exc:
        print_result(False, f"占位符替换测试初始化失败: {exc}")
        traceback.print_exc()
        return

    # Pick the first template that was readable and contains placeholders.
    test_template = next(
        (r for r in template_results if r['read_success'] and r['placeholders']),
        None,
    )
    if test_template is None:
        print_result(False, "没有找到有占位符的模板进行测试")
        return

    rel_path = test_template['path']
    placeholders = test_template['placeholders']
    print(f" 测试模板: {test_template['name']}")
    print(f" 路径: {rel_path}")
    print(f" 占位符: {sorted(placeholders)}")

    # Prepare test data: known fields get a readable value, anything else a
    # generated one. Sorted so the printed mapping is stable between runs.
    test_values = {
        'target_name': '测试姓名',
        'target_organization': '测试单位',
        'target_position': '测试职务',
        'target_organization_and_position': '测试单位-测试职务',
        'investigation_team_code': 'DC2025001',
        'appointment_time': '2025-12-16 14:00',
        'appointment_location': '会议室A',
    }
    field_data = {
        name: test_values.get(name, f'测试值_{name}')
        for name in sorted(placeholders)
    }
    print(f" 测试数据: {field_data}")

    template_path = None
    filled_doc_path = None
    try:
        # Read the template
        template_path = document_service.download_template_from_minio(rel_path)
        if not (template_path and Path(template_path).exists()):
            print_result(False, "模板读取失败:文件不存在")
            return

        # Fill the template
        filled_doc_path = document_service.fill_template(template_path, field_data)
        if not (filled_doc_path and Path(filled_doc_path).exists()):
            print_result(False, "文档生成失败:文件不存在")
            return
        print_result(True, f"文档生成成功: {filled_doc_path}")

        # Re-scan the generated document with the same extractor used for the
        # templates, so placeholders left inside tables are detected too.
        remaining, readable, error = extract_placeholders_from_docx(Path(filled_doc_path))
        if not readable:
            print_result(False, f"验证生成的文档失败: {error}")
        elif remaining:
            print_result(False, f"仍有未替换的占位符: {sorted(remaining)}")
        else:
            print_result(True, "所有占位符已成功替换")
    except Exception as exc:
        print_result(False, f"占位符替换测试失败: {exc}")
        traceback.print_exc()
    finally:
        # Each file is removed independently and on every exit path.
        _remove_temp_file(template_path)
        _remove_temp_file(filled_doc_path)


def test_template_reading_and_placeholders():
    """Run the template reading / placeholder checks and print a report.

    Steps, in order: scan ``TEMPLATES_DIR`` for .docx templates, extract the
    placeholders of each one, print statistics and the full placeholder list,
    fetch a few templates through ``DocumentService``, and finally fill one
    template and verify that no placeholder is left in the result.

    Outcomes are reported with ``print_result``; nothing is returned and
    failures of individual steps are printed rather than raised.
    """
    print_section("测试模板读取和占位符识别")

    # 1. Scan for templates
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    print_result(True, f"找到 {len(local_templates)} 个.docx模板文件")
    if not local_templates:
        print_result(False, "未找到模板文件")
        return

    # 2. Read every template and extract its placeholders
    print_section("2. 测试模板读取和占位符识别")
    template_results = _collect_template_results(local_templates)

    # 3 + 4. Statistics and the list of all placeholders
    _print_summary(template_results)

    # 5. Template reading through DocumentService
    print_section("5. 测试DocumentService模板读取功能")
    _check_template_reading(template_results)

    # 6. Placeholder replacement
    print_section("6. 测试占位符替换功能")
    _check_placeholder_replacement(template_results)

    print_section("测试完成")


if __name__ == "__main__":
    # Run the full check sequence when the file is executed as a script.
    test_template_reading_and_placeholders()