ai-business-write/test_template_and_placeholders.py
2025-12-26 09:16:31 +08:00

310 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
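Test template path updates and placeholder detection.

1. Scan the local template files
2. Check the template records in the database
3. Test the document generation API (this script only prints a sample request)
4. Scan the placeholders in the templates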
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Set
from dotenv import load_dotenv
import re
from docx import Document
# Load environment variables.
load_dotenv()
# Database connection settings. Each value is taken from the environment when
# the variable is set; otherwise the literal fallback below is used.
# NOTE(review): the fallbacks hard-code what look like real connection details
# (host, user and password) in source. Consider requiring them from the
# environment instead and rotating the password — confirm with the owner.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}
# Tenant whose rows are selected: passed as the tenant_id filter by
# get_db_templates.
TENANT_ID = 615873064429507639
# Project root directory (the directory containing this file).
PROJECT_ROOT = Path(__file__).parent
# Directory that analyze_templates scans for local template files.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
def print_section(title):
    """Print ``title`` as a section heading framed by two 70-character '=' rules.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated.
    """
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(success, message):
    """Print ``message`` prefixed with a status tag.

    The tag is ``[OK]`` when ``success`` is truthy and ``[FAIL]`` otherwise.
    """
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print("{} {}".format(tag, message))
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """
    Recursively collect the document templates stored under ``base_dir``.

    Only regular files whose suffix (compared case-insensitively) is
    ``.doc``, ``.docx`` or ``.wps`` are kept.

    Args:
        base_dir: Directory to walk.

    Returns:
        Dict mapping each file's path relative to PROJECT_ROOT (written with
        forward slashes) to its Path object. Empty, after printing a failure
        line, when ``base_dir`` does not exist.
    """
    found: Dict[str, Path] = {}

    if not base_dir.exists():
        print_result(False, f"模板目录不存在: {base_dir}")
        return found

    document_suffixes = ['.doc', '.docx', '.wps']
    for entry in base_dir.rglob('*'):
        # Skip directories and anything that is not a document file.
        if not entry.is_file():
            continue
        if entry.suffix.lower() not in document_suffixes:
            continue
        # Keys are relative to the project root and use '/' on every platform.
        key = str(entry.relative_to(PROJECT_ROOT)).replace('\\', '/')
        found[key] = entry

    return found
def get_db_templates(conn) -> Dict[str, Dict]:
    """
    Load this tenant's template records from ``f_polic_file_config``.

    The query does not filter on ``state``, so every record with a non-empty
    ``file_path`` for TENANT_ID is returned whatever its state.

    Args:
        conn: Open database connection; a DictCursor is created on it and
            closed again before returning.

    Returns:
        Dict keyed by ``file_path``; each value holds the row's ``id``,
        ``name``, ``file_path`` and ``state``. If several rows share a
        ``file_path`` the last one fetched wins.
    """
    # The SQL text is kept exactly as it is sent to the server.
    query = """
SELECT id, name, file_path, state
FROM f_polic_file_config
WHERE tenant_id = %s
AND file_path IS NOT NULL
AND file_path != ''
"""
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (TENANT_ID,))
        return {
            row['file_path']: {
                'id': row['id'],
                'name': row['name'],
                'file_path': row['file_path'],
                'state': row['state']
            }
            for row in cursor.fetchall()
            if row['file_path']
        }
    finally:
        cursor.close()
# Matches ``{{ field_code }}`` markers; group 1 is the text between the braces.
_PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def _placeholders_in_paragraphs(paragraphs) -> Set[str]:
    """Return the stripped, non-empty placeholder names found in ``paragraphs``."""
    found = set()
    for paragraph in paragraphs:
        for raw_name in _PLACEHOLDER_PATTERN.findall(paragraph.text):
            field_code = raw_name.strip()
            # "{{   }}" matches the pattern but names no field; ignore it.
            if field_code:
                found.add(field_code)
    return found


def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """
    Extract every ``{{field_code}}`` placeholder from a docx file.

    The document's ``paragraphs`` and the cell paragraphs of each table in
    ``doc.tables`` are scanned. The function is best-effort and never raises:
    a file that cannot be opened yields an empty set, and a table that cannot
    be traversed is skipped, with a message printed in both cases.

    Args:
        file_path: Path of the docx file.

    Returns:
        Set of placeholder names with surrounding whitespace stripped,
        e.g. ``{'field_code1', 'field_code2'}``.
    """
    placeholders: Set[str] = set()
    try:
        doc = Document(file_path)

        # Placeholders in the document's own paragraphs.
        placeholders |= _placeholders_in_paragraphs(doc.paragraphs)

        # Placeholders inside table cells.
        for table_index, table in enumerate(doc.tables, start=1):
            try:
                for row in table.rows:
                    for cell in row.cells:
                        placeholders |= _placeholders_in_paragraphs(cell.paragraphs)
            except Exception as exc:
                # Some table structures cannot be traversed. Keep whatever was
                # collected so far and move on, but report the skip instead of
                # hiding it.
                print(f" [警告] 跳过无法解析的表格 #{table_index}: {exc}")
    except Exception as exc:
        # Deliberately broad: an unreadable template must not abort the scan.
        print(f" [错误] 读取文件失败: {exc}")
    return placeholders
def _report_path_matching(local_templates, db_templates):
    """Print how database paths line up with local files; return the match count."""
    # Database records whose file_path has no local counterpart.
    unmatched_db = [
        (db_path, db_info)
        for db_path, db_info in db_templates.items()
        if db_path not in local_templates
    ]
    matched_count = len(db_templates) - len(unmatched_db)
    # Local files that no database record points at.
    unmatched_local = [path for path in local_templates if path not in db_templates]

    print(f" - 匹配成功: {matched_count}")
    print(f" - 数据库中有但本地不存在: {len(unmatched_db)}")
    print(f" - 本地有但数据库中不存在: {len(unmatched_local)}")
    if unmatched_db:
        print("\n 数据库中有但本地不存在的模板:")
        for db_path, db_info in unmatched_db[:5]:
            print(f" - ID={db_info['id']}, 名称={db_info['name']}")
            print(f" 路径: {db_path}")
    if unmatched_local:
        print("\n 本地有但数据库中不存在的模板显示前10个")
        for local_path in unmatched_local[:10]:
            print(f" - {local_path}")
    return matched_count


def _scan_sample_placeholders(local_templates, limit=10):
    """Scan up to ``limit`` local .docx templates; return {relative path: placeholders}."""
    # Only .docx files are sampled: the extractor opens files with python-docx,
    # so .doc/.wps entries would only produce a read error and a bogus
    # "no placeholders" warning.
    sample_templates = [
        (rel_path, file_path)
        for rel_path, file_path in local_templates.items()
        if file_path.suffix.lower() == '.docx'
    ][:limit]

    all_placeholders = set()
    template_placeholders = {}
    for rel_path, file_path in sample_templates:
        placeholders = extract_placeholders_from_docx(file_path)
        template_placeholders[rel_path] = placeholders
        all_placeholders.update(placeholders)
        print(f"\n 模板: {Path(rel_path).name}")
        print(f" 路径: {rel_path}")
        print(f" 占位符数量: {len(placeholders)}")
        if placeholders:
            print(f" 占位符列表: {sorted(placeholders)}")
        else:
            print(" [警告] 未找到占位符")
    print(f"\n 扫描的 {len(sample_templates)} 个模板中共发现 {len(all_placeholders)} 个不同的占位符")
    print(f" 所有占位符: {sorted(all_placeholders)}")
    return template_placeholders


def _print_test_suggestion(local_templates, db_templates, template_placeholders, matched_count):
    """Recommend one enabled, locally present template and print a sample API request."""
    if matched_count == 0:
        print("\n [警告] 没有找到已匹配的模板无法进行API测试")
        print(" 建议:")
        print(" 1. 运行 update_template_paths_to_local.py 更新数据库路径")
        print(" 2. 或者手动在数据库中插入模板记录")
        return

    # First record that exists locally and has state == 1 (the value this
    # script counts as enabled).
    candidate = next(
        (
            (db_path, db_info)
            for db_path, db_info in db_templates.items()
            if db_path in local_templates and db_info['state'] == 1
        ),
        None,
    )
    if candidate is None:
        # Matches exist but none is enabled; say so instead of printing nothing.
        print("\n [警告] 已匹配的模板均未启用,无法进行API测试")
        return

    db_path, db_info = candidate
    template_id = db_info['id']
    print("\n 推荐测试模板:")
    print(f" - ID: {template_id}")
    print(f" - 名称: {db_info['name']}")
    print(f" - 路径: {db_path}")

    # Reuse the sample scan when it covered this template; otherwise scan now.
    test_placeholders = template_placeholders.get(db_path)
    if test_placeholders is None:
        test_placeholders = extract_placeholders_from_docx(local_templates[db_path])
    if not test_placeholders:
        return

    print(f" - 占位符: {sorted(test_placeholders)}")
    print("\n 测试API调用示例")
    print(" POST /api/document/generate")
    print(" {")
    print(f' "fileId": {template_id},')
    print(' "inputData": [')
    # Illustrative only: show at most five fields, then an ellipsis.
    for placeholder in sorted(test_placeholders)[:5]:
        print(f' {{"fieldCode": "{placeholder}", "fieldValue": "测试值"}},')
    print(" ...")
    print(" ]")
    print(" }")


def analyze_templates():
    """
    Compare local template files with the database records and print a report.

    Steps, each printed under its own section heading:
      1. Scan TEMPLATES_DIR for local templates (stop if none are found).
      2. Connect to the database described by DB_CONFIG (stop on failure).
      3. Load the template records and count enabled/disabled ones.
      4. Report which database paths match local files and which do not.
      5. Scan a sample of local .docx templates for ``{{...}}`` placeholders.
      6. Recommend one enabled, locally present template and print a sample
         request for it.

    Nothing is returned; all results go to stdout. The connection is always
    closed once it has been opened.
    """
    print_section("模板路径更新和占位符识别测试")

    # 1. Scan local templates.
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    if not local_templates:
        # Report the failure only; an "[OK] found 0" line would be misleading.
        print_result(False, "未找到本地模板文件")
        return
    print_result(True, f"找到 {len(local_templates)} 个本地模板文件")

    # Show the first 10 template files.
    print("\n前10个模板文件示例")
    for index, rel_path in enumerate(list(local_templates)[:10], start=1):
        print(f" {index}. {rel_path}")

    # 2. Connect to the database.
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
    except Exception as exc:
        # Top-level boundary: report any connection problem and stop.
        print_result(False, f"数据库连接失败: {exc}")
        return
    print_result(True, "数据库连接成功")

    try:
        # 3. Load the template records from the database.
        print_section("3. 获取数据库中的模板配置")
        db_templates = get_db_templates(conn)
        print_result(True, f"找到 {len(db_templates)} 条数据库模板记录")
        enabled_count = sum(1 for info in db_templates.values() if info['state'] == 1)
        disabled_count = len(db_templates) - enabled_count
        print(f" - 已启用: {enabled_count}")
        print(f" - 已禁用: {disabled_count}")

        # 4. Path matching analysis.
        print_section("4. 模板路径匹配分析")
        matched_count = _report_path_matching(local_templates, db_templates)

        # 5. Scan placeholders in a sample of templates.
        print_section("5. 扫描模板中的占位符")
        template_placeholders = _scan_sample_placeholders(local_templates)

        # 6. Testing suggestions.
        print_section("6. 测试建议")
        _print_test_suggestion(
            local_templates, db_templates, template_placeholders, matched_count
        )
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    analyze_templates()