310 lines
11 KiB
Python
310 lines
11 KiB
Python
"""
|
||
测试模板路径更新和占位符识别
|
||
1. 扫描本地模板文件
|
||
2. 检查数据库中的模板记录
|
||
3. 测试文档生成接口
|
||
4. 扫描模板中的占位符
|
||
"""
|
||
import os
|
||
import pymysql
|
||
from pathlib import Path
|
||
from typing import Dict, List, Set
|
||
from dotenv import load_dotenv
|
||
import re
|
||
from docx import Document
|
||
|
||
# 加载环境变量
|
||
load_dotenv()
|
||
|
||
# Database connection settings. Every value except the charset can be
# overridden through the environment variable named in the getenv() call.
# SECURITY NOTE(review): the fallback host, user and password below are
# committed to source control. They are kept so existing runs keep working,
# but they should be moved into the environment / a secret store and rotated.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    # `or` also covers DB_PORT being set to an empty string, which int()
    # would otherwise reject with a ValueError.
    'port': int(os.getenv('DB_PORT') or 5012),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}

# Tenant id used to filter the template records read from the database.
TENANT_ID = 615873064429507639

# Project root directory (the directory that contains this file).
PROJECT_ROOT = Path(__file__).parent
# Directory that is scanned for template documents.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
|
||
|
||
|
||
def print_section(title):
    """Print *title* as a section banner framed by two 70-character ``=`` rules."""
    rule = "=" * 70
    # One print call with a newline separator emits the same lines as three
    # separate calls: blank line, rule, indented title, rule.
    print("\n" + rule, f" {title}", rule, sep="\n")
|
||
|
||
|
||
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` when *success* is truthy, else ``[FAIL]``."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(f"{tag} {message}")
|
||
|
||
|
||
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """
    Recursively collect the template documents found under *base_dir*.

    Only regular files whose suffix (case-insensitive) is ``.doc``, ``.docx``
    or ``.wps`` are kept.

    Args:
        base_dir: directory to scan.

    Returns:
        Mapping of each file's path relative to ``PROJECT_ROOT`` (written with
        forward slashes) to its ``Path`` object. Empty when *base_dir* does
        not exist; a failure line is printed in that case.
    """
    if not base_dir.exists():
        print_result(False, f"模板目录不存在: {base_dir}")
        return {}

    document_suffixes = ['.doc', '.docx', '.wps']
    found = {}
    for candidate in base_dir.rglob('*'):
        # Skip directories and anything that is not a document file.
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in document_suffixes:
            continue
        # Keys are relative to the project root, with forward slashes so they
        # look the same on every platform.
        key = str(candidate.relative_to(PROJECT_ROOT)).replace('\\', '/')
        found[key] = candidate

    return found
|
||
|
||
|
||
def get_db_templates(conn) -> Dict[str, Dict]:
    """
    Load the template records of tenant ``TENANT_ID`` that have a file path.

    The query does not filter on ``state``, so enabled and disabled records
    are returned alike.

    Args:
        conn: open database connection; a dictionary cursor is created on it.

    Returns:
        Mapping of ``file_path`` to a dict with the keys ``id``, ``name``,
        ``file_path`` and ``state``.
    """
    query = """
        SELECT id, name, file_path, state
        FROM f_polic_file_config
        WHERE tenant_id = %s
        AND file_path IS NOT NULL
        AND file_path != ''
    """
    db_cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        db_cursor.execute(query, (TENANT_ID,))
        rows = db_cursor.fetchall()
        # Rows with an empty path are skipped; if a path occurs more than
        # once, the last row wins.
        return {
            row['file_path']: {
                'id': row['id'],
                'name': row['name'],
                'file_path': row['file_path'],
                'state': row['state'],
            }
            for row in rows
            if row['file_path']
        }
    finally:
        db_cursor.close()
|
||
|
||
|
||
# Matches ``{{ field_code }}``; group 1 is the text between the braces.
_PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def _iter_placeholders(paragraphs):
    """Yield the stripped, non-empty field codes found in the text of *paragraphs*."""
    for paragraph in paragraphs:
        for raw_code in _PLACEHOLDER_PATTERN.findall(paragraph.text):
            field_code = raw_code.strip()
            if field_code:
                yield field_code


def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """
    Extract all ``{{field_code}}`` placeholders from a docx file.

    Both the document's paragraphs and the paragraphs inside the cells of its
    tables are searched. Surrounding whitespace inside the braces is stripped
    and empty placeholders are ignored.

    Args:
        file_path: path of the docx file.

    Returns:
        Set of field codes, e.g. ``{'field_code1', 'field_code2'}``. If the
        file cannot be read, an error line is printed and whatever was
        collected so far (possibly nothing) is returned; no exception is
        raised.
    """
    placeholders = set()

    try:
        doc = Document(file_path)

        # Placeholders in the document's paragraphs.
        placeholders.update(_iter_placeholders(doc.paragraphs))

        # Placeholders inside table cells.
        for table_number, table in enumerate(doc.tables, start=1):
            try:
                for row in table.rows:
                    for cell in row.cells:
                        placeholders.update(_iter_placeholders(cell.paragraphs))
            except Exception as e:
                # Best effort: some table structures cannot be walked. Skip
                # the table, but say so instead of failing silently.
                print(f" [警告] 跳过第 {table_number} 个表格: {e}")
                continue

    except Exception as e:
        print(f" [错误] 读取文件失败: {str(e)}")

    return placeholders
|
||
|
||
|
||
def _report_path_matches(local_templates, db_templates):
    """Print how database paths and local paths line up; return the number of matches."""
    unmatched_db = [
        (db_path, db_info)
        for db_path, db_info in db_templates.items()
        if db_path not in local_templates
    ]
    unmatched_local = [path for path in local_templates if path not in db_templates]
    matched_count = len(db_templates) - len(unmatched_db)

    print(f" - 匹配成功: {matched_count} 条")
    print(f" - 数据库中有但本地不存在: {len(unmatched_db)} 条")
    print(f" - 本地有但数据库中不存在: {len(unmatched_local)} 条")

    if unmatched_db:
        print("\n 数据库中有但本地不存在的模板:")
        for db_path, db_info in unmatched_db[:5]:
            print(f" - ID={db_info['id']}, 名称={db_info['name']}")
            print(f" 路径: {db_path}")

    if unmatched_local:
        print("\n 本地有但数据库中不存在的模板(显示前10个):")
        for local_path in unmatched_local[:10]:
            print(f" - {local_path}")

    return matched_count


def _scan_sample_placeholders(local_templates):
    """Scan the first ten local templates, print the findings, return {rel_path: placeholders}."""
    sample_templates = list(local_templates.items())[:10]
    all_placeholders = set()
    template_placeholders = {}

    for rel_path, file_path in sample_templates:
        placeholders = extract_placeholders_from_docx(file_path)
        template_placeholders[rel_path] = placeholders
        all_placeholders.update(placeholders)

        print(f"\n 模板: {Path(rel_path).name}")
        print(f" 路径: {rel_path}")
        print(f" 占位符数量: {len(placeholders)}")
        if placeholders:
            print(f" 占位符列表: {sorted(placeholders)}")
        else:
            print(" [警告] 未找到占位符")

    print(f"\n 扫描的 {len(sample_templates)} 个模板中共发现 {len(all_placeholders)} 个不同的占位符")
    print(f" 所有占位符: {sorted(all_placeholders)}")

    return template_placeholders


def _print_test_suggestion(db_templates, local_templates, template_placeholders):
    """Print a sample API call for the first enabled template that also exists locally."""
    candidate = next(
        (
            (db_path, db_info)
            for db_path, db_info in db_templates.items()
            if db_path in local_templates and db_info['state'] == 1
        ),
        None,
    )
    if candidate is None:
        # Matches exist, but none of them is enabled (state == 1). Say so
        # instead of leaving the section empty.
        print("\n [警告] 已匹配的模板均未启用,无法推荐测试模板")
        return

    db_path, db_info = candidate
    template_id = db_info['id']
    print("\n 推荐测试模板:")
    print(f" - ID: {template_id}")
    print(f" - 名称: {db_info['name']}")
    print(f" - 路径: {db_path}")

    # Reuse the scan result when this template was part of the sample;
    # otherwise read the file now.
    test_placeholders = template_placeholders.get(db_path)
    if test_placeholders is None:
        test_placeholders = extract_placeholders_from_docx(local_templates[db_path])

    if not test_placeholders:
        print(" [警告] 该模板中未找到占位符,无法生成API调用示例")
        return

    print(f" - 占位符: {sorted(test_placeholders)}")
    print("\n 测试API调用示例:")
    print(" POST /api/document/generate")
    print(" {")
    print(f' "fileId": {template_id},')
    print(' "inputData": [')
    for placeholder in sorted(test_placeholders)[:5]:
        print(f' {{"fieldCode": "{placeholder}", "fieldValue": "测试值"}},')
    print(" ...")
    print(" ]")
    print(" }")


def analyze_templates():
    """
    Compare local template files with the database records and print a report.

    Steps: scan ``TEMPLATES_DIR``; connect to the database described by
    ``DB_CONFIG``; load the template records; report which paths match; scan
    the placeholders of the first ten local templates; print a sample API
    call for one enabled, matched template.

    Returns early (after printing a failure line) when no local template is
    found or the database connection fails. The connection is always closed
    once it has been opened.
    """
    print_section("模板路径更新和占位符识别测试")

    # 1. Scan local templates
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    if not local_templates:
        # Report the failure only; an "[OK] found 0 files" line would be misleading.
        print_result(False, "未找到本地模板文件")
        return
    print_result(True, f"找到 {len(local_templates)} 个本地模板文件")

    # Show the first ten template files
    print("\n前10个模板文件示例:")
    for number, rel_path in enumerate(list(local_templates)[:10], start=1):
        print(f" {number}. {rel_path}")

    # 2. Connect to the database
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return
    print_result(True, "数据库连接成功")

    try:
        # 3. Load the template records from the database
        print_section("3. 获取数据库中的模板配置")
        db_templates = get_db_templates(conn)
        print_result(True, f"找到 {len(db_templates)} 条数据库模板记录")

        # Count records by state; state == 1 is counted as enabled.
        enabled_count = sum(1 for info in db_templates.values() if info['state'] == 1)
        disabled_count = len(db_templates) - enabled_count
        print(f" - 已启用: {enabled_count} 条")
        print(f" - 已禁用: {disabled_count} 条")

        # 4. Path matching
        print_section("4. 模板路径匹配分析")
        matched_count = _report_path_matches(local_templates, db_templates)

        # 5. Placeholder scan
        print_section("5. 扫描模板中的占位符")
        template_placeholders = _scan_sample_placeholders(local_templates)

        # 6. Test suggestion
        print_section("6. 测试建议")
        if matched_count > 0:
            _print_test_suggestion(db_templates, local_templates, template_placeholders)
        else:
            print("\n [警告] 没有找到已匹配的模板,无法进行API测试")
            print(" 建议:")
            print(" 1. 运行 update_template_paths_to_local.py 更新数据库路径")
            print(" 2. 或者手动在数据库中插入模板记录")

    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")
|
||
|
||
|
||
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    analyze_templates()
|