ai-business-write/update_and_test_templates.py
2025-12-26 09:16:31 +08:00

440 lines
15 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
更新数据库模板路径并测试文档生成接口
1. 更新数据库中的模板路径MinIO路径改为本地路径
2. 为本地有但数据库中没有的模板创建记录
3. 测试文档生成接口
4. 验证占位符识别
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Set
from dotenv import load_dotenv
import re
from docx import Document
import requests
import json
# 加载环境变量
load_dotenv()
# 数据库配置
DB_CONFIG = {
'host': os.getenv('DB_HOST', '152.136.177.240'),
'port': int(os.getenv('DB_PORT', 5012)),
'user': os.getenv('DB_USER', 'finyx'),
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
'database': os.getenv('DB_NAME', 'finyx'),
'charset': 'utf8mb4'
}
# 实际使用的 tenant_id从数据库中发现是1
TENANT_ID = 1
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
# 项目根目录
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
# API配置如果需要测试接口
API_BASE_URL = os.getenv('API_BASE_URL', 'http://localhost:5000')
def print_section(title):
"""打印章节标题"""
print("\n" + "="*70)
print(f" {title}")
print("="*70)
def print_result(success, message):
"""打印结果"""
status = "[OK]" if success else "[FAIL]"
print(f"{status} {message}")
def generate_id():
"""生成ID"""
import time
return int(time.time() * 1000000)
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
"""扫描本地模板文件"""
templates = {}
if not base_dir.exists():
return templates
for file_path in base_dir.rglob('*'):
if file_path.is_file() and file_path.suffix.lower() in ['.doc', '.docx', '.wps']:
relative_path = file_path.relative_to(PROJECT_ROOT)
relative_path_str = str(relative_path).replace('\\', '/')
templates[relative_path_str] = file_path
return templates
def get_db_templates(conn) -> Dict[str, Dict]:
"""从数据库获取所有模板配置"""
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
sql = """
SELECT id, name, file_path, state, parent_id
FROM f_polic_file_config
WHERE tenant_id = %s
"""
cursor.execute(sql, (TENANT_ID,))
templates = cursor.fetchall()
result = {}
for template in templates:
file_path = template['file_path']
if file_path:
result[file_path] = {
'id': template['id'],
'name': template['name'],
'file_path': file_path,
'state': template['state'],
'parent_id': template['parent_id']
}
return result
finally:
cursor.close()
def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
"""从docx文件中提取所有占位符"""
placeholders = set()
placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')
try:
doc = Document(file_path)
# 从段落中提取
for paragraph in doc.paragraphs:
text = paragraph.text
matches = placeholder_pattern.findall(text)
for match in matches:
field_code = match.strip()
if field_code:
placeholders.add(field_code)
# 从表格中提取
for table in doc.tables:
try:
for row in table.rows:
for cell in row.cells:
for paragraph in cell.paragraphs:
text = paragraph.text
matches = placeholder_pattern.findall(text)
for match in matches:
field_code = match.strip()
if field_code:
placeholders.add(field_code)
except:
continue
except Exception as e:
print(f" [错误] 读取文件失败: {str(e)}")
return placeholders
def update_minio_paths_to_local(conn):
"""更新MinIO路径为本地路径"""
print_section("更新MinIO路径为本地路径")
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
# 查找所有MinIO路径
sql = """
SELECT id, name, file_path
FROM f_polic_file_config
WHERE tenant_id = %s
AND file_path LIKE '/%%TEMPLATE/%%'
"""
cursor.execute(sql, (TENANT_ID,))
minio_templates = cursor.fetchall()
if not minio_templates:
print_result(True, "没有找到MinIO路径无需更新")
return 0
print(f" 找到 {len(minio_templates)} 条MinIO路径记录")
# 扫描本地模板
local_templates = scan_local_templates(TEMPLATES_DIR)
updated_count = 0
for template in minio_templates:
old_path = template['file_path']
# 从MinIO路径中提取文件名
file_name = Path(old_path).name
# 在本地模板中查找匹配的文件
matched_path = None
for local_path, local_file in local_templates.items():
if local_file.name == file_name:
matched_path = local_path
break
if matched_path:
# 更新路径
update_cursor = conn.cursor()
try:
update_cursor.execute("""
UPDATE f_polic_file_config
SET file_path = %s
WHERE id = %s
""", (matched_path, template['id']))
conn.commit()
print(f" [更新] ID={template['id']}, 名称={template['name']}")
print(f" 旧路径: {old_path}")
print(f" 新路径: {matched_path}")
updated_count += 1
except Exception as e:
conn.rollback()
print(f" [错误] 更新失败: {str(e)}")
finally:
update_cursor.close()
else:
print(f" [未找到] ID={template['id']}, 名称={template['name']}, 文件名={file_name}")
return updated_count
finally:
cursor.close()
def create_missing_templates(conn, local_templates: Dict[str, Path], db_templates: Dict[str, Dict]):
"""为本地有但数据库中没有的模板创建记录"""
print_section("创建缺失的模板记录")
missing_templates = []
for local_path in local_templates.keys():
if local_path not in db_templates:
missing_templates.append(local_path)
if not missing_templates:
print_result(True, "所有本地模板都已存在于数据库中")
return 0
print(f" 找到 {len(missing_templates)} 个缺失的模板")
created_count = 0
cursor = conn.cursor()
try:
for local_path in missing_templates:
file_path = local_templates[local_path]
file_name = file_path.stem # 不含扩展名的文件名
# 生成模板ID
template_id = generate_id()
# 插入记录
try:
cursor.execute("""
INSERT INTO f_polic_file_config
(id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
""", (
template_id,
TENANT_ID,
None, # parent_id可以根据目录结构设置
file_name,
'{}', # input_data
local_path,
CREATED_BY,
UPDATED_BY
))
conn.commit()
print(f" [创建] ID={template_id}, 名称={file_name}, 路径={local_path}")
created_count += 1
except Exception as e:
conn.rollback()
print(f" [错误] 创建失败: {str(e)}")
finally:
cursor.close()
return created_count
def test_document_generation(conn, api_url: str = None):
"""测试文档生成接口"""
print_section("测试文档生成接口")
# 获取一个有本地路径的模板
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
sql = """
SELECT id, name, file_path
FROM f_polic_file_config
WHERE tenant_id = %s
AND file_path IS NOT NULL
AND file_path != ''
AND file_path LIKE 'template_finish/%%'
AND state = 1
LIMIT 1
"""
cursor.execute(sql, (TENANT_ID,))
template = cursor.fetchone()
if not template:
print_result(False, "没有找到可测试的模板")
return
print(f" 测试模板:")
print(f" ID: {template['id']}")
print(f" 名称: {template['name']}")
print(f" 路径: {template['file_path']}")
# 检查本地文件是否存在
local_file = PROJECT_ROOT / template['file_path']
if not local_file.exists():
print_result(False, f"本地文件不存在: {local_file}")
return
# 提取占位符
placeholders = extract_placeholders_from_docx(local_file)
print(f" 占位符: {sorted(placeholders)}")
if not placeholders:
print_result(False, "模板中没有找到占位符,无法测试")
return
# 如果提供了API URL测试接口
if api_url:
print(f"\n 测试API接口: {api_url}/api/document/generate")
# 构建测试数据
input_data = []
for placeholder in sorted(placeholders)[:10]: # 只测试前10个占位符
input_data.append({
"fieldCode": placeholder,
"fieldValue": f"测试值_{placeholder}"
})
payload = {
"fileId": template['id'],
"inputData": input_data
}
try:
response = requests.post(
f"{api_url}/api/document/generate",
json=payload,
timeout=30
)
if response.status_code == 200:
result = response.json()
print_result(True, "API调用成功")
print(f" 响应: {json.dumps(result, ensure_ascii=False, indent=2)}")
else:
print_result(False, f"API调用失败: {response.status_code}")
print(f" 响应: {response.text}")
except requests.exceptions.RequestException as e:
print_result(False, f"API请求异常: {str(e)}")
print(" [提示] 请确保服务已启动,或手动测试接口")
else:
print("\n [提示] 未提供API URL跳过接口测试")
print(" 可以手动测试以下请求:")
print(f" POST {API_BASE_URL}/api/document/generate")
print(f" {{")
print(f" \"fileId\": {template['id']},")
print(f" \"inputData\": [")
for placeholder in sorted(placeholders)[:5]:
print(f" {{\"fieldCode\": \"{placeholder}\", \"fieldValue\": \"测试值\"}},")
print(f" ...")
print(f" ]")
print(f" }}")
finally:
cursor.close()
def scan_all_placeholders(local_templates: Dict[str, Path]):
"""扫描所有模板的占位符"""
print_section("扫描所有模板的占位符")
all_placeholders = set()
template_placeholders = {}
templates_with_placeholders = 0
templates_without_placeholders = 0
for rel_path, file_path in local_templates.items():
placeholders = extract_placeholders_from_docx(file_path)
template_placeholders[rel_path] = placeholders
all_placeholders.update(placeholders)
if placeholders:
templates_with_placeholders += 1
else:
templates_without_placeholders += 1
print(f" 扫描了 {len(local_templates)} 个模板")
print(f" - 有占位符的模板: {templates_with_placeholders}")
print(f" - 无占位符的模板: {templates_without_placeholders}")
print(f" - 发现的占位符总数: {len(all_placeholders)}")
print(f"\n 所有占位符列表:")
for placeholder in sorted(all_placeholders):
print(f" - {placeholder}")
return template_placeholders, all_placeholders
def main():
"""主函数"""
print_section("模板路径更新和接口测试")
# 1. 扫描本地模板
print_section("1. 扫描本地模板文件")
local_templates = scan_local_templates(TEMPLATES_DIR)
print_result(True, f"找到 {len(local_templates)} 个本地模板文件")
if not local_templates:
print_result(False, "未找到本地模板文件")
return
# 2. 连接数据库
print_section("2. 连接数据库")
try:
conn = pymysql.connect(**DB_CONFIG)
print_result(True, "数据库连接成功")
except Exception as e:
print_result(False, f"数据库连接失败: {str(e)}")
return
try:
# 3. 获取数据库中的模板
print_section("3. 获取数据库中的模板配置")
db_templates = get_db_templates(conn)
print_result(True, f"找到 {len(db_templates)} 条数据库模板记录有file_path的")
# 4. 更新MinIO路径
updated_count = update_minio_paths_to_local(conn)
if updated_count > 0:
print_result(True, f"成功更新 {updated_count} 条MinIO路径")
# 5. 创建缺失的模板记录
created_count = create_missing_templates(conn, local_templates, db_templates)
if created_count > 0:
print_result(True, f"成功创建 {created_count} 条模板记录")
# 6. 扫描所有占位符
template_placeholders, all_placeholders = scan_all_placeholders(local_templates)
# 7. 测试文档生成接口
test_document_generation(conn, API_BASE_URL if API_BASE_URL != 'http://localhost:5000' else None)
finally:
conn.close()
print_result(True, "数据库连接已关闭")
print_section("完成")
if __name__ == "__main__":
main()