440 lines
15 KiB
Python
440 lines
15 KiB
Python
"""
|
||
更新数据库模板路径并测试文档生成接口
|
||
1. 更新数据库中的模板路径(MinIO路径改为本地路径)
|
||
2. 为本地有但数据库中没有的模板创建记录
|
||
3. 测试文档生成接口
|
||
4. 验证占位符识别
|
||
"""
|
||
import os
|
||
import pymysql
|
||
from pathlib import Path
|
||
from typing import Dict, List, Set
|
||
from dotenv import load_dotenv
|
||
import re
|
||
from docx import Document
|
||
import requests
|
||
import json
|
||
|
||
# Load environment variables via python-dotenv before any setting is read.
load_dotenv()

# Database connection settings; every value except the charset can be
# overridden through the environment.
# NOTE(review): the fallback host, user and password below look like real
# credentials committed to source control - consider dropping the defaults
# and rotating the password.
DB_CONFIG = dict(
    host=os.environ.get('DB_HOST', '152.136.177.240'),
    port=int(os.environ.get('DB_PORT', 5012)),
    user=os.environ.get('DB_USER', 'finyx'),
    password=os.environ.get('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    database=os.environ.get('DB_NAME', 'finyx'),
    charset='utf8mb4',
)

# tenant_id actually in use (found to be 1 in the database).
TENANT_ID = 1
# Values written to the created_by / updated_by columns of inserted rows.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152

# Project root: the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent
# Directory scanned for local template files.
TEMPLATES_DIR = PROJECT_ROOT.joinpath("template_finish")

# API settings (only needed when the generation endpoint is exercised).
API_BASE_URL = os.environ.get('API_BASE_URL', 'http://localhost:5000')
|
||
|
||
|
||
def print_section(title):
    """Print ``title`` as a section heading.

    The output is a blank line, a rule of 70 ``=`` characters, the title
    prefixed with one space, and a closing rule of 70 ``=`` characters.
    """
    rule = "=" * 70
    for line in ("\n" + rule, f" {title}", rule):
        print(line)
|
||
|
||
|
||
def print_result(success, message):
    """Print ``message`` prefixed with a status tag.

    The tag is ``[OK]`` when ``success`` is truthy and ``[FAIL]`` otherwise.
    """
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(f"{tag} {message}")
|
||
|
||
|
||
def generate_id():
    """Return an integer id that is unique within this process.

    The id is the current wall-clock time in microseconds. Two calls made
    within the same microsecond (or on a platform with a coarse clock) would
    otherwise return the same value, so the last issued id is remembered on
    the function object and the new id is bumped to ``last + 1`` whenever the
    clock has not advanced past it. Ids are therefore strictly increasing
    for the lifetime of the process. No locking is performed.

    Returns:
        int: a microsecond-resolution timestamp-based id.
    """
    import time

    candidate = int(time.time() * 1000000)
    previous = getattr(generate_id, "_last_id", 0)
    if candidate <= previous:
        # Clock did not move forward since the previous call: avoid a duplicate.
        candidate = previous + 1
    generate_id._last_id = candidate
    return candidate
|
||
|
||
|
||
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """Recursively collect template files found under ``base_dir``.

    Only regular files whose suffix (case-insensitive) is ``.doc``, ``.docx``
    or ``.wps`` are included.

    Args:
        base_dir: directory to walk; if it does not exist the result is empty.

    Returns:
        A dict mapping each file's path relative to ``PROJECT_ROOT`` (with
        backslashes replaced by ``/``) to the path yielded by the walk.
    """
    found = {}
    if not base_dir.exists():
        return found

    wanted_suffixes = ('.doc', '.docx', '.wps')
    for candidate in base_dir.rglob('*'):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in wanted_suffixes:
            continue
        # Keys are always relative to the project root, not to base_dir.
        key = str(candidate.relative_to(PROJECT_ROOT)).replace('\\', '/')
        found[key] = candidate

    return found
|
||
|
||
|
||
def get_db_templates(conn) -> Dict[str, Dict]:
    """Load the template configuration rows of the current tenant.

    Selects ``id``, ``name``, ``file_path``, ``state`` and ``parent_id`` from
    ``f_polic_file_config`` for ``TENANT_ID``.

    Args:
        conn: open database connection used to create a dict cursor.

    Returns:
        A dict keyed by ``file_path`` whose values hold the five selected
        columns. Rows with an empty or NULL ``file_path`` are skipped; when
        several rows share a path, the last one fetched is kept.
    """
    query = """
        SELECT id, name, file_path, state, parent_id
        FROM f_polic_file_config
        WHERE tenant_id = %s
    """
    columns = ('id', 'name', 'file_path', 'state', 'parent_id')

    cur = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cur.execute(query, (TENANT_ID,))
        rows = cur.fetchall()
        return {
            row['file_path']: {column: row[column] for column in columns}
            for row in rows
            if row['file_path']
        }
    finally:
        cur.close()
|
||
|
||
|
||
# Matches ``{{ field_code }}`` placeholders; compiled once instead of per call.
_PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def _collect_placeholders(paragraphs, placeholders: Set[str]) -> None:
    """Add every non-blank, stripped placeholder name in ``paragraphs`` to ``placeholders``."""
    for paragraph in paragraphs:
        for raw_code in _PLACEHOLDER_PATTERN.findall(paragraph.text):
            field_code = raw_code.strip()
            if field_code:
                placeholders.add(field_code)


def extract_placeholders_from_docx(file_path: Path) -> Set[str]:
    """Extract all ``{{placeholder}}`` names from a docx file.

    Placeholders are read from the document's paragraphs and from the
    paragraphs of every cell of every table in ``doc.tables``. Names are
    stripped of surrounding whitespace; blank names are ignored.

    Extraction is best-effort: a table that cannot be walked is skipped with
    a warning, and a file that cannot be opened is reported on stdout.
    Whatever was collected up to that point is still returned.

    Args:
        file_path: path of the document to open.

    Returns:
        The set of placeholder names found (possibly empty).
    """
    placeholders = set()

    try:
        doc = Document(file_path)

        # Placeholders in the document's paragraphs.
        _collect_placeholders(doc.paragraphs, placeholders)

        # Placeholders inside table cells.
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        _collect_placeholders(cell.paragraphs, placeholders)
            except Exception as e:
                # Previously a bare ``except`` that also swallowed
                # KeyboardInterrupt/SystemExit without a trace. Keep the
                # skip-this-table behavior, but say so.
                print(f" [警告] 跳过无法解析的表格: {str(e)}")
                continue
    except Exception as e:
        print(f" [错误] 读取文件失败: {str(e)}")

    return placeholders
|
||
|
||
|
||
def update_minio_paths_to_local(conn):
    """Re-point MinIO-style ``file_path`` values at matching local templates.

    Rows of ``f_polic_file_config`` for ``TENANT_ID`` whose ``file_path``
    matches ``/%TEMPLATE/%`` are looked up by file name among the files
    returned by ``scan_local_templates(TEMPLATES_DIR)``. When a local file
    with the same name exists, the row's ``file_path`` is replaced by that
    file's project-relative path and committed. Each row is updated in its
    own transaction; a failed update is rolled back and reported.

    Args:
        conn: open database connection.

    Returns:
        int: number of rows successfully updated (0 when no row matches).
    """
    print_section("更新MinIO路径为本地路径")

    reader = conn.cursor(pymysql.cursors.DictCursor)
    try:
        # Find every row that still carries a MinIO path.
        select_sql = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND file_path LIKE '/%%TEMPLATE/%%'
        """
        reader.execute(select_sql, (TENANT_ID,))
        minio_rows = reader.fetchall()

        if not minio_rows:
            print_result(True, "没有找到MinIO路径,无需更新")
            return 0

        print(f" 找到 {len(minio_rows)} 条MinIO路径记录")

        # Index local templates by bare file name; the first path seen for a
        # given name wins, as in a first-match linear search.
        local_by_name = {}
        for rel_path, abs_path in scan_local_templates(TEMPLATES_DIR).items():
            local_by_name.setdefault(abs_path.name, rel_path)

        update_sql = """
            UPDATE f_polic_file_config
            SET file_path = %s
            WHERE id = %s
        """
        changed = 0
        for row in minio_rows:
            previous_path = row['file_path']
            # Only the file name of the MinIO path is used for matching.
            wanted_name = Path(previous_path).name
            new_path = local_by_name.get(wanted_name)

            if not new_path:
                print(f" [未找到] ID={row['id']}, 名称={row['name']}, 文件名={wanted_name}")
                continue

            writer = conn.cursor()
            try:
                writer.execute(update_sql, (new_path, row['id']))
                conn.commit()
                print(f" [更新] ID={row['id']}, 名称={row['name']}")
                print(f" 旧路径: {previous_path}")
                print(f" 新路径: {new_path}")
                changed += 1
            except Exception as exc:
                conn.rollback()
                print(f" [错误] 更新失败: {str(exc)}")
            finally:
                writer.close()

        return changed
    finally:
        reader.close()
|
||
|
||
|
||
def create_missing_templates(conn, local_templates: Dict[str, Path], db_templates: Dict[str, Dict]):
    """Insert a config row for every local template that has no row yet.

    A local template is considered missing when its relative path is not a
    key of ``db_templates``. Each insert is committed on its own; a failed
    insert is rolled back and reported, and processing continues.

    Args:
        conn: open database connection.
        local_templates: relative path -> file path of local templates.
        db_templates: existing rows keyed by ``file_path``.

    Returns:
        int: number of rows created (0 when nothing is missing).
    """
    print_section("创建缺失的模板记录")

    pending = [rel_path for rel_path in local_templates if rel_path not in db_templates]
    if not pending:
        print_result(True, "所有本地模板都已存在于数据库中")
        return 0

    print(f" 找到 {len(pending)} 个缺失的模板")

    insert_sql = """
        INSERT INTO f_polic_file_config
        (id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
        VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
    """

    created = 0
    cur = conn.cursor()
    try:
        for rel_path in pending:
            # The record name is the file name without its extension.
            display_name = local_templates[rel_path].stem
            new_id = generate_id()
            params = (
                new_id,
                TENANT_ID,
                None,  # parent_id; could be derived from the directory layout
                display_name,
                '{}',  # input_data
                rel_path,
                CREATED_BY,
                UPDATED_BY,
            )
            try:
                cur.execute(insert_sql, params)
                conn.commit()
                print(f" [创建] ID={new_id}, 名称={display_name}, 路径={rel_path}")
                created += 1
            except Exception as exc:
                conn.rollback()
                print(f" [错误] 创建失败: {str(exc)}")
    finally:
        cur.close()

    return created
|
||
|
||
|
||
def test_document_generation(conn, api_url: str = None):
    """Smoke-test the document generation endpoint with one local template.

    Picks one enabled (``state = 1``) row of the current tenant whose
    ``file_path`` starts with ``template_finish/``, checks that the file
    exists under ``PROJECT_ROOT``, and extracts its placeholders. Returns
    early, after reporting the reason, if no row is found, the file is
    missing, or it has no placeholders.

    Args:
        conn: open database connection.
        api_url: base URL of the service. When truthy, a POST is sent to
            ``{api_url}/api/document/generate`` with test values for up to
            the first ten placeholders (sorted). When falsy, an example
            request is printed instead and no HTTP call is made.

    Returns:
        None. All results are reported on stdout.
    """
    print_section("测试文档生成接口")

    # Fetch one template that already points at a local path.
    cur = conn.cursor(pymysql.cursors.DictCursor)
    try:
        query = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND file_path IS NOT NULL
            AND file_path != ''
            AND file_path LIKE 'template_finish/%%'
            AND state = 1
            LIMIT 1
        """
        cur.execute(query, (TENANT_ID,))
        template = cur.fetchone()

        if not template:
            print_result(False, "没有找到可测试的模板")
            return

        print(f" 测试模板:")
        print(f" ID: {template['id']}")
        print(f" 名称: {template['name']}")
        print(f" 路径: {template['file_path']}")

        # The stored path is relative to the project root.
        local_file = PROJECT_ROOT / template['file_path']
        if not local_file.exists():
            print_result(False, f"本地文件不存在: {local_file}")
            return

        placeholders = extract_placeholders_from_docx(local_file)
        ordered = sorted(placeholders)
        print(f" 占位符: {ordered}")

        if not placeholders:
            print_result(False, "模板中没有找到占位符,无法测试")
            return

        if not api_url:
            # No URL supplied: show a request the user can send manually.
            print("\n [提示] 未提供API URL,跳过接口测试")
            print(" 可以手动测试以下请求:")
            print(f" POST {API_BASE_URL}/api/document/generate")
            print(f" {{")
            print(f" \"fileId\": {template['id']},")
            print(f" \"inputData\": [")
            for placeholder in ordered[:5]:
                print(f" {{\"fieldCode\": \"{placeholder}\", \"fieldValue\": \"测试值\"}},")
            print(f" ...")
            print(f" ]")
            print(f" }}")
            return

        endpoint = f"{api_url}/api/document/generate"
        print(f"\n 测试API接口: {endpoint}")

        # Build test data from the first ten placeholders only.
        payload = {
            "fileId": template['id'],
            "inputData": [
                {"fieldCode": placeholder, "fieldValue": f"测试值_{placeholder}"}
                for placeholder in ordered[:10]
            ],
        }

        try:
            response = requests.post(endpoint, json=payload, timeout=30)

            if response.status_code != 200:
                print_result(False, f"API调用失败: {response.status_code}")
                print(f" 响应: {response.text}")
            else:
                result = response.json()
                print_result(True, "API调用成功")
                print(f" 响应: {json.dumps(result, ensure_ascii=False, indent=2)}")
        except requests.exceptions.RequestException as exc:
            print_result(False, f"API请求异常: {str(exc)}")
            print(" [提示] 请确保服务已启动,或手动测试接口")

    finally:
        cur.close()
|
||
|
||
|
||
def scan_all_placeholders(local_templates: Dict[str, Path]):
    """Extract the placeholders of every local template and print a summary.

    Args:
        local_templates: relative path -> file path of local templates.

    Returns:
        A ``(template_placeholders, all_placeholders)`` tuple: a dict mapping
        each relative path to the set of placeholders found in that file,
        and the union of all those sets.
    """
    print_section("扫描所有模板的占位符")

    per_template = {
        rel_path: extract_placeholders_from_docx(abs_path)
        for rel_path, abs_path in local_templates.items()
    }

    combined = set()
    for found in per_template.values():
        combined.update(found)

    non_empty = sum(1 for found in per_template.values() if found)
    empty = len(per_template) - non_empty

    print(f" 扫描了 {len(local_templates)} 个模板")
    print(f" - 有占位符的模板: {non_empty} 个")
    print(f" - 无占位符的模板: {empty} 个")
    print(f" - 发现的占位符总数: {len(combined)} 个")
    print(f"\n 所有占位符列表:")
    for name in sorted(combined):
        print(f" - {name}")

    return per_template, combined
|
||
|
||
|
||
def main():
    """Run the whole workflow: sync template paths, then test generation.

    Steps:
        1. Scan local template files; stop if none are found.
        2. Connect to the database; stop if the connection fails.
        3. Load the template rows already stored in the database.
        4. Replace MinIO paths with local paths where a file name matches.
        5. Create rows for local templates that have no row yet.
        6. Scan all local templates for placeholders.
        7. Exercise the document generation endpoint. This happens only when
           ``API_BASE_URL`` differs from the default
           ``http://localhost:5000``; otherwise an example request is printed.

    The connection is always closed once it has been opened.
    """
    print_section("模板路径更新和接口测试")

    # 1. Scan local templates.
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    print_result(True, f"找到 {len(local_templates)} 个本地模板文件")

    if not local_templates:
        print_result(False, "未找到本地模板文件")
        return

    # 2. Connect to the database.
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return

    try:
        # 3. Load the templates stored in the database.
        print_section("3. 获取数据库中的模板配置")
        db_templates = get_db_templates(conn)
        print_result(True, f"找到 {len(db_templates)} 条数据库模板记录(有file_path的)")

        # 4. Replace MinIO paths with local paths.
        updated_count = update_minio_paths_to_local(conn)
        if updated_count > 0:
            print_result(True, f"成功更新 {updated_count} 条MinIO路径")
            # The snapshot from step 3 is still keyed by the old MinIO paths.
            # Without a refresh, step 5 would treat the rows just re-pointed
            # to local paths as missing and insert duplicates for them.
            db_templates = get_db_templates(conn)

        # 5. Create rows for templates that exist only locally.
        created_count = create_missing_templates(conn, local_templates, db_templates)
        if created_count > 0:
            print_result(True, f"成功创建 {created_count} 条模板记录")

        # 6. Scan all placeholders (the summary is printed by the callee).
        scan_all_placeholders(local_templates)

        # 7. Test the generation endpoint only for a non-default base URL.
        test_document_generation(conn, API_BASE_URL if API_BASE_URL != 'http://localhost:5000' else None)

    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")

    print_section("完成")
|
||
|
||
|
||
# Script entry point: run main() only when this file is executed directly.
if __name__ == "__main__":
    main()
|