ai-business-write/update_template_paths_to_local.py

256 lines
8.3 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
Update the template paths stored in the database, replacing MinIO paths with local relative paths.
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Optional
from dotenv import load_dotenv
import difflib
# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Database connection settings. Every entry except the charset can be
# overridden through the matching DB_* environment variable.
# NOTE(review): the fallback host and password look like real credentials
# committed to source control — consider dropping the defaults and rotating
# the secret.
DB_CONFIG = dict(
    host=os.getenv('DB_HOST', '152.136.177.240'),
    port=int(os.getenv('DB_PORT', 5012)),
    user=os.getenv('DB_USER', 'finyx'),
    password=os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    database=os.getenv('DB_NAME', 'finyx'),
    charset='utf8mb4',
)

# Tenant whose template records are read by this script.
TENANT_ID = 615873064429507639

# Local template directory; PROJECT_ROOT is the directory containing this file.
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT.joinpath("template_finish")
def print_section(title):
    """Print *title* framed by a blank line and two 70-character '=' rules."""
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(success, message):
    """Print *message* prefixed with "[OK]" when *success* is truthy, else "[FAIL]"."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(f"{tag} {message}")
def scan_local_templates(base_dir: Path) -> Dict[str, str]:
    """
    Recursively collect the document templates found under *base_dir*.

    Only files whose suffix (case-insensitive) is .doc, .docx or .wps are
    considered.

    Args:
        base_dir: Directory to scan. It has to live inside PROJECT_ROOT,
            because the returned paths are computed relative to PROJECT_ROOT
            (Path.relative_to raises ValueError otherwise).

    Returns:
        Mapping of bare file name -> path string relative to PROJECT_ROOT,
        always using forward slashes. The mapping is empty when *base_dir*
        does not exist (a failure line is printed in that case). When the
        same file name occurs in several sub-directories, the entry that
        comes last in sorted path order wins and a warning is printed.
    """
    templates: Dict[str, str] = {}
    if not base_dir.exists():
        print_result(False, f"模板目录不存在: {base_dir}")
        return templates

    document_suffixes = ('.doc', '.docx', '.wps')
    # Sorted traversal keeps the result independent of the order in which the
    # filesystem happens to enumerate entries (matters for duplicate names).
    for file_path in sorted(base_dir.rglob('*')):
        if not file_path.is_file():
            continue
        if file_path.suffix.lower() not in document_suffixes:
            continue
        # as_posix() yields forward slashes on every platform without
        # touching backslashes that are part of a POSIX file name.
        relative_path_str = file_path.relative_to(PROJECT_ROOT).as_posix()
        file_name = file_path.name
        previous = templates.get(file_name)
        if previous is not None:
            # The mapping is keyed by bare file name, so one of the two files
            # has to be dropped; make that visible instead of silent.
            print(f" [警告] 文件名重复: {file_name} ({previous} 被 {relative_path_str} 覆盖)")
        templates[file_name] = relative_path_str
    return templates
def get_db_templates(conn) -> Dict[str, List[Dict]]:
    """
    Load the template rows of TENANT_ID and group them by file name.

    Only rows of f_polic_file_config with state = 1 and a non-empty
    file_path are selected.

    Args:
        conn: Open pymysql connection.

    Returns:
        Mapping of file name (last component of the stored file_path) ->
        list of dicts with the keys 'id', 'name' and 'file_path'. Several
        rows may share one file name, hence the list.
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        sql = """
            SELECT id, name, file_path
            FROM f_polic_file_config
            WHERE tenant_id = %s
            AND state = 1
            AND file_path IS NOT NULL
            AND file_path != ''
        """
        cursor.execute(sql, (TENANT_ID,))
        rows = cursor.fetchall()
    finally:
        cursor.close()

    result: Dict[str, List[Dict]] = {}
    for row in rows:
        file_path = row['file_path']
        if not file_path:
            continue
        # Stored paths may use either separator. On POSIX, Path() does not
        # split on backslashes, so normalise first to get the real last
        # component regardless of the platform running this script.
        file_name = Path(file_path.replace('\\', '/')).name
        result.setdefault(file_name, []).append({
            'id': row['id'],
            'name': row['name'],
            'file_path': file_path,
        })
    return result
def find_best_match(local_file_name: str, db_file_names: List[str], threshold: float = 0.8) -> Optional[str]:
    """
    Pick the candidate that best matches a given file name.

    An exact occurrence among the candidates wins outright; otherwise the
    closest candidate reported by difflib.get_close_matches is used.

    Args:
        local_file_name: File name to look up.
        db_file_names: Candidate file names to search.
        threshold: Similarity cutoff (between 0 and 1) for a fuzzy match.

    Returns:
        The matching candidate, or None when the candidate list is empty or
        no candidate reaches the cutoff.
    """
    if not db_file_names:
        return None

    # Exact hit: no need to compute similarity ratios.
    if local_file_name in db_file_names:
        return local_file_name

    closest = difflib.get_close_matches(
        local_file_name, db_file_names, n=1, cutoff=threshold
    )
    return next(iter(closest), None)
def update_template_path(conn, template_id: int, new_path: str, old_path: str):
    """
    Write a new file_path for one f_polic_file_config row and commit.

    Args:
        conn: Open database connection.
        template_id: Value of the row's id column.
        new_path: Path to store in file_path.
        old_path: Previous path; accepted but not used by the statement.

    Returns:
        True when the cursor reports at least one affected row.

    Raises:
        Exception: Whatever the execute/commit step raised, re-raised after
            the transaction has been rolled back.
    """
    statement = """
        UPDATE f_polic_file_config
        SET file_path = %s
        WHERE id = %s
    """
    db_cursor = conn.cursor()
    try:
        db_cursor.execute(statement, (new_path, template_id))
        conn.commit()
        affected = db_cursor.rowcount
        return affected > 0
    except Exception:
        # Undo the partial transaction, then let the caller see the error.
        conn.rollback()
        raise
    finally:
        db_cursor.close()
def main():
    """
    Rewrite the template paths stored in the database to local relative paths.

    Steps:
        1. Scan TEMPLATES_DIR for local template files.
        2. Connect to the database described by DB_CONFIG.
        3. Load the template rows, grouped by file name.
        4. For every file name, find the local file (exact name first, then
           fuzzy match) and update each row that does not already point
           into template_finish/.
        5. Print a summary of updated, skipped, unmatched and failed rows.

    The function returns early, after printing a failure line, when no local
    templates exist or the database connection cannot be opened.
    """
    print_section("更新模板路径从MinIO路径改为本地相对路径")

    # 1. Scan local template files
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    print_result(True, f"找到 {len(local_templates)} 个本地模板文件")
    if not local_templates:
        print_result(False, "未找到本地模板文件，请检查 template_finish 目录")
        return

    # 2. Connect to the database
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return
    print_result(True, "数据库连接成功")

    try:
        # 3. Load the template rows from the database
        print_section("3. 获取数据库中的模板配置")
        db_templates = get_db_templates(conn)
        print_result(True, f"找到 {sum(len(v) for v in db_templates.values())} 条数据库模板记录")

        # 4. Match rows against local files and update their paths
        print_section("4. 匹配并更新路径")
        updated_count = 0
        skipped_count = 0
        not_found_count = 0
        failed_count = 0
        # Candidate list for fuzzy matching; it never changes inside the loop.
        local_file_names = list(local_templates)

        for db_file_name, template_list in db_templates.items():
            local_path = local_templates.get(db_file_name)
            if not local_path:
                # No exact name match: fall back to fuzzy matching.
                matched_name = find_best_match(db_file_name, local_file_names)
                if matched_name:
                    local_path = local_templates[matched_name]
                    print(f" [模糊匹配] {db_file_name} -> {matched_name}")

            if not local_path:
                # No local file corresponds to this database file name.
                for template in template_list:
                    print(f" [未找到] ID={template['id']}, 名称={template['name']}, 文件名={db_file_name}")
                    not_found_count += 1
                continue

            # Several rows may reference the same file name; update each one.
            for template in template_list:
                old_path = template['file_path']
                # Already a local path: skip so reruns do not touch the row.
                if old_path.startswith('template_finish/'):
                    print(f" [跳过] ID={template['id']}, 名称={template['name']}, 已经是本地路径: {old_path}")
                    skipped_count += 1
                    continue

                try:
                    changed = update_template_path(conn, template['id'], local_path, old_path)
                except Exception as e:
                    print(f" [错误] ID={template['id']}, 更新失败: {str(e)}")
                    failed_count += 1
                    continue

                if not changed:
                    # The UPDATE ran but affected no row; do not report it
                    # as a successful update.
                    print(f" [错误] ID={template['id']}, 更新失败: 未影响任何记录")
                    failed_count += 1
                    continue

                print(f" [更新] ID={template['id']}, 名称={template['name']}")
                print(f" 旧路径: {old_path}")
                print(f" 新路径: {local_path}")
                updated_count += 1

        # 5. Summary
        print_section("5. 更新结果统计")
        print_result(True, f"成功更新: {updated_count} 条记录")
        if skipped_count > 0:
            print_result(True, f"跳过(已是本地路径): {skipped_count} 条记录")
        if not_found_count > 0:
            print_result(False, f"未找到匹配文件: {not_found_count} 条记录")
        if failed_count > 0:
            print_result(False, f"更新失败: {failed_count} 条记录")
        print_section("更新完成")
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")


# Run the migration only when the file is executed as a script.
if __name__ == "__main__":
    main()