ai-business-write/upload_templates_to_new_minio.py

329 lines
11 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
批量上传本地模板文件到新MinIO服务器
确保上传路径和文件名与数据库中的file_path字段值一致
"""
import os
import pymysql
from minio import Minio
from minio.error import S3Error
from pathlib import Path
from typing import Dict, List, Optional
from dotenv import load_dotenv
import difflib
# Load environment variables (python-dotenv) before the configuration below
# reads them through os.getenv.
load_dotenv()
# Configuration of the new MinIO server.
# Every value can be overridden with a NEW_MINIO_* environment variable, in the
# same way DB_CONFIG below is overridable; the literals are only the fallback
# defaults, so behavior is unchanged when no override is set.
NEW_MINIO_CONFIG = {
    'endpoint': os.getenv('NEW_MINIO_ENDPOINT', '10.100.31.21:9000'),
    'access_key': os.getenv('NEW_MINIO_ACCESS_KEY', 'minio_PC8dcY'),
    'secret_key': os.getenv('NEW_MINIO_SECRET_KEY', 'minio_7k7RNJ'),
    # Defaults to False. test_minio_connection() tries this value first and
    # then the opposite one, storing whichever value connected successfully.
    'secure': os.getenv('NEW_MINIO_SECURE', 'false').strip().lower()
    in ('1', 'true', 'yes', 'on'),
}

# Bucket that receives the templates, and the tenant whose rows are read.
BUCKET_NAME = 'finyx'
TENANT_ID = 615873064429507639

# Database connection settings (each overridable through the environment).
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}

# Local directory that is scanned (recursively) for template files.
TEMPLATES_DIR = Path('template_finish')
def print_section(title):
    """Print *title* framed above and below by a 70-character '=' rule."""
    rule = "=" * 70
    print("\n" + rule)
    print(f" {title}")
    print(rule)
def print_result(success, message):
    """Print *message* prefixed with ``[OK]`` if *success* is truthy, else ``[FAIL]``."""
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(f"{tag} {message}")
def get_minio_client(secure=None):
    """Build a Minio client from the endpoint and credentials in NEW_MINIO_CONFIG.

    Args:
        secure: Value passed as the client's ``secure`` flag. ``None`` means
            "use NEW_MINIO_CONFIG['secure']".

    Returns:
        A newly constructed ``Minio`` client.
    """
    settings = NEW_MINIO_CONFIG
    use_secure = settings['secure'] if secure is None else secure
    return Minio(
        settings['endpoint'],
        access_key=settings['access_key'],
        secret_key=settings['secret_key'],
        secure=use_secure,
    )
def test_minio_connection():
    """Connect to MinIO and confirm that BUCKET_NAME exists.

    The configured ``secure`` value is tried first, then its opposite. On
    success the working value is written back to NEW_MINIO_CONFIG['secure'].

    Returns:
        A connected Minio client, or ``None`` when both attempts fail or the
        bucket does not exist.
    """
    print_section("1. 测试MinIO连接")
    preferred = NEW_MINIO_CONFIG['secure']
    # Configured value first, then the opposite one as a fallback.
    for secure in (preferred, not preferred):
        try:
            print(f"\n尝试连接secure={secure}...")
            client = get_minio_client(secure=secure)
            # Listing buckets forces a real round trip to the server.
            client.list_buckets()
            print_result(True, f"MinIO连接成功secure={secure}")
            if not client.bucket_exists(BUCKET_NAME):
                print_result(False, f"存储桶 '{BUCKET_NAME}' 不存在")
                return None
            print_result(True, f"存储桶 '{BUCKET_NAME}' 存在")
            # Remember which setting actually worked.
            NEW_MINIO_CONFIG['secure'] = secure
            return client
        except Exception as e:
            if secure != preferred:
                # The fallback attempt failed as well: give up.
                print_result(False, f"MinIO连接失败: {str(e)}")
                return None
            print_result(False, f"使用secure={secure}连接失败: {str(e)}")
            print(" 将尝试另一个secure值...")
    return None
def get_db_templates(conn) -> Dict[str, Dict]:
    """Load the active template rows of TENANT_ID from f_polic_file_config.

    Args:
        conn: An open pymysql connection.

    Returns:
        A dict keyed by the bare file name (last component of ``file_path``),
        each value holding the row's ``id``, ``name`` and ``file_path``.
        When several rows share a file name, the last one fetched wins.
    """
    query = """
SELECT id, name, file_path
FROM f_polic_file_config
WHERE tenant_id = %s
AND state = 1
AND file_path IS NOT NULL
AND file_path != ''
"""
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (TENANT_ID,))
        rows = cursor.fetchall()
        # Key each row by its file name with the directory part removed.
        return {
            Path(row['file_path']).name: {
                'id': row['id'],
                'name': row['name'],
                'file_path': row['file_path'],
            }
            for row in rows
            if row['file_path']
        }
    finally:
        cursor.close()
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """Recursively collect the ``.docx`` files below *base_dir*.

    Args:
        base_dir: Directory to scan.

    Returns:
        A dict mapping each file name to its path. It is empty, and a warning
        is printed, when *base_dir* does not exist. Files in different
        sub-directories that share a name overwrite one another.
    """
    if base_dir.exists():
        return {found.name: found for found in base_dir.rglob('*.docx')}
    print(f"[WARN] 模板目录不存在: {base_dir}")
    return {}
def find_best_match(target_name: str, candidates: List[str], threshold=0.8) -> Optional[str]:
    """Pick the candidate that best matches *target_name*.

    Args:
        target_name: File name to look for.
        candidates: File names to choose from.
        threshold: Minimum difflib similarity for a fuzzy match.

    Returns:
        *target_name* itself when it is among the candidates, otherwise the
        closest candidate at or above *threshold*, otherwise ``None``.
    """
    if not candidates:
        return None
    # An exact hit always wins over any fuzzy candidate.
    if target_name in candidates:
        return target_name
    closest = difflib.get_close_matches(target_name, candidates, n=1, cutoff=threshold)
    return closest[0] if closest else None
def upload_file_to_minio(client: Minio, local_file: Path, object_name: str) -> bool:
    """Upload one local template to BUCKET_NAME and verify the stored object.

    Args:
        client: Connected Minio client.
        local_file: Path of the file to upload.
        object_name: Object key to store the file under.

    Returns:
        ``True`` when the upload succeeded and the stored object has the same
        size as the local file. ``False`` when the file is missing, the sizes
        differ, or any error occurs; errors are printed, never raised.
    """
    try:
        if not local_file.exists():
            print(f" [ERROR] 本地文件不存在: {local_file}")
            return False
        file_size = local_file.stat().st_size
        print(f" 上传: {local_file.name} ({file_size:,} 字节)")
        print(f" 目标路径: {object_name}")
        client.fput_object(
            BUCKET_NAME,
            object_name,
            str(local_file),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        # Verify the upload by comparing the stored size with the local size;
        # a mismatch means the object is incomplete and must not count as success.
        stat = client.stat_object(BUCKET_NAME, object_name)
        if stat.size != file_size:
            print(f" [ERROR] 上传校验失败: 本地 {file_size:,} 字节, 远端 {stat.size:,} 字节")
            return False
        print(f" [OK] 上传成功(验证大小: {stat.size:,} 字节)")
        return True
    except S3Error as e:
        print(f" [ERROR] MinIO错误: {str(e)}")
        return False
    except Exception as e:
        # Unexpected failure: print the traceback to help diagnosis, but keep
        # the batch running by reporting a plain failure.
        print(f" [ERROR] 上传失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
def _print_truncated(header, lines, limit=10):
    """Print *header*, then at most *limit* of *lines*, then how many were omitted."""
    print(header)
    for line in lines[:limit]:
        print(line)
    if len(lines) > limit:
        print(f" ... 还有 {len(lines) - limit}")


def main():
    """Upload every local template that matches a database template row.

    Steps: test the MinIO connection, connect to the database, load the
    template rows, scan TEMPLATES_DIR, match each row to a local file (exact
    name first, then fuzzy), upload it under the row's ``file_path``, and print
    a summary. Returns ``None`` in every case; failures are reported on stdout.
    """
    print("\n" + "="*70)
    print(" 批量上传模板文件到新MinIO服务器")
    print("="*70)

    # 1. Test the MinIO connection.
    client = test_minio_connection()
    if not client:
        print("\n[ERROR] 无法连接到MinIO服务器请检查配置")
        return

    # 2. Connect to the database.
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return

    try:
        # 3. Load the template rows from the database.
        print_section("3. 获取数据库模板配置")
        db_templates = get_db_templates(conn)
        print(f"\n数据库中找到 {len(db_templates)} 个模板配置")

        # 4. Scan the local template directory.
        print_section("4. 扫描本地模板文件")
        local_templates = scan_local_templates(TEMPLATES_DIR)
        print(f"\n本地找到 {len(local_templates)} 个模板文件")
        if not local_templates:
            print("[ERROR] 本地没有找到模板文件")
            return

        # 5. Match and upload.
        print_section("5. 匹配并上传文件")
        matched_count = 0
        uploaded_count = 0
        failed_count = 0
        unmatched_db = []
        # Local names used by some database row. They are recorded during the
        # single matching pass so the fuzzy matching never has to be repeated.
        matched_local_names = set()
        # Candidate list for fuzzy matching, built once rather than per row.
        local_names = list(local_templates)

        for db_file_name, db_config in db_templates.items():
            file_path = db_config['file_path']
            # Object keys carry no leading slash.
            object_name = file_path.lstrip('/')

            if db_file_name in local_templates:
                local_name = db_file_name
            else:
                local_name = find_best_match(db_file_name, local_names)
                if not local_name:
                    unmatched_db.append((db_file_name, db_config))
                    print(f"\n[WARN] 未找到匹配的本地文件: {db_file_name}")
                    continue
                print(f"\n[INFO] 使用模糊匹配: '{db_file_name}' -> '{local_name}'")

            matched_count += 1
            matched_local_names.add(local_name)
            local_file = local_templates[local_name]

            print(f"\n处理: {db_config['name']}")
            print(f" 数据库路径: {file_path}")
            if upload_file_to_minio(client, local_file, object_name):
                uploaded_count += 1
            else:
                failed_count += 1

        # Local files that no database row referred to.
        unmatched_local = [
            (local_name, local_path)
            for local_name, local_path in local_templates.items()
            if local_name not in matched_local_names
        ]

        # 6. Summary.
        print_section("6. 上传总结")
        print(f"\n匹配统计:")
        print(f" 数据库模板数: {len(db_templates)}")
        print(f" 本地文件数: {len(local_templates)}")
        print(f" 成功匹配: {matched_count}")
        print(f" 成功上传: {uploaded_count}")
        print(f" 上传失败: {failed_count}")

        if unmatched_db:
            _print_truncated(
                f"\n[WARN] 数据库中有 {len(unmatched_db)} 个模板未找到本地文件:",
                [
                    f" - {file_name} (ID: {config['id']}, 名称: {config['name']})"
                    for file_name, config in unmatched_db
                ],
            )
        if unmatched_local:
            _print_truncated(
                f"\n[INFO] 本地有 {len(unmatched_local)} 个文件未在数据库中找到:",
                [
                    f" - {file_name} ({file_path})"
                    for file_name, file_path in unmatched_local
                ],
            )

        if uploaded_count == matched_count and matched_count > 0:
            print_result(True, f"所有匹配的文件都已成功上传!")
        elif uploaded_count > 0:
            print_result(True, f"成功上传 {uploaded_count} 个文件")
        else:
            print_result(False, "没有文件被上传")

        print(f"\n使用的MinIO配置:")
        print(f" 端点: {NEW_MINIO_CONFIG['endpoint']}")
        print(f" 存储桶: {BUCKET_NAME}")
        print(f" 使用HTTPS: {NEW_MINIO_CONFIG['secure']}")
    finally:
        # Always release the database connection, including on early return.
        conn.close()
# Run the upload only when executed as a script, not when imported.
if "__main__" == __name__:
    main()