329 lines
11 KiB
Python
329 lines
11 KiB
Python
"""
|
||
批量上传本地模板文件到新MinIO服务器
|
||
确保上传路径和文件名与数据库中的file_path字段值一致
|
||
"""
|
||
import os
|
||
import pymysql
|
||
from minio import Minio
|
||
from minio.error import S3Error
|
||
from pathlib import Path
|
||
from typing import Dict, List, Optional
|
||
from dotenv import load_dotenv
|
||
import difflib
|
||
|
||
# 加载环境变量
|
||
load_dotenv()
|
||
|
||
# Settings for the new MinIO server.
# Every value can be overridden through a NEW_MINIO_* environment variable,
# the same way DB_CONFIG below is resolved; the literals are only defaults.
# The NEW_ prefix keeps these apart from any other MinIO settings that the
# loaded environment may already define.
# NOTE(review): credentials committed as defaults should be rotated and
# supplied through the environment instead.
NEW_MINIO_CONFIG = {
    'endpoint': os.getenv('NEW_MINIO_ENDPOINT', '10.100.31.21:9000'),
    'access_key': os.getenv('NEW_MINIO_ACCESS_KEY', 'minio_PC8dcY'),
    'secret_key': os.getenv('NEW_MINIO_SECRET_KEY', 'minio_7k7RNJ'),
    # Plain HTTP unless NEW_MINIO_SECURE is 1/true/yes/on.
    # test_minio_connection() tries this value first and falls back to the
    # opposite one if the connection fails.
    'secure': os.getenv('NEW_MINIO_SECURE', 'false').strip().lower()
    in ('1', 'true', 'yes', 'on'),
}

# Bucket that receives the uploaded templates.
BUCKET_NAME = 'finyx'

# Tenant whose template rows are selected from the database.
TENANT_ID = 615873064429507639

# Database connection settings (environment values take precedence).
# NOTE(review): the fallback password is committed in source; prefer
# providing DB_PASSWORD through the environment.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', '5012')),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}

# Local directory that is scanned for template files.
TEMPLATES_DIR = Path('template_finish')
|
||
|
||
def print_section(title):
    """Print *title* framed by two 70-character '=' rules, preceded by a blank line."""
    rule = "=" * 70
    print("\n" + rule, f" {title}", rule, sep="\n")
|
||
|
||
def print_result(success, message):
    """Print *message* prefixed with "[OK]" when *success* is truthy, else "[FAIL]"."""
    if success:
        prefix = "[OK]"
    else:
        prefix = "[FAIL]"
    print(f"{prefix} {message}")
|
||
|
||
def get_minio_client(secure=None):
    """Build a Minio client from NEW_MINIO_CONFIG.

    Args:
        secure: Value passed to Minio as ``secure``. When None, the
            configured NEW_MINIO_CONFIG['secure'] value is used.

    Returns:
        A Minio client for the configured endpoint and credentials.
    """
    use_secure = NEW_MINIO_CONFIG['secure'] if secure is None else secure
    settings = NEW_MINIO_CONFIG
    return Minio(
        settings['endpoint'],
        access_key=settings['access_key'],
        secret_key=settings['secret_key'],
        secure=use_secure,
    )
|
||
|
||
def test_minio_connection():
    """Probe the MinIO server and return a usable client.

    The configured ``secure`` value is tried first; if that attempt raises,
    the opposite value is tried once. On success the working value is written
    back to NEW_MINIO_CONFIG['secure'].

    Returns:
        The connected client when the server is reachable and BUCKET_NAME
        exists; None when the bucket is missing or both attempts fail.
    """
    print_section("1. 测试MinIO连接")

    preferred = NEW_MINIO_CONFIG['secure']
    # Configured scheme first, then the opposite one as a fallback.
    for secure in (preferred, not preferred):
        try:
            print(f"\n尝试连接(secure={secure})...")
            client = get_minio_client(secure=secure)
            # Listing buckets is only a connectivity probe; the result is unused.
            client.list_buckets()
            print_result(True, f"MinIO连接成功(secure={secure})")

            # The server is reachable, so a missing bucket is final:
            # the other scheme is not tried.
            if not client.bucket_exists(BUCKET_NAME):
                print_result(False, f"存储桶 '{BUCKET_NAME}' 不存在")
                return None

            print_result(True, f"存储桶 '{BUCKET_NAME}' 存在")
            # Remember the scheme that worked.
            NEW_MINIO_CONFIG['secure'] = secure
            return client

        except Exception as exc:
            if secure != preferred:
                # The fallback attempt failed as well.
                print_result(False, f"MinIO连接失败: {exc}")
                return None
            print_result(False, f"使用secure={secure}连接失败: {exc}")
            print(" 将尝试另一个secure值...")

    return None
|
||
|
||
def get_db_templates(conn) -> Dict[str, Dict]:
    """Load the template rows for TENANT_ID, keyed by file name.

    Selects rows from f_polic_file_config with state = 1 and a non-empty
    file_path.

    Args:
        conn: Open database connection used to create a DictCursor.

    Returns:
        A dict mapping the base name of each row's ``file_path`` to a dict
        holding that row's ``id``, ``name`` and full ``file_path``. If several
        rows share a base name, the row fetched last replaces earlier ones.
    """
    query = """
        SELECT id, name, file_path
        FROM f_polic_file_config
        WHERE tenant_id = %s
        AND state = 1
        AND file_path IS NOT NULL
        AND file_path != ''
    """
    cursor = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cursor.execute(query, (TENANT_ID,))
        rows = cursor.fetchall()

        # Key each record by the file name with its directory part removed.
        return {
            Path(row['file_path']).name: {
                'id': row['id'],
                'name': row['name'],
                'file_path': row['file_path'],
            }
            for row in rows
            if row['file_path']
        }
    finally:
        cursor.close()
|
||
|
||
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """Recursively collect the .docx template files under *base_dir*.

    Args:
        base_dir: Directory to scan.

    Returns:
        A dict mapping each file name to its path. Empty (after printing a
        warning) when *base_dir* is not an existing directory.

    Word lock files (names starting with ``~$``) and non-file entries are
    skipped so they can never be matched and uploaded as templates. When the
    same file name occurs in several sub-directories, the first path in
    sorted order is kept and a warning is printed for each ignored duplicate.
    """
    templates: Dict[str, Path] = {}

    if not base_dir.is_dir():
        print(f"[WARN] 模板目录不存在: {base_dir}")
        return templates

    # rglob order depends on the filesystem; sorting makes duplicate
    # resolution reproducible between runs.
    for docx_file in sorted(base_dir.rglob('*.docx')):
        file_name = docx_file.name

        # "~$name.docx" is the lock file Word creates next to an open document.
        if file_name.startswith('~$') or not docx_file.is_file():
            continue

        if file_name in templates:
            print(f"[WARN] 本地存在同名模板文件, 已忽略: {docx_file} (保留: {templates[file_name]})")
            continue

        templates[file_name] = docx_file

    return templates
|
||
|
||
def find_best_match(target_name: str, candidates: List[str], threshold=0.8) -> Optional[str]:
    """Return the candidate that best matches *target_name*, or None.

    Args:
        target_name: File name to look for.
        candidates: File names to choose from.
        threshold: Minimum difflib similarity (0..1) for a fuzzy match.

    Returns:
        *target_name* itself when it is among *candidates*; otherwise the
        closest candidate whose similarity reaches *threshold*; None when
        *candidates* is empty or nothing is similar enough.
    """
    if not candidates:
        return None

    # An exact hit wins without consulting difflib.
    if target_name in candidates:
        return target_name

    closest = difflib.get_close_matches(target_name, candidates, n=1, cutoff=threshold)
    return next(iter(closest), None)
|
||
|
||
def upload_file_to_minio(client: Minio, local_file: Path, object_name: str) -> bool:
    """Upload one local file to BUCKET_NAME and verify the stored object.

    Args:
        client: Connected MinIO client.
        local_file: Path of the file to upload.
        object_name: Object key to store the file under.

    Returns:
        True when the file was uploaded and the size reported by the server
        equals the local file size. False when the local file is missing, the
        upload or the stat call fails, or the sizes differ. Errors are printed
        rather than raised.
    """
    try:
        if not local_file.exists():
            print(f" [ERROR] 本地文件不存在: {local_file}")
            return False

        file_size = local_file.stat().st_size
        print(f" 上传: {local_file.name} ({file_size:,} 字节)")
        print(f" 目标路径: {object_name}")

        client.fput_object(
            BUCKET_NAME,
            object_name,
            str(local_file),
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )

        # Verify the upload: the stored object must be exactly as large as the
        # local file, otherwise the upload is reported as failed.
        stat = client.stat_object(BUCKET_NAME, object_name)
        if stat.size != file_size:
            print(f" [ERROR] 上传后大小不一致 (本地: {file_size:,} 字节, 远端: {stat.size:,} 字节)")
            return False

        print(f" [OK] 上传成功(验证大小: {stat.size:,} 字节)")
        return True

    except S3Error as e:
        print(f" [ERROR] MinIO错误: {str(e)}")
        return False
    except Exception as e:
        # Unexpected failure: also print the traceback to help diagnosis.
        print(f" [ERROR] 上传失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
|
||
|
||
def main():
    """Run the whole upload workflow and print a summary.

    Steps:
        1. Connect to MinIO (stop if no usable client is returned).
        2. Connect to the database (stop on failure).
        3. Load the template records from the database.
        4. Scan TEMPLATES_DIR for local template files (stop if none).
        5. For each database record, pick the local file with the same name,
           or the closest fuzzy match, and upload it to the object key given
           by the record's file_path without leading slashes.
        6. Print match/upload statistics, unmatched entries on both sides
           (first 10 each) and the MinIO settings that were used.

    The database connection is always closed before returning.
    """
    print("\n" + "="*70)
    print(" 批量上传模板文件到新MinIO服务器")
    print("="*70)

    # 1. Test the MinIO connection
    client = test_minio_connection()
    if not client:
        print("\n[ERROR] 无法连接到MinIO服务器,请检查配置")
        return

    # 2. Connect to the database
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return

    try:
        # 3. Load the template records from the database
        print_section("3. 获取数据库模板配置")
        db_templates = get_db_templates(conn)
        print(f"\n数据库中找到 {len(db_templates)} 个模板配置")

        # 4. Scan the local template files
        print_section("4. 扫描本地模板文件")
        local_templates = scan_local_templates(TEMPLATES_DIR)
        print(f"\n本地找到 {len(local_templates)} 个模板文件")

        if not local_templates:
            print("[ERROR] 本地没有找到模板文件")
            return

        # 5. Match and upload
        print_section("5. 匹配并上传文件")

        matched_count = 0
        uploaded_count = 0
        failed_count = 0
        unmatched_db = []

        # Local names used by any database record, recorded while matching so
        # the fuzzy search does not have to be repeated for the summary.
        matched_local_names = set()
        # Candidate list for fuzzy matching, built once instead of per record.
        local_names = list(local_templates)

        for db_file_name, db_config in db_templates.items():
            file_path = db_config['file_path']
            object_name = file_path.lstrip('/')  # object keys carry no leading '/'

            if db_file_name in local_templates:
                # Exact file-name match
                local_name = db_file_name
            else:
                # Fall back to the closest local file name
                local_name = find_best_match(db_file_name, local_names)
                if not local_name:
                    unmatched_db.append((db_file_name, db_config))
                    print(f"\n[WARN] 未找到匹配的本地文件: {db_file_name}")
                    continue
                print(f"\n[INFO] 使用模糊匹配: '{db_file_name}' -> '{local_name}'")

            matched_count += 1
            matched_local_names.add(local_name)
            local_file = local_templates[local_name]

            # Upload the matched file
            print(f"\n处理: {db_config['name']}")
            print(f" 数据库路径: {file_path}")
            if upload_file_to_minio(client, local_file, object_name):
                uploaded_count += 1
            else:
                failed_count += 1

        # Local files that no database record was matched to
        unmatched_local = [
            (local_name, local_path)
            for local_name, local_path in local_templates.items()
            if local_name not in matched_local_names
        ]

        # 6. Summary
        print_section("6. 上传总结")

        print("\n匹配统计:")
        print(f" 数据库模板数: {len(db_templates)}")
        print(f" 本地文件数: {len(local_templates)}")
        print(f" 成功匹配: {matched_count}")
        print(f" 成功上传: {uploaded_count}")
        print(f" 上传失败: {failed_count}")

        if unmatched_db:
            print(f"\n[WARN] 数据库中有 {len(unmatched_db)} 个模板未找到本地文件:")
            for file_name, config in unmatched_db[:10]:  # show only the first 10
                print(f" - {file_name} (ID: {config['id']}, 名称: {config['name']})")
            if len(unmatched_db) > 10:
                print(f" ... 还有 {len(unmatched_db) - 10} 个")

        if unmatched_local:
            print(f"\n[INFO] 本地有 {len(unmatched_local)} 个文件未在数据库中找到:")
            for file_name, file_path in unmatched_local[:10]:  # show only the first 10
                print(f" - {file_name} ({file_path})")
            if len(unmatched_local) > 10:
                print(f" ... 还有 {len(unmatched_local) - 10} 个")

        if uploaded_count == matched_count and matched_count > 0:
            print_result(True, "所有匹配的文件都已成功上传!")
        elif uploaded_count > 0:
            print_result(True, f"成功上传 {uploaded_count} 个文件")
        else:
            print_result(False, "没有文件被上传")

        print("\n使用的MinIO配置:")
        print(f" 端点: {NEW_MINIO_CONFIG['endpoint']}")
        print(f" 存储桶: {BUCKET_NAME}")
        print(f" 使用HTTPS: {NEW_MINIO_CONFIG['secure']}")

    finally:
        conn.close()
|
||
|
||
# Run the upload workflow only when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||
|