""" 批量上传本地模板文件到新MinIO服务器 确保上传路径和文件名与数据库中的file_path字段值一致 """ import os import pymysql from minio import Minio from minio.error import S3Error from pathlib import Path from typing import Dict, List, Optional from dotenv import load_dotenv import difflib # 加载环境变量 load_dotenv() # 新MinIO配置 NEW_MINIO_CONFIG = { 'endpoint': '10.100.31.21:9000', 'access_key': 'minio_PC8dcY', 'secret_key': 'minio_7k7RNJ', 'secure': False # 注意:根据测试结果应该是false,但用户要求true,如果失败会自动尝试false } BUCKET_NAME = 'finyx' TENANT_ID = 615873064429507639 # 数据库配置 DB_CONFIG = { 'host': os.getenv('DB_HOST', '152.136.177.240'), 'port': int(os.getenv('DB_PORT', 5012)), 'user': os.getenv('DB_USER', 'finyx'), 'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'), 'database': os.getenv('DB_NAME', 'finyx'), 'charset': 'utf8mb4' } # 本地模板目录 TEMPLATES_DIR = Path('template_finish') def print_section(title): """打印章节标题""" print("\n" + "="*70) print(f" {title}") print("="*70) def print_result(success, message): """打印结果""" status = "[OK]" if success else "[FAIL]" print(f"{status} {message}") def get_minio_client(secure=None): """获取MinIO客户端""" if secure is None: secure = NEW_MINIO_CONFIG['secure'] return Minio( NEW_MINIO_CONFIG['endpoint'], access_key=NEW_MINIO_CONFIG['access_key'], secret_key=NEW_MINIO_CONFIG['secret_key'], secure=secure ) def test_minio_connection(): """测试MinIO连接""" print_section("1. 测试MinIO连接") # 先尝试用户指定的secure值 for secure in [NEW_MINIO_CONFIG['secure'], not NEW_MINIO_CONFIG['secure']]: try: print(f"\n尝试连接(secure={secure})...") client = get_minio_client(secure=secure) buckets = client.list_buckets() print_result(True, f"MinIO连接成功(secure={secure})") # 检查存储桶 if client.bucket_exists(BUCKET_NAME): print_result(True, f"存储桶 '{BUCKET_NAME}' 存在") # 更新配置 NEW_MINIO_CONFIG['secure'] = secure return client else: print_result(False, f"存储桶 '{BUCKET_NAME}' 不存在") return None except Exception as e: if secure == NEW_MINIO_CONFIG['secure']: print_result(False, f"使用secure={secure}连接失败: {str(e)}") print(" 将尝试另一个secure值...") continue else: print_result(False, f"MinIO连接失败: {str(e)}") return None return None def get_db_templates(conn) -> Dict[str, Dict]: """从数据库获取所有模板配置""" cursor = conn.cursor(pymysql.cursors.DictCursor) try: sql = """ SELECT id, name, file_path FROM f_polic_file_config WHERE tenant_id = %s AND state = 1 AND file_path IS NOT NULL AND file_path != '' """ cursor.execute(sql, (TENANT_ID,)) templates = cursor.fetchall() # 构建字典:文件名 -> 配置信息 result = {} for template in templates: # 从file_path中提取文件名 file_path = template['file_path'] if file_path: # 提取文件名(去掉路径) file_name = Path(file_path).name result[file_name] = { 'id': template['id'], 'name': template['name'], 'file_path': file_path } return result finally: cursor.close() def scan_local_templates(base_dir: Path) -> Dict[str, Path]: """扫描本地模板文件""" templates = {} if not base_dir.exists(): print(f"[WARN] 模板目录不存在: {base_dir}") return templates # 递归扫描所有.docx文件 for docx_file in base_dir.rglob('*.docx'): file_name = docx_file.name templates[file_name] = docx_file return templates def find_best_match(target_name: str, candidates: List[str], threshold=0.8) -> Optional[str]: """使用模糊匹配找到最佳匹配的文件名""" if not candidates: return None # 精确匹配 if target_name in candidates: return target_name # 模糊匹配 matches = difflib.get_close_matches(target_name, candidates, n=1, cutoff=threshold) if matches: return matches[0] return None def upload_file_to_minio(client: Minio, local_file: Path, object_name: str) -> bool: """上传文件到MinIO""" try: # 检查文件是否存在 if not local_file.exists(): print(f" [ERROR] 本地文件不存在: {local_file}") return False file_size = local_file.stat().st_size print(f" 上传: {local_file.name} ({file_size:,} 字节)") print(f" 目标路径: {object_name}") # 上传文件 client.fput_object( BUCKET_NAME, object_name, str(local_file), content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document' ) # 验证上传 stat = client.stat_object(BUCKET_NAME, object_name) print(f" [OK] 上传成功(验证大小: {stat.size:,} 字节)") return True except S3Error as e: print(f" [ERROR] MinIO错误: {str(e)}") return False except Exception as e: print(f" [ERROR] 上传失败: {str(e)}") import traceback traceback.print_exc() return False def main(): """主函数""" print("\n" + "="*70) print(" 批量上传模板文件到新MinIO服务器") print("="*70) # 1. 测试MinIO连接 client = test_minio_connection() if not client: print("\n[ERROR] 无法连接到MinIO服务器,请检查配置") return # 2. 连接数据库 print_section("2. 连接数据库") try: conn = pymysql.connect(**DB_CONFIG) print_result(True, "数据库连接成功") except Exception as e: print_result(False, f"数据库连接失败: {str(e)}") return try: # 3. 获取数据库中的模板配置 print_section("3. 获取数据库模板配置") db_templates = get_db_templates(conn) print(f"\n数据库中找到 {len(db_templates)} 个模板配置") # 4. 扫描本地模板文件 print_section("4. 扫描本地模板文件") local_templates = scan_local_templates(TEMPLATES_DIR) print(f"\n本地找到 {len(local_templates)} 个模板文件") if not local_templates: print("[ERROR] 本地没有找到模板文件") return # 5. 匹配并上传 print_section("5. 匹配并上传文件") matched_count = 0 uploaded_count = 0 failed_count = 0 unmatched_db = [] unmatched_local = [] # 匹配数据库中的模板 for db_file_name, db_config in db_templates.items(): file_path = db_config['file_path'] object_name = file_path.lstrip('/') # 去掉开头的/ # 查找匹配的本地文件 local_file = None # 精确匹配 if db_file_name in local_templates: local_file = local_templates[db_file_name] matched_count += 1 else: # 模糊匹配 best_match = find_best_match(db_file_name, list(local_templates.keys())) if best_match: local_file = local_templates[best_match] matched_count += 1 print(f"\n[INFO] 使用模糊匹配: '{db_file_name}' -> '{best_match}'") else: unmatched_db.append((db_file_name, db_config)) print(f"\n[WARN] 未找到匹配的本地文件: {db_file_name}") continue # 上传文件 print(f"\n处理: {db_config['name']}") print(f" 数据库路径: {file_path}") if upload_file_to_minio(client, local_file, object_name): uploaded_count += 1 else: failed_count += 1 # 检查未匹配的本地文件 matched_local_names = set() for db_file_name in db_templates.keys(): if db_file_name in local_templates: matched_local_names.add(db_file_name) else: best_match = find_best_match(db_file_name, list(local_templates.keys())) if best_match: matched_local_names.add(best_match) for local_name, local_path in local_templates.items(): if local_name not in matched_local_names: unmatched_local.append((local_name, local_path)) # 6. 总结 print_section("6. 上传总结") print(f"\n匹配统计:") print(f" 数据库模板数: {len(db_templates)}") print(f" 本地文件数: {len(local_templates)}") print(f" 成功匹配: {matched_count}") print(f" 成功上传: {uploaded_count}") print(f" 上传失败: {failed_count}") if unmatched_db: print(f"\n[WARN] 数据库中有 {len(unmatched_db)} 个模板未找到本地文件:") for file_name, config in unmatched_db[:10]: # 只显示前10个 print(f" - {file_name} (ID: {config['id']}, 名称: {config['name']})") if len(unmatched_db) > 10: print(f" ... 还有 {len(unmatched_db) - 10} 个") if unmatched_local: print(f"\n[INFO] 本地有 {len(unmatched_local)} 个文件未在数据库中找到:") for file_name, file_path in unmatched_local[:10]: # 只显示前10个 print(f" - {file_name} ({file_path})") if len(unmatched_local) > 10: print(f" ... 还有 {len(unmatched_local) - 10} 个") if uploaded_count == matched_count and matched_count > 0: print_result(True, f"所有匹配的文件都已成功上传!") elif uploaded_count > 0: print_result(True, f"成功上传 {uploaded_count} 个文件") else: print_result(False, "没有文件被上传") print(f"\n使用的MinIO配置:") print(f" 端点: {NEW_MINIO_CONFIG['endpoint']}") print(f" 存储桶: {BUCKET_NAME}") print(f" 使用HTTPS: {NEW_MINIO_CONFIG['secure']}") finally: conn.close() if __name__ == '__main__': main()