ai-business-write/diagnose_minio_document_generation.py

483 lines
16 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
诊断MinIO文档生成问题
测试新MinIO服务器配置下的文档生成流程
"""
import os
import sys
from minio import Minio
from minio.error import S3Error
from dotenv import load_dotenv
# Load environment variables (python-dotenv's load_dotenv, called with no arguments).
load_dotenv()
# New MinIO configuration (provided by the user).
# NOTE(review): the access/secret keys are hard-coded in source — confirm that
# is acceptable for this diagnostic script.
NEW_MINIO_CONFIG = {
'endpoint': '10.100.31.21:9000',
'access_key': 'minio_PC8dcY',
'secret_key': 'minio_7k7RNJ',
'secure': False # Important: per the test results this should be False
}
# Bucket that the MinIO checks below operate on.
BUCKET_NAME = 'finyx'
# Tenant id used in the template query and as the object-name prefix.
TENANT_ID = 615873064429507639
def print_section(title):
    """Print a section heading framed by two 70-character ``=`` rules.

    A blank line is emitted before the upper rule so that consecutive
    sections are visually separated.

    Args:
        title: Text shown between the two rules.
    """
    rule = "=" * 70
    # One print call, three lines: "\n<rule>", the title line, "<rule>".
    print("\n" + rule, f" {title}", rule, sep="\n")
def print_result(success, message):
    """Print one result line prefixed with ``[OK]`` or ``[FAIL]``.

    Args:
        success: Truthy selects the ``[OK]`` tag, falsy selects ``[FAIL]``.
        message: Text printed after the tag.
    """
    if success:
        tag = "[OK]"
    else:
        tag = "[FAIL]"
    print(f"{tag} {message}")
def check_environment_variables():
    """Print the MinIO environment variables and compare them with the expected setup.

    Every ``MINIO_*`` variable is printed (the secret key is partially
    masked). The endpoint and access key are then compared with
    ``NEW_MINIO_CONFIG``, and ``MINIO_SECURE`` is flagged when it is set to
    ``true`` in any letter case.

    Returns:
        bool: ``True`` when no problem was found, ``False`` otherwise.
    """
    print_section("1. 检查环境变量配置")

    names = (
        'MINIO_ENDPOINT',
        'MINIO_ACCESS_KEY',
        'MINIO_SECRET_KEY',
        'MINIO_BUCKET',
        'MINIO_SECURE',
    )
    current = {name: os.getenv(name) for name in names}

    print("\n当前环境变量配置:")
    for name, setting in current.items():
        shown = setting
        if name == 'MINIO_SECRET_KEY' and setting:
            # Reveal at most the first 8 characters of the secret.
            shown = setting[:8] + '***' if len(setting) > 8 else '***'
        print(f" {name}: {shown}")

    # Collect every mismatch first so that all of them are reported together.
    problems = []

    expected_endpoint = NEW_MINIO_CONFIG['endpoint']
    actual_endpoint = current['MINIO_ENDPOINT']
    if actual_endpoint != expected_endpoint:
        problems.append(
            f"MINIO_ENDPOINT 应该是 '{expected_endpoint}',当前是 '{actual_endpoint}'"
        )

    expected_access_key = NEW_MINIO_CONFIG['access_key']
    actual_access_key = current['MINIO_ACCESS_KEY']
    if actual_access_key != expected_access_key:
        problems.append(
            f"MINIO_ACCESS_KEY 应该是 '{expected_access_key}',当前是 '{actual_access_key}'"
        )

    if (current['MINIO_SECURE'] or '').lower() == 'true':
        problems.append("[WARN] MINIO_SECURE 设置为 'true'但新服务器使用HTTP应该设置为 'false'")

    if not problems:
        print_result(True, "环境变量配置正确")
        return True

    print("\n[WARN] 发现配置问题:")
    for problem in problems:
        print(f" - {problem}")
    print_result(False, "环境变量配置需要更新")
    return False
def test_minio_connection():
    """Connect to the new MinIO server and confirm the target bucket exists.

    The scheme configured in ``NEW_MINIO_CONFIG['secure']`` is tried first.
    If that attempt raises, the opposite scheme is tried once before giving
    up, so a wrong ``secure`` setting is detected instead of aborting on the
    first failure.

    Returns:
        tuple: ``(client, True)`` when a connection succeeded and
        ``BUCKET_NAME`` exists; ``(None, False)`` when every attempt failed
        or the bucket is missing.
    """
    import traceback

    print_section("2. 测试MinIO连接")

    preferred = bool(NEW_MINIO_CONFIG['secure'])
    attempts = (preferred, not preferred)

    for attempt_no, secure in enumerate(attempts, start=1):
        print(f"\n尝试连接secure={secure}...")
        try:
            client = Minio(
                NEW_MINIO_CONFIG['endpoint'],
                access_key=NEW_MINIO_CONFIG['access_key'],
                secret_key=NEW_MINIO_CONFIG['secret_key'],
                secure=secure,
            )
            # Listing the buckets is the connectivity test.
            buckets = client.list_buckets()
            bucket_exists = client.bucket_exists(BUCKET_NAME)
        except Exception as e:
            scheme = "HTTPS" if secure else "HTTP"
            if attempt_no < len(attempts):
                # Not the last candidate: report and fall back to the other scheme.
                fallback = "HTTP" if secure else "HTTPS"
                print_result(False, f"使用{scheme}连接失败: {e}")
                print(f" 将尝试使用{fallback}连接...")
                continue
            print_result(False, f"MinIO连接失败: {e}")
            traceback.print_exc()
            return None, False

        print_result(True, f"MinIO连接成功secure={secure}")
        print("\n 连接信息:")
        print(f" 端点: {NEW_MINIO_CONFIG['endpoint']}")
        print(f" 使用HTTPS: {secure}")
        print(f" 访问密钥: {NEW_MINIO_CONFIG['access_key']}")
        print("\n 可用存储桶:")
        for bucket in buckets:
            print(f" - {bucket.name} (创建时间: {bucket.creation_date})")

        # The connection works; the remaining question is whether the bucket exists.
        if not bucket_exists:
            print_result(False, f"存储桶 '{BUCKET_NAME}' 不存在")
            print(f" 建议:需要创建存储桶 '{BUCKET_NAME}'")
            return None, False
        print_result(True, f"存储桶 '{BUCKET_NAME}' 存在")
        return client, True
def _fetch_enabled_template():
    """Return one enabled template row for ``TENANT_ID`` from the database, or ``None``.

    The row is a dict with the keys ``id``, ``name`` and ``file_path``. The
    cursor and connection are always closed, even when the query raises.
    """
    import pymysql

    # NOTE(review): the fallback host and password are hard-coded credentials;
    # they are kept so the script behaves as before when the DB_* variables
    # are unset, but they should not live in source control.
    db_config = {
        'host': os.getenv('DB_HOST', '152.136.177.240'),
        'port': int(os.getenv('DB_PORT', 5012)),
        'user': os.getenv('DB_USER', 'finyx'),
        'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
        'database': os.getenv('DB_NAME', 'finyx'),
        'charset': 'utf8mb4'
    }
    sql = """
    SELECT id, name, file_path
    FROM f_polic_file_config
    WHERE tenant_id = %s
    AND state = 1
    AND file_path IS NOT NULL
    AND file_path != ''
    LIMIT 1
    """
    conn = pymysql.connect(**db_config)
    try:
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            cursor.execute(sql, (TENANT_ID,))
            return cursor.fetchone()
        finally:
            cursor.close()
    finally:
        conn.close()


def test_template_download(client):
    """Check that a template referenced by the database can be downloaded from MinIO.

    Looks up one enabled template for ``TENANT_ID``, verifies the object
    exists in ``BUCKET_NAME`` and downloads it to a temporary file, which is
    always removed afterwards.

    Args:
        client: Connected MinIO client, or a falsy value to skip the test.

    Returns:
        bool: ``True`` when the template was downloaded, ``False`` on any
        failure or when the test is skipped.
    """
    import tempfile
    import traceback

    print_section("3. 测试模板下载功能")
    if not client:
        print_result(False, "MinIO客户端未连接跳过测试")
        return False

    temp_path = None
    try:
        template = _fetch_enabled_template()
        if not template:
            print_result(False, "数据库中没有找到可用的模板文件")
            print(" 建议:检查数据库中的 f_polic_file_config 表")
            return False

        print("\n找到模板:")
        print(f" ID: {template['id']}")
        print(f" 名称: {template['name']}")
        print(f" 文件路径: {template['file_path']}")

        # Leading slashes are stripped from the stored path to form the object name.
        object_name = template['file_path'].lstrip('/')
        print("\n尝试下载模板...")
        print(f" 对象名称: {object_name}")

        # Check that the object exists before downloading it.
        try:
            stat = client.stat_object(BUCKET_NAME, object_name)
        except S3Error as e:
            if e.code != 'NoSuchKey':
                raise
            print_result(False, f"模板文件不存在: {object_name}")
            print(f" 错误: {str(e)}")
            print(" 建议检查MinIO服务器上是否存在该文件")
            return False
        print_result(True, f"模板文件存在(大小: {stat.size:,} 字节)")

        # Reserve a path and close the handle so fget_object can write to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
            temp_path = temp_file.name
        try:
            client.fget_object(BUCKET_NAME, object_name, temp_path)
            file_size = os.path.getsize(temp_path)
        except Exception as e:
            print_result(False, f"模板下载失败: {str(e)}")
            return False
        print_result(True, f"模板下载成功(大小: {file_size:,} 字节)")
        return True
    except Exception as e:
        print_result(False, f"测试模板下载时出错: {str(e)}")
        traceback.print_exc()
        return False
    finally:
        # Single cleanup point for the temporary file on every exit path.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f" [WARN] 无法删除临时文件: {temp_path} ({cleanup_error})")
def test_file_upload(client):
    """Upload a small test document to MinIO and verify that it is stored.

    A temporary local file is uploaded under
    ``{TENANT_ID}/TEST/<timestamp>/test_upload.docx`` and then checked with
    ``stat_object``. Both the local file and the uploaded object are removed
    on every exit path; a cleanup failure is reported but does not change the
    result.

    Args:
        client: Connected MinIO client, or a falsy value to skip the test.

    Returns:
        bool: ``True`` when upload and verification succeeded, ``False``
        otherwise.
    """
    import tempfile
    import traceback
    from datetime import datetime

    print_section("4. 测试文件上传功能")
    if not client:
        print_result(False, "MinIO客户端未连接跳过测试")
        return False

    temp_path = None
    object_name = None
    uploaded = False
    try:
        # Create the local test file.
        test_content = b"Test document content for MinIO upload test"
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
            temp_path = temp_file.name
            temp_file.write(test_content)
        print(f"\n创建测试文件: {temp_path}")

        # Build the object name; the microsecond timestamp keeps each run separate.
        now = datetime.now()
        timestamp = f"{now.strftime('%Y%m%d%H%M%S')}{now.microsecond:06d}"
        object_name = f"{TENANT_ID}/TEST/{timestamp}/test_upload.docx"
        print("\n尝试上传文件...")
        print(f" 对象名称: {object_name}")

        client.fput_object(
            BUCKET_NAME,
            object_name,
            temp_path,
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        uploaded = True
        print_result(True, "文件上传成功")

        # Verify that the object is really there.
        stat = client.stat_object(BUCKET_NAME, object_name)
        print(f" 上传的文件大小: {stat.size:,} 字节")
        return True
    except Exception as e:
        print_result(False, f"文件上传失败: {str(e)}")
        traceback.print_exc()
        return False
    finally:
        # Remove the local temporary file.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f" [WARN] 无法删除临时文件: {temp_path} ({cleanup_error})")
        # Best-effort removal of the uploaded test object; never fails the test.
        if uploaded:
            try:
                client.remove_object(BUCKET_NAME, object_name)
                print(f" 已清理测试文件: {object_name}")
            except Exception as cleanup_error:
                print(f" [WARN] 清理测试文件失败: {object_name} ({cleanup_error})")
def test_presigned_url(client):
    """Upload a throwaway object and generate a presigned download URL for it.

    The object is stored under ``{TENANT_ID}/TEST/<timestamp>/test_url.docx``
    and the URL is requested with a 7-day expiry. The local temporary file
    and the uploaded object are removed on every exit path; a cleanup failure
    is reported but does not change the result.

    Args:
        client: Connected MinIO client, or a falsy value to skip the test.

    Returns:
        bool: ``True`` when the URL was generated, ``False`` otherwise.
    """
    import tempfile
    import traceback
    from datetime import datetime, timedelta

    print_section("5. 测试预签名URL生成")
    if not client:
        print_result(False, "MinIO客户端未连接跳过测试")
        return False

    temp_path = None
    test_object_name = None
    uploaded = False
    try:
        # Object name for this run.
        now = datetime.now()
        timestamp = f"{now.strftime('%Y%m%d%H%M%S')}{now.microsecond:06d}"
        test_object_name = f"{TENANT_ID}/TEST/{timestamp}/test_url.docx"

        # Create and upload the test file that the URL will point to.
        test_content = b"Test content"
        with tempfile.NamedTemporaryFile(delete=False, suffix='.docx') as temp_file:
            temp_path = temp_file.name
            temp_file.write(test_content)
        client.fput_object(
            BUCKET_NAME,
            test_object_name,
            temp_path,
            content_type='application/vnd.openxmlformats-officedocument.wordprocessingml.document'
        )
        uploaded = True

        print("\n生成预签名URL...")
        print(f" 对象名称: {test_object_name}")
        url = client.presigned_get_object(
            BUCKET_NAME,
            test_object_name,
            expires=timedelta(days=7)
        )
        print_result(True, "预签名URL生成成功")
        # Only the first 100 characters of the URL are printed.
        print(f"\n URL: {url[:100]}...")
        return True
    except Exception as e:
        print_result(False, f"预签名URL生成失败: {str(e)}")
        traceback.print_exc()
        return False
    finally:
        # Remove the local temporary file.
        if temp_path is not None:
            try:
                os.unlink(temp_path)
            except FileNotFoundError:
                pass
            except OSError as cleanup_error:
                print(f" [WARN] 无法删除临时文件: {temp_path} ({cleanup_error})")
        # Best-effort removal of the uploaded test object; never fails the test.
        if uploaded:
            try:
                client.remove_object(BUCKET_NAME, test_object_name)
            except Exception as cleanup_error:
                print(f" [WARN] 清理测试文件失败: {test_object_name} ({cleanup_error})")
def check_directory_structure(client):
    """List the tenant's top-level entries in the bucket and print their prefixes.

    Performs a non-recursive listing under ``{TENANT_ID}/``. All returned
    entries are counted, but only the first 20 contribute to the printed
    prefix list.

    Args:
        client: Connected MinIO client, or a falsy value to skip the check.

    Returns:
        bool: ``True`` when the listing completed, ``False`` when it raised
        or the check was skipped.
    """
    print_section("6. 检查目录结构")
    if not client:
        print_result(False, "MinIO客户端未连接跳过测试")
        return False

    print("\n说明MinIO是对象存储不需要创建目录。")
    print("对象名称可以包含路径分隔符(如 '/'MinIO会自动处理。")
    print("\n检查存储桶中的对象结构...")

    try:
        listing = client.list_objects(BUCKET_NAME, prefix=f"{TENANT_ID}/", recursive=False)
        seen_dirs = set()
        total = 0
        for total, entry in enumerate(listing, start=1):
            if total > 20:
                # Keep counting, but only the first 20 entries are inspected.
                continue
            # Everything before the last '/' is the "directory" part of the name.
            head, slash, _ = entry.object_name.rpartition('/')
            if slash:
                seen_dirs.add(head)

        if seen_dirs:
            print("\n发现的前缀目录结构前20个对象:")
            for directory in sorted(seen_dirs):
                print(f" - {directory}/")

        print_result(True, f"存储桶结构正常(已检查 {total} 个对象)")
        return True
    except Exception as e:
        print_result(False, f"检查目录结构失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
def print_recommendations():
    """Print the step-by-step remediation advice shown at the end of a run.

    The endpoint, access key, secret key, bucket and tenant id in the output
    are taken from ``NEW_MINIO_CONFIG``, ``BUCKET_NAME`` and ``TENANT_ID`` so
    the advice cannot drift from the configuration the script actually tests.
    Note that the secret key is printed in clear text.
    """
    print_section("修复建议")
    print("\n根据诊断结果,请执行以下步骤:")

    print("\n1. 更新环境变量配置(.env文件或系统环境变量:")
    print(f" MINIO_ENDPOINT={NEW_MINIO_CONFIG['endpoint']}")
    print(f" MINIO_ACCESS_KEY={NEW_MINIO_CONFIG['access_key']}")
    print(f" MINIO_SECRET_KEY={NEW_MINIO_CONFIG['secret_key']}")
    print(f" MINIO_BUCKET={BUCKET_NAME}")
    print(" MINIO_SECURE=false # [IMPORTANT] 重要必须是false不是true")

    print("\n2. 确保存储桶存在:")
    print(f" 存储桶名称: {BUCKET_NAME}")
    print(" 如果不存在,需要创建存储桶")

    print("\n3. 确保模板文件已上传到MinIO:")
    print(" 检查数据库中的 f_polic_file_config 表的 file_path 字段")
    print(" 确保对应的文件在MinIO服务器上存在")

    print("\n4. 关于目录:")
    print(" MinIO是对象存储不需要创建目录")
    print(" 对象名称可以包含路径分隔符(如 '/'MinIO会自动处理")
    print(f" 例如: {TENANT_ID}/TEMPLATE/2024/12/template.docx")

    print("\n5. 重启应用:")
    print(" 更新环境变量后,需要重启应用服务才能生效")

    print("\n[IMPORTANT] MINIO_SECURE=false # 注意必须是false不是true")
def main():
    """Run every diagnostic step in order and print a pass/fail summary.

    The environment check and the connection test always run. The download,
    upload, presigned-URL and directory checks run only when a client was
    obtained and the bucket exists. Recommendations are printed after the
    summary and also after an unexpected error; an interrupt only prints a
    notice.
    """
    banner = "=" * 70
    print("\n" + banner)
    print(" MinIO文档生成问题诊断工具")
    print(banner)

    print("\n新MinIO服务器配置:")
    print(f" 端点: {NEW_MINIO_CONFIG['endpoint']}")
    print(f" 存储桶: {BUCKET_NAME}")
    print(f" 访问密钥: {NEW_MINIO_CONFIG['access_key']}")
    print(f" 使用HTTPS: {NEW_MINIO_CONFIG['secure']}")

    results = {}
    try:
        # Steps 1 and 2 always run.
        results['环境变量'] = check_environment_variables()
        client, bucket_exists = test_minio_connection()
        results['MinIO连接'] = client is not None and bucket_exists

        # Steps 3 to 6 need a working client and an existing bucket.
        if client and bucket_exists:
            follow_up_checks = (
                ('模板下载', test_template_download),
                ('文件上传', test_file_upload),
                ('预签名URL', test_presigned_url),
                ('目录结构', check_directory_structure),
            )
            for label, check in follow_up_checks:
                results[label] = check(client)

        # Summary
        print_section("诊断总结")
        print("\n测试结果:")
        for test_name, outcome in results.items():
            verdict = "[OK] 通过" if outcome else "[FAIL] 失败"
            print(f" {test_name}: {verdict}")

        total = len(results)
        passed = sum(1 for outcome in results.values() if outcome)
        percent = passed * 100 // total if total > 0 else 0
        print(f"\n通过率: {passed}/{total} ({percent}%)")

        if passed == total:
            print("\n[OK] 所有测试通过MinIO配置正确文档生成应该可以正常工作。")
        else:
            print("\n[WARN] 部分测试失败,请查看上面的错误信息并按照建议进行修复。")
        print_recommendations()
    except KeyboardInterrupt:
        print("\n\n诊断已中断")
    except Exception as e:
        print(f"\n[ERROR] 诊断过程中发生错误: {e}")
        import traceback
        traceback.print_exc()
        print_recommendations()
# Run the diagnostics only when this file is executed as a script.
if __name__ == "__main__":
    main()