ai-business-write/update_template_hierarchy_from_directory.py
2025-12-26 09:32:15 +08:00

413 lines
14 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
根据template_finish/目录结构更新数据库中的parent_id层级关系
1. 扫描目录结构
2. 创建/更新目录节点(作为父级)
3. 更新文件节点的parent_id
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Optional
from dotenv import load_dotenv
# 加载环境变量
load_dotenv()
# 数据库配置
DB_CONFIG = {
'host': os.getenv('DB_HOST', '152.136.177.240'),
'port': int(os.getenv('DB_PORT', 5012)),
'user': os.getenv('DB_USER', 'finyx'),
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
'database': os.getenv('DB_NAME', 'finyx'),
'charset': 'utf8mb4'
}
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
# 项目根目录
PROJECT_ROOT = Path(__file__).parent
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
def print_section(title):
"""打印章节标题"""
print("\n" + "="*70)
print(f" {title}")
print("="*70)
def print_result(success, message):
"""打印结果"""
status = "[OK]" if success else "[FAIL]"
print(f"{status} {message}")
def generate_id():
"""生成ID"""
import time
return int(time.time() * 1000000)
def get_actual_tenant_id(conn) -> int:
"""获取数据库中的实际tenant_id"""
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
result = cursor.fetchone()
if result:
return result['tenant_id']
return 1
finally:
cursor.close()
def scan_directory_structure(base_dir: Path) -> Dict:
"""
扫描目录结构,构建层级关系
Returns:
字典,包含目录和文件的层级信息
"""
structure = {
'directories': [], # 目录节点列表
'files': [] # 文件节点列表
}
if not base_dir.exists():
return structure
# 遍历所有目录和文件
for item in base_dir.rglob("*"):
relative_path = item.relative_to(PROJECT_ROOT)
relative_path_str = str(relative_path).replace('\\', '/')
parts = relative_path.parts
if item.is_dir():
# 目录节点
level = len(parts) - 1 # 层级从0开始template_finish是0级
dir_name = parts[-1]
parent_path = str(Path(*parts[:-1])).replace('\\', '/') if len(parts) > 1 else None
structure['directories'].append({
'name': dir_name,
'path': relative_path_str,
'level': level,
'parent_path': parent_path
})
elif item.is_file() and item.suffix.lower() in ['.docx', '.doc', '.wps']:
# 文件节点
level = len(parts) - 1
file_name = item.name
parent_path = str(Path(*parts[:-1])).replace('\\', '/') if len(parts) > 1 else None
structure['files'].append({
'name': file_name,
'path': relative_path_str,
'level': level,
'parent_path': parent_path
})
# 按层级排序目录(确保父目录先处理)
structure['directories'].sort(key=lambda x: (x['level'], x['path']))
return structure
def get_existing_templates(conn, tenant_id: int) -> Dict:
"""
获取数据库中现有的模板记录
Returns:
字典,包含按路径和名称索引的模板
"""
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
sql = """
SELECT id, name, parent_id, file_path, state
FROM f_polic_file_config
WHERE tenant_id = %s
"""
cursor.execute(sql, (tenant_id,))
templates = cursor.fetchall()
# 按file_path索引用于文件匹配
by_path = {}
# 按name索引用于目录匹配
by_name = {}
for template in templates:
if template['file_path']:
by_path[template['file_path']] = template
else:
# 目录节点file_path为NULL
name = template['name']
if name not in by_name:
by_name[name] = []
by_name[name].append(template)
return {
'by_path': by_path,
'by_name': by_name
}
finally:
cursor.close()
def create_or_update_directory(conn, tenant_id: int, dir_name: str, dir_path: str, parent_id: Optional[int], existing_templates: Dict) -> int:
"""
创建或更新目录节点
Returns:
目录节点的ID
"""
cursor = conn.cursor()
try:
# 查找现有目录优先通过名称查找且file_path为NULL
existing = None
if dir_name in existing_templates['by_name']:
# 找到同名的目录节点
for template in existing_templates['by_name'][dir_name]:
if template['file_path'] is None:
existing = template
break
if existing:
# 更新现有目录记录
template_id = existing['id']
if existing['parent_id'] != parent_id:
update_sql = """
UPDATE f_polic_file_config
SET parent_id = %s, updated_time = NOW(), updated_by = %s, state = 1
WHERE id = %s AND tenant_id = %s
"""
cursor.execute(update_sql, (parent_id, UPDATED_BY, template_id, tenant_id))
conn.commit()
print(f" [UPDATE] 更新目录: {dir_name} (ID: {template_id}, parent_id: {parent_id})")
else:
print(f" [KEEP] 保持目录: {dir_name} (ID: {template_id})")
return template_id
else:
# 创建新目录记录
template_id = generate_id()
insert_sql = """
INSERT INTO f_polic_file_config
(id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
"""
cursor.execute(insert_sql, (
template_id,
tenant_id,
parent_id,
dir_name,
'{}', # input_data
None, # file_path目录节点没有文件路径
CREATED_BY,
UPDATED_BY
))
conn.commit()
print(f" [CREATE] 创建目录: {dir_name} (ID: {template_id}, parent_id: {parent_id})")
# 更新existing_templates
if dir_name not in existing_templates['by_name']:
existing_templates['by_name'][dir_name] = []
existing_templates['by_name'][dir_name].append({
'id': template_id,
'name': dir_name,
'parent_id': parent_id,
'file_path': None,
'state': 1
})
return template_id
finally:
cursor.close()
def update_file_parent(conn, tenant_id: int, file_info: Dict, parent_id: Optional[int], existing_templates: Dict):
"""更新文件节点的parent_id"""
cursor = conn.cursor()
try:
file_path = file_info['path']
# 查找现有文件记录
existing = existing_templates['by_path'].get(file_path)
if existing:
template_id = existing['id']
if existing['parent_id'] != parent_id:
update_sql = """
UPDATE f_polic_file_config
SET parent_id = %s, updated_time = NOW(), updated_by = %s
WHERE id = %s AND tenant_id = %s
"""
cursor.execute(update_sql, (parent_id, UPDATED_BY, template_id, tenant_id))
conn.commit()
print(f" [UPDATE] 更新文件: {file_info['name']} (ID: {template_id}, parent_id: {parent_id})")
else:
print(f" [KEEP] 保持文件: {file_info['name']} (ID: {template_id})")
else:
# 文件不存在,创建新记录
template_id = generate_id()
insert_sql = """
INSERT INTO f_polic_file_config
(id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
"""
cursor.execute(insert_sql, (
template_id,
tenant_id,
parent_id,
file_info['name'],
'{}', # input_data
file_path,
CREATED_BY,
UPDATED_BY
))
conn.commit()
print(f" [CREATE] 创建文件: {file_info['name']} (ID: {template_id}, parent_id: {parent_id})")
finally:
cursor.close()
def main():
"""主函数"""
print_section("更新模板层级结构")
# 1. 连接数据库
print_section("1. 连接数据库")
try:
conn = pymysql.connect(**DB_CONFIG)
print_result(True, "数据库连接成功")
except Exception as e:
print_result(False, f"数据库连接失败: {str(e)}")
return
try:
# 2. 获取实际的tenant_id
print_section("2. 获取实际的tenant_id")
tenant_id = get_actual_tenant_id(conn)
print_result(True, f"实际tenant_id: {tenant_id}")
# 3. 扫描目录结构
print_section("3. 扫描目录结构")
structure = scan_directory_structure(TEMPLATES_DIR)
if not structure['directories'] and not structure['files']:
print_result(False, "未找到任何目录或文件")
return
print(f" 找到 {len(structure['directories'])} 个目录")
print(f" 找到 {len(structure['files'])} 个文件")
# 4. 获取现有模板
print_section("4. 获取现有模板")
existing_templates = get_existing_templates(conn, tenant_id)
print(f" 现有文件模板: {len(existing_templates['by_path'])}")
print(f" 现有目录模板: {sum(len(v) for v in existing_templates['by_name'].values())}")
# 5. 创建/更新目录节点
print_section("5. 创建/更新目录节点")
path_to_id = {} # 路径到ID的映射
# 按层级顺序处理目录(确保父目录先处理)
for dir_info in structure['directories']:
parent_id = None
if dir_info['parent_path']:
parent_id = path_to_id.get(dir_info['parent_path'])
if parent_id is None:
print(f" [WARN] 未找到父目录: {dir_info['parent_path']}")
dir_id = create_or_update_directory(
conn, tenant_id, dir_info['name'], dir_info['path'],
parent_id, existing_templates
)
path_to_id[dir_info['path']] = dir_id
print_result(True, f"处理了 {len(path_to_id)} 个目录节点")
# 6. 更新文件节点的parent_id
print_section("6. 更新文件节点的parent_id")
updated_count = 0
created_count = 0
kept_count = 0
for file_info in structure['files']:
parent_id = None
if file_info['parent_path']:
parent_id = path_to_id.get(file_info['parent_path'])
if parent_id is None:
print(f" [WARN] 未找到父目录: {file_info['parent_path']}")
# 检查是否需要更新
file_path = file_info['path']
existing = existing_templates['by_path'].get(file_path)
if existing:
if existing['parent_id'] != parent_id:
update_file_parent(conn, tenant_id, file_info, parent_id, existing_templates)
updated_count += 1
else:
kept_count += 1
else:
update_file_parent(conn, tenant_id, file_info, parent_id, existing_templates)
created_count += 1
print_result(True, f"处理了 {len(structure['files'])} 个文件节点")
print(f" - 更新: {updated_count}")
print(f" - 创建: {created_count}")
print(f" - 保持: {kept_count}")
# 7. 验证层级结构
print_section("7. 验证层级结构")
cursor = conn.cursor(pymysql.cursors.DictCursor)
try:
# 统计各层级的节点数
cursor.execute("""
SELECT
CASE
WHEN parent_id IS NULL THEN '根节点'
ELSE '子节点'
END as node_type,
COUNT(*) as count
FROM f_polic_file_config
WHERE tenant_id = %s
GROUP BY node_type
""", (tenant_id,))
stats = cursor.fetchall()
for stat in stats:
print(f" {stat['node_type']}: {stat['count']}")
# 统计有parent_id的文件
cursor.execute("""
SELECT COUNT(*) as count
FROM f_polic_file_config
WHERE tenant_id = %s
AND file_path IS NOT NULL
AND parent_id IS NOT NULL
""", (tenant_id,))
result = cursor.fetchone()
print(f" 有父级的文件: {result['count']}")
finally:
cursor.close()
finally:
conn.close()
print_result(True, "数据库连接已关闭")
print_section("完成")
if __name__ == "__main__":
main()