406 lines
14 KiB
Python
406 lines
14 KiB
Python
"""
|
||
重新扫描模板占位符并更新数据库
|
||
1. 扫描所有本地模板文件(包括新转换的.docx文件)
|
||
2. 提取所有占位符
|
||
3. 检查数据库中的模板记录
|
||
4. 更新数据库(如有变化)
|
||
"""
|
||
import os
|
||
import pymysql
|
||
from pathlib import Path
|
||
from typing import Dict, List, Set, Tuple
|
||
from dotenv import load_dotenv
|
||
import re
|
||
from docx import Document
|
||
|
||
# 加载环境变量
|
||
load_dotenv()
|
||
|
||
# 数据库配置
|
||
DB_CONFIG = {
|
||
'host': os.getenv('DB_HOST', '152.136.177.240'),
|
||
'port': int(os.getenv('DB_PORT', 5012)),
|
||
'user': os.getenv('DB_USER', 'finyx'),
|
||
'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
|
||
'database': os.getenv('DB_NAME', 'finyx'),
|
||
'charset': 'utf8mb4'
|
||
}
|
||
|
||
CREATED_BY = 655162080928945152
|
||
UPDATED_BY = 655162080928945152
|
||
|
||
# 项目根目录
|
||
PROJECT_ROOT = Path(__file__).parent
|
||
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
|
||
|
||
|
||
def print_section(title):
|
||
"""打印章节标题"""
|
||
print("\n" + "="*70)
|
||
print(f" {title}")
|
||
print("="*70)
|
||
|
||
|
||
def print_result(success, message):
|
||
"""打印结果"""
|
||
status = "[OK]" if success else "[FAIL]"
|
||
print(f"{status} {message}")
|
||
|
||
|
||
def generate_id():
|
||
"""生成ID"""
|
||
import time
|
||
return int(time.time() * 1000000)
|
||
|
||
|
||
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
|
||
"""扫描本地模板文件"""
|
||
templates = {}
|
||
if not base_dir.exists():
|
||
return templates
|
||
|
||
for file_path in base_dir.rglob('*'):
|
||
if file_path.is_file():
|
||
# 只处理文档文件(优先处理.docx,也包含.doc和.wps用于检查)
|
||
if file_path.suffix.lower() in ['.doc', '.docx', '.wps']:
|
||
relative_path = file_path.relative_to(PROJECT_ROOT)
|
||
relative_path_str = str(relative_path).replace('\\', '/')
|
||
templates[relative_path_str] = file_path
|
||
|
||
return templates
|
||
|
||
|
||
def get_actual_tenant_id(conn) -> int:
|
||
"""获取数据库中的实际tenant_id"""
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
try:
|
||
cursor.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
|
||
result = cursor.fetchone()
|
||
if result:
|
||
return result['tenant_id']
|
||
return 1 # 默认值
|
||
finally:
|
||
cursor.close()
|
||
|
||
|
||
def get_db_templates(conn, tenant_id: int) -> Dict[str, Dict]:
|
||
"""从数据库获取所有模板配置"""
|
||
cursor = conn.cursor(pymysql.cursors.DictCursor)
|
||
try:
|
||
sql = """
|
||
SELECT id, name, file_path, state, parent_id
|
||
FROM f_polic_file_config
|
||
WHERE tenant_id = %s
|
||
"""
|
||
cursor.execute(sql, (tenant_id,))
|
||
templates = cursor.fetchall()
|
||
|
||
result = {}
|
||
for template in templates:
|
||
file_path = template['file_path']
|
||
if file_path:
|
||
result[file_path] = {
|
||
'id': template['id'],
|
||
'name': template['name'],
|
||
'file_path': file_path,
|
||
'state': template['state'],
|
||
'parent_id': template['parent_id']
|
||
}
|
||
return result
|
||
finally:
|
||
cursor.close()
|
||
|
||
|
||
def extract_placeholders_from_docx(file_path: Path) -> Tuple[Set[str], bool]:
|
||
"""
|
||
从docx文件中提取所有占位符
|
||
|
||
Returns:
|
||
(占位符集合, 是否成功读取)
|
||
"""
|
||
placeholders = set()
|
||
placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')
|
||
success = False
|
||
|
||
try:
|
||
doc = Document(file_path)
|
||
success = True
|
||
|
||
# 从段落中提取占位符
|
||
for paragraph in doc.paragraphs:
|
||
text = paragraph.text
|
||
matches = placeholder_pattern.findall(text)
|
||
for match in matches:
|
||
field_code = match.strip()
|
||
if field_code:
|
||
placeholders.add(field_code)
|
||
|
||
# 从表格中提取占位符
|
||
for table in doc.tables:
|
||
try:
|
||
for row in table.rows:
|
||
for cell in row.cells:
|
||
for paragraph in cell.paragraphs:
|
||
text = paragraph.text
|
||
matches = placeholder_pattern.findall(text)
|
||
for match in matches:
|
||
field_code = match.strip()
|
||
if field_code:
|
||
placeholders.add(field_code)
|
||
except Exception as e:
|
||
# 某些表格结构可能导致错误,跳过
|
||
continue
|
||
except Exception as e:
|
||
# 文件读取失败(可能是.doc格式或其他问题)
|
||
return placeholders, False
|
||
|
||
return placeholders, success
|
||
|
||
|
||
def scan_all_templates_placeholders(local_templates: Dict[str, Path]) -> Dict[str, Tuple[Set[str], bool, str]]:
|
||
"""
|
||
扫描所有模板的占位符
|
||
|
||
Returns:
|
||
字典,key为相对路径,value为(占位符集合, 是否成功读取, 文件扩展名)
|
||
"""
|
||
results = {}
|
||
|
||
for rel_path, file_path in local_templates.items():
|
||
file_ext = file_path.suffix.lower()
|
||
placeholders, success = extract_placeholders_from_docx(file_path)
|
||
results[rel_path] = (placeholders, success, file_ext)
|
||
|
||
return results
|
||
|
||
|
||
def update_or_create_template(conn, tenant_id: int, rel_path: str, file_path: Path, db_templates: Dict[str, Dict]):
|
||
"""更新或创建模板记录"""
|
||
cursor = conn.cursor()
|
||
|
||
try:
|
||
# 检查是否已存在
|
||
if rel_path in db_templates:
|
||
# 已存在,检查是否需要更新
|
||
template_id = db_templates[rel_path]['id']
|
||
# 这里可以添加更新逻辑,比如更新名称等
|
||
return template_id, 'exists'
|
||
else:
|
||
# 不存在,创建新记录
|
||
template_id = generate_id()
|
||
file_name = file_path.stem # 不含扩展名的文件名
|
||
|
||
cursor.execute("""
|
||
INSERT INTO f_polic_file_config
|
||
(id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)
|
||
VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)
|
||
""", (
|
||
template_id,
|
||
tenant_id,
|
||
None, # parent_id
|
||
file_name,
|
||
'{}', # input_data
|
||
rel_path,
|
||
CREATED_BY,
|
||
UPDATED_BY
|
||
))
|
||
conn.commit()
|
||
return template_id, 'created'
|
||
except Exception as e:
|
||
conn.rollback()
|
||
raise e
|
||
finally:
|
||
cursor.close()
|
||
|
||
|
||
def main():
|
||
"""主函数"""
|
||
print_section("重新扫描模板占位符并更新数据库")
|
||
|
||
# 1. 扫描本地模板
|
||
print_section("1. 扫描本地模板文件")
|
||
local_templates = scan_local_templates(TEMPLATES_DIR)
|
||
print_result(True, f"找到 {len(local_templates)} 个本地模板文件")
|
||
|
||
# 统计文件类型
|
||
file_types = {}
|
||
for file_path in local_templates.values():
|
||
ext = file_path.suffix.lower()
|
||
file_types[ext] = file_types.get(ext, 0) + 1
|
||
|
||
print("\n文件类型统计:")
|
||
for ext, count in sorted(file_types.items()):
|
||
print(f" {ext}: {count} 个")
|
||
|
||
if not local_templates:
|
||
print_result(False, "未找到本地模板文件")
|
||
return
|
||
|
||
# 2. 连接数据库
|
||
print_section("2. 连接数据库")
|
||
try:
|
||
conn = pymysql.connect(**DB_CONFIG)
|
||
print_result(True, "数据库连接成功")
|
||
except Exception as e:
|
||
print_result(False, f"数据库连接失败: {str(e)}")
|
||
return
|
||
|
||
try:
|
||
# 3. 获取实际的tenant_id
|
||
print_section("3. 获取实际的tenant_id")
|
||
tenant_id = get_actual_tenant_id(conn)
|
||
print_result(True, f"实际tenant_id: {tenant_id}")
|
||
|
||
# 4. 获取数据库中的模板
|
||
print_section("4. 获取数据库中的模板配置")
|
||
db_templates = get_db_templates(conn, tenant_id)
|
||
print_result(True, f"找到 {len(db_templates)} 条数据库模板记录(有file_path的)")
|
||
|
||
# 5. 扫描所有模板的占位符
|
||
print_section("5. 扫描所有模板的占位符")
|
||
print(" 正在扫描,请稍候...")
|
||
|
||
template_placeholders = scan_all_templates_placeholders(local_templates)
|
||
|
||
# 统计结果
|
||
all_placeholders = set()
|
||
templates_with_placeholders = 0
|
||
templates_without_placeholders = 0
|
||
templates_read_success = 0
|
||
templates_read_failed = 0
|
||
doc_files = []
|
||
docx_files = []
|
||
|
||
for rel_path, (placeholders, success, file_ext) in template_placeholders.items():
|
||
all_placeholders.update(placeholders)
|
||
|
||
if success:
|
||
templates_read_success += 1
|
||
if placeholders:
|
||
templates_with_placeholders += 1
|
||
else:
|
||
templates_without_placeholders += 1
|
||
else:
|
||
templates_read_failed += 1
|
||
if file_ext == '.doc':
|
||
doc_files.append(rel_path)
|
||
|
||
if file_ext == '.docx':
|
||
docx_files.append(rel_path)
|
||
elif file_ext == '.doc':
|
||
doc_files.append(rel_path)
|
||
|
||
print(f"\n扫描结果统计:")
|
||
print(f" - 成功读取: {templates_read_success} 个")
|
||
print(f" - 读取失败: {templates_read_failed} 个")
|
||
print(f" - 有占位符: {templates_with_placeholders} 个")
|
||
print(f" - 无占位符: {templates_without_placeholders} 个")
|
||
print(f" - 发现的占位符总数: {len(all_placeholders)} 个不同的占位符")
|
||
|
||
if doc_files:
|
||
print(f"\n [注意] 发现 {len(doc_files)} 个.doc文件(可能无法读取):")
|
||
for doc_file in doc_files[:5]:
|
||
print(f" - {doc_file}")
|
||
if len(doc_files) > 5:
|
||
print(f" ... 还有 {len(doc_files) - 5} 个")
|
||
|
||
print(f"\n .docx文件: {len(docx_files)} 个")
|
||
|
||
# 6. 显示所有占位符
|
||
print_section("6. 所有占位符列表")
|
||
if all_placeholders:
|
||
for placeholder in sorted(all_placeholders):
|
||
print(f" - {placeholder}")
|
||
else:
|
||
print(" 未发现占位符")
|
||
|
||
# 7. 检查并更新数据库
|
||
print_section("7. 检查并更新数据库")
|
||
|
||
missing_templates = []
|
||
for rel_path in local_templates.keys():
|
||
if rel_path not in db_templates:
|
||
missing_templates.append(rel_path)
|
||
|
||
if missing_templates:
|
||
print(f" 发现 {len(missing_templates)} 个缺失的模板记录")
|
||
created_count = 0
|
||
for rel_path in missing_templates:
|
||
file_path = local_templates[rel_path]
|
||
try:
|
||
template_id, status = update_or_create_template(conn, tenant_id, rel_path, file_path, db_templates)
|
||
if status == 'created':
|
||
print(f" [创建] ID={template_id}, 路径={rel_path}")
|
||
created_count += 1
|
||
except Exception as e:
|
||
print(f" [错误] 创建失败: {rel_path}, 错误: {str(e)}")
|
||
|
||
if created_count > 0:
|
||
print_result(True, f"成功创建 {created_count} 条模板记录")
|
||
else:
|
||
print_result(True, "所有本地模板都已存在于数据库中")
|
||
|
||
# 8. 检查文件格式变化(.doc -> .docx)
|
||
print_section("8. 检查文件格式变化")
|
||
|
||
# 检查数据库中是否有.doc路径,但本地已经是.docx
|
||
format_changes = []
|
||
for db_path, db_info in db_templates.items():
|
||
if db_path.endswith('.doc'):
|
||
# 检查是否有对应的.docx文件
|
||
docx_path = db_path.replace('.doc', '.docx')
|
||
if docx_path in local_templates:
|
||
format_changes.append((db_path, docx_path, db_info))
|
||
|
||
if format_changes:
|
||
print(f" 发现 {len(format_changes)} 个文件格式变化(.doc -> .docx)")
|
||
updated_count = 0
|
||
for old_path, new_path, db_info in format_changes:
|
||
try:
|
||
cursor = conn.cursor()
|
||
cursor.execute("""
|
||
UPDATE f_polic_file_config
|
||
SET file_path = %s
|
||
WHERE id = %s
|
||
""", (new_path, db_info['id']))
|
||
conn.commit()
|
||
cursor.close()
|
||
print(f" [更新] ID={db_info['id']}, 名称={db_info['name']}")
|
||
print(f" 旧路径: {old_path}")
|
||
print(f" 新路径: {new_path}")
|
||
updated_count += 1
|
||
except Exception as e:
|
||
print(f" [错误] 更新失败: {str(e)}")
|
||
|
||
if updated_count > 0:
|
||
print_result(True, f"成功更新 {updated_count} 条路径记录")
|
||
else:
|
||
print_result(True, "未发现文件格式变化")
|
||
|
||
# 9. 生成详细报告
|
||
print_section("9. 详细报告")
|
||
|
||
# 找出有占位符的模板示例
|
||
templates_with_placeholders_list = []
|
||
for rel_path, (placeholders, success, file_ext) in template_placeholders.items():
|
||
if success and placeholders and file_ext == '.docx':
|
||
templates_with_placeholders_list.append((rel_path, placeholders))
|
||
|
||
if templates_with_placeholders_list:
|
||
print(f"\n 有占位符的模板示例(前5个):")
|
||
for i, (rel_path, placeholders) in enumerate(templates_with_placeholders_list[:5], 1):
|
||
print(f"\n {i}. {Path(rel_path).name}")
|
||
print(f" 路径: {rel_path}")
|
||
print(f" 占位符数量: {len(placeholders)}")
|
||
print(f" 占位符: {sorted(placeholders)}")
|
||
|
||
finally:
|
||
conn.close()
|
||
print_result(True, "数据库连接已关闭")
|
||
|
||
print_section("完成")
|
||
|
||
|
||
if __name__ == "__main__":
|
||
main()
|