完成121个模板更新和测试。

This commit is contained in:
python 2025-12-26 09:16:31 +08:00
parent eec66cbe05
commit ac8bdba941
157 changed files with 2792 additions and 1 deletions

202
check_database_templates.py Normal file
View File

@ -0,0 +1,202 @@
"""
检查数据库中的模板记录情况
"""
import os
import pymysql
from pathlib import Path
from dotenv import load_dotenv
# Load environment variables; this must run before the os.getenv() lookups below.
load_dotenv()
# Database connection settings, passed as keyword arguments to pymysql.connect().
# Each entry is read from the environment, with the literal as a fallback.
# NOTE(review): the fallbacks hard-code a concrete host, user and password in
# source control -- consider requiring these from the environment instead.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),  # environment values are strings, so coerce to int
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}
# Fallback tenant id. check_database() first looks up the tenant_id values that
# are present in the database and only uses this constant when that lookup
# yields no usable value.
TENANT_ID = 615873064429507639  # default value
def print_section(title):
    """Print a section heading: a blank line, a 70-char ``=`` rule, the title, and a closing rule."""
    rule = "=" * 70
    # One print call with a newline separator emits exactly the same bytes as
    # three separate prints ("\n" + rule, title line, rule).
    print("", rule, f" {title}", rule, sep="\n")
# SQL used by check_database(). The statements are kept as module-level
# constants so the report function stays readable.
_SQL_TENANT_COUNTS = "SELECT DISTINCT tenant_id, COUNT(*) as count FROM f_polic_file_config GROUP BY tenant_id"
_SQL_TOTAL_COUNT = "SELECT COUNT(*) as count FROM f_polic_file_config"
_SQL_COUNT_BY_TENANT = "SELECT COUNT(*) as count FROM f_polic_file_config WHERE tenant_id = %s"
_SQL_COUNT_WITH_PATH = """
SELECT COUNT(*) as count
FROM f_polic_file_config
WHERE tenant_id = %s
AND file_path IS NOT NULL
AND file_path != ''
"""
_SQL_COUNT_BY_STATE = """
SELECT state, COUNT(*) as count
FROM f_polic_file_config
WHERE tenant_id = %s
GROUP BY state
"""
_SQL_SAMPLE_ROWS = """
SELECT id, name, file_path, state, tenant_id, parent_id
FROM f_polic_file_config
WHERE tenant_id = %s
LIMIT 10
"""
# "%%" is a literal "%" here because the statement is executed with parameters.
_SQL_PATH_TYPES = """
SELECT
CASE
WHEN file_path LIKE 'template_finish/%%' THEN '本地路径'
WHEN file_path LIKE '/%%TEMPLATE/%%' THEN 'MinIO路径'
WHEN file_path IS NULL OR file_path = '' THEN '空路径'
ELSE '其他路径'
END as path_type,
COUNT(*) as count
FROM f_polic_file_config
WHERE tenant_id = %s
GROUP BY path_type
"""
_SQL_RELATION_COUNT = """
SELECT COUNT(*) as count
FROM f_polic_file_field
WHERE tenant_id = %s
"""
_SQL_FIELD_TYPES = """
SELECT
field_type,
CASE
WHEN field_type = 1 THEN '输入字段'
WHEN field_type = 2 THEN '输出字段'
ELSE '未知'
END as type_name,
COUNT(*) as count
FROM f_polic_field
WHERE tenant_id = %s
GROUP BY field_type
"""
# NOTE(review): "filed_id" is the column name as written in the original query;
# confirm against the schema that it is not a misspelling of "field_id".
_SQL_TEMPLATE_FIELD_COUNTS = """
SELECT
fc.id as file_id,
fc.name as file_name,
fc.file_path,
COUNT(ff.filed_id) as field_count
FROM f_polic_file_config fc
LEFT JOIN f_polic_file_field ff ON fc.id = ff.file_id AND ff.tenant_id = %s
WHERE fc.tenant_id = %s
GROUP BY fc.id, fc.name, fc.file_path
LIMIT 10
"""


def _fetch_count(cursor, sql, params=None):
    """Execute a query that selects a single ``count`` column and return that value."""
    cursor.execute(sql, params)
    return cursor.fetchone()['count']


def _state_label(state):
    """Return the display label for a ``state`` value (1 is enabled, anything else disabled)."""
    return "已启用" if state == 1 else "已禁用"


def _close_quietly(*resources):
    """Close every non-None resource, ignoring errors raised by ``close()``."""
    for resource in resources:
        if resource is None:
            continue
        try:
            resource.close()
        except Exception:
            # Best-effort cleanup: a failing close() must not hide the error
            # that was already reported by the caller.
            pass


def check_database():
    """Print a read-only report about the template records stored in the database.

    Connects with ``DB_CONFIG``, picks the tenant to inspect (the first
    tenant_id returned by the grouping query, or ``TENANT_ID`` when the table
    yields none) and prints counts and sample rows from
    ``f_polic_file_config``, ``f_polic_file_field`` and ``f_polic_field``.

    Any exception is reported on stdout together with a traceback instead of
    being propagated. The cursor and connection are always closed, including
    when a query fails part-way through the report.
    """
    print_section("数据库模板记录检查")
    conn = None
    cursor = None
    try:
        conn = pymysql.connect(**DB_CONFIG)
        cursor = conn.cursor(pymysql.cursors.DictCursor)

        # 0. List every tenant_id and remember the first one returned.
        # The query has no ORDER BY, so with several tenants the choice
        # depends on the order in which the server returns the groups.
        print_section("0. 检查所有不同的 tenant_id")
        cursor.execute(_SQL_TENANT_COUNTS)
        actual_tenant_id = None
        for row in cursor.fetchall():
            print(f" tenant_id={row['tenant_id']}: {row['count']} 条记录")
            if actual_tenant_id is None:
                actual_tenant_id = row['tenant_id']

        # Compare against None explicitly so that a tenant_id of 0 is not
        # mistaken for "nothing found".
        if actual_tenant_id is not None:
            print(f"\n [使用] tenant_id={actual_tenant_id} 进行后续检查")
            tenant_id = actual_tenant_id
        else:
            tenant_id = TENANT_ID
            print(f"\n [使用] 默认 tenant_id={tenant_id}")

        # 1. Total number of rows, without any filter.
        print_section("1. 检查 f_polic_file_config 表(所有记录)")
        total_count = _fetch_count(cursor, _SQL_TOTAL_COUNT)
        print(f" 总记录数: {total_count}")

        # 2. Rows belonging to the selected tenant.
        print_section("2. 检查 f_polic_file_config 表(按 tenant_id 过滤)")
        tenant_count = _fetch_count(cursor, _SQL_COUNT_BY_TENANT, (tenant_id,))
        print(f" tenant_id={tenant_id} 的记录数: {tenant_count}")

        # 3. Rows that have a non-empty file_path.
        print_section("3. 检查 f_polic_file_config 表(有 file_path 的记录)")
        path_count = _fetch_count(cursor, _SQL_COUNT_WITH_PATH, (tenant_id,))
        print(f" 有 file_path 的记录数: {path_count}")

        # 4. Row counts grouped by state.
        print_section("4. 检查 f_polic_file_config 表(按 state 分组)")
        cursor.execute(_SQL_COUNT_BY_STATE, (tenant_id,))
        for row in cursor.fetchall():
            print(f" state={row['state']} ({_state_label(row['state'])}): {row['count']}")

        # 5. Up to ten sample rows.
        print_section("5. f_polic_file_config 表记录示例前10条")
        cursor.execute(_SQL_SAMPLE_ROWS, (tenant_id,))
        samples = cursor.fetchall()
        if samples:
            for i, row in enumerate(samples, 1):
                print(f"\n 记录 {i}:")
                print(f" ID: {row['id']}")
                print(f" 名称: {row['name']}")
                print(f" 路径: {row['file_path']}")
                print(f" 状态: {row['state']} ({_state_label(row['state'])})")
                print(f" 租户ID: {row['tenant_id']}")
                print(f" 父级ID: {row['parent_id']}")
        else:
            print(" 没有找到记录")

        # 7. Distribution of file_path kinds (the section numbering skips 6).
        print_section("7. 检查 file_path 路径类型分布")
        cursor.execute(_SQL_PATH_TYPES, (tenant_id,))
        for row in cursor.fetchall():
            print(f" {row['path_type']}: {row['count']}")

        # 8. Number of template/field relation rows.
        print_section("8. 检查 f_polic_file_field 关联表")
        relation_count = _fetch_count(cursor, _SQL_RELATION_COUNT, (tenant_id,))
        print(f" 关联记录数: {relation_count}")

        # 9. Field counts grouped by field_type.
        print_section("9. 检查 f_polic_field 字段表")
        cursor.execute(_SQL_FIELD_TYPES, (tenant_id,))
        for row in cursor.fetchall():
            print(f" {row['type_name']} (field_type={row['field_type']}): {row['count']}")

        # 10. Number of related fields for up to ten templates.
        print_section("10. 检查模板与字段的关联关系(示例)")
        cursor.execute(_SQL_TEMPLATE_FIELD_COUNTS, (tenant_id, tenant_id))
        relations = cursor.fetchall()
        if relations:
            for i, row in enumerate(relations, 1):
                print(f"\n 模板 {i}:")
                print(f" ID: {row['file_id']}")
                print(f" 名称: {row['file_name']}")
                print(f" 路径: {row['file_path']}")
                print(f" 关联字段数: {row['field_count']}")
        else:
            print(" 没有找到关联记录")

        print_section("检查完成")
    except Exception as e:
        # Top-level boundary of the script: report the failure and the traceback.
        print(f"检查失败: {str(e)}")
        import traceback
        traceback.print_exc()
    finally:
        # Release the cursor and the connection on the error path as well.
        _close_quietly(cursor, conn)


if __name__ == "__main__":
    check_database()

View File

@ -0,0 +1,405 @@
"""
重新扫描模板占位符并更新数据库
1. 扫描所有本地模板文件包括新转换的.docx文件
2. 提取所有占位符
3. 检查数据库中的模板记录
4. 更新数据库如有变化
"""
import os
import pymysql
from pathlib import Path
from typing import Dict, List, Set, Tuple
from dotenv import load_dotenv
import re
from docx import Document
# Load environment variables; this must run before the os.getenv() lookups below.
load_dotenv()
# Database connection settings, passed as keyword arguments to pymysql.connect().
# Each entry is read from the environment, with the literal as a fallback.
# NOTE(review): the fallbacks hard-code a concrete host, user and password in
# source control -- consider requiring these from the environment instead.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    'port': int(os.getenv('DB_PORT', 5012)),  # environment values are strings, so coerce to int
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4'
}
# Values written to the created_by / updated_by columns of the template rows
# this script inserts.
CREATED_BY = 655162080928945152
UPDATED_BY = 655162080928945152
# Project root directory: the directory that contains this script.
PROJECT_ROOT = Path(__file__).parent
# Directory that is scanned recursively for template files.
TEMPLATES_DIR = PROJECT_ROOT / "template_finish"
def print_section(title):
    """Print a section heading: a blank line, a 70-char ``=`` rule, the title, and a closing rule."""
    rule = "=" * 70
    # One print call with a newline separator emits exactly the same bytes as
    # three separate prints ("\n" + rule, title line, rule).
    print("", rule, f" {title}", rule, sep="\n")
def print_result(success, message):
    """Print ``message`` prefixed with ``[OK]`` when ``success`` is truthy, ``[FAIL]`` otherwise."""
    tag_by_outcome = {True: "[OK]", False: "[FAIL]"}
    tag = tag_by_outcome[bool(success)]
    print(f"{tag} {message}")
def generate_id():
    """Return a new integer id derived from the current time in microseconds.

    The id is ``int(time.time() * 1000000)``, bumped when necessary so that
    every call in this process returns a value strictly greater than the
    previous one. Without the bump, two calls made within the same clock tick
    would return the same id and collide when used as a primary key.

    Uniqueness is only guaranteed within a single process; the counter is not
    protected against concurrent calls from several threads.
    """
    import time

    candidate = int(time.time() * 1000000)
    # The last id handed out is remembered on the function object itself.
    previous = getattr(generate_id, "_last_id", None)
    if previous is not None and candidate <= previous:
        candidate = previous + 1
    generate_id._last_id = candidate
    return candidate
def scan_local_templates(base_dir: Path) -> Dict[str, Path]:
    """Collect the template documents found anywhere below ``base_dir``.

    Only regular files whose suffix (case-insensitive) is ``.doc``, ``.docx``
    or ``.wps`` are included.

    Returns:
        A dict mapping each file's path relative to ``PROJECT_ROOT`` (with
        forward slashes) to its ``Path``. Empty when ``base_dir`` does not exist.
    """
    if not base_dir.exists():
        return {}

    document_suffixes = ('.doc', '.docx', '.wps')
    found = {}
    for candidate in base_dir.rglob('*'):
        if not candidate.is_file():
            continue
        if candidate.suffix.lower() not in document_suffixes:
            continue
        # Keys use forward slashes regardless of the platform's separator.
        key = str(candidate.relative_to(PROJECT_ROOT)).replace('\\', '/')
        found[key] = candidate
    return found
def get_actual_tenant_id(conn) -> int:
    """Return one tenant_id that occurs in ``f_polic_file_config``.

    Falls back to ``1`` when the query returns no row. The cursor opened on
    ``conn`` is always closed before returning.
    """
    cur = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cur.execute("SELECT DISTINCT tenant_id FROM f_polic_file_config LIMIT 1")
        first_row = cur.fetchone()
    finally:
        cur.close()
    # 1 is the default used when the table has no rows.
    return first_row['tenant_id'] if first_row else 1
def get_db_templates(conn, tenant_id: int) -> Dict[str, Dict]:
    """Load the template configuration rows of ``tenant_id`` from the database.

    Returns:
        A dict keyed by ``file_path``; each value holds the row's ``id``,
        ``name``, ``file_path``, ``state`` and ``parent_id``. Rows whose
        ``file_path`` is empty or NULL are left out, and when several rows
        share a path the last one fetched wins.
    """
    query = (
        "\n"
        "SELECT id, name, file_path, state, parent_id\n"
        "FROM f_polic_file_config\n"
        "WHERE tenant_id = %s\n"
    )
    cur = conn.cursor(pymysql.cursors.DictCursor)
    try:
        cur.execute(query, (tenant_id,))
        rows = cur.fetchall()
    finally:
        cur.close()

    kept_columns = ('id', 'name', 'file_path', 'state', 'parent_id')
    return {
        row['file_path']: {column: row[column] for column in kept_columns}
        for row in rows
        if row['file_path']
    }
def extract_placeholders_from_docx(file_path: Path) -> Tuple[Set[str], bool]:
    """Extract every ``{{name}}`` placeholder from a docx file.

    Body paragraphs and the paragraphs inside table cells are searched.
    Placeholder names are stripped of surrounding whitespace and blank names
    are ignored.

    Returns:
        ``(placeholders, ok)``. ``ok`` is False when the document could not be
        opened or read (for example a file in .doc format); in that case the
        set holds whatever was collected before the failure.
    """
    pattern = re.compile(r'\{\{([^}]+)\}\}')
    found = set()

    def harvest(paragraphs):
        # Add the placeholders of each paragraph to ``found`` one at a time.
        for para in paragraphs:
            for raw_name in pattern.findall(para.text):
                name = raw_name.strip()
                if name:
                    found.add(name)

    try:
        doc = Document(file_path)
        harvest(doc.paragraphs)
        for table in doc.tables:
            try:
                for row in table.rows:
                    for cell in row.cells:
                        harvest(cell.paragraphs)
            except Exception:
                # Some table structures raise while being walked; skip that table.
                continue
    except Exception:
        # The file could not be read (possibly .doc format or another problem).
        return found, False
    return found, True
def scan_all_templates_placeholders(local_templates: Dict[str, Path]) -> Dict[str, Tuple[Set[str], bool, str]]:
    """Scan every template in ``local_templates`` for placeholders.

    Returns:
        A dict keyed by relative path; each value is
        ``(placeholders, read_ok, lower-cased file suffix)``.
    """
    def scan_one(path):
        extension = path.suffix.lower()
        found, read_ok = extract_placeholders_from_docx(path)
        return found, read_ok, extension

    return {rel_path: scan_one(path) for rel_path, path in local_templates.items()}
def update_or_create_template(conn, tenant_id: int, rel_path: str, file_path: Path, db_templates: Dict[str, Dict]):
    """Make sure a template row exists for ``rel_path``.

    When ``rel_path`` is already a key of ``db_templates`` nothing is written.
    Otherwise a new ``f_polic_file_config`` row is inserted (named after the
    file stem, enabled, with no parent) and committed.

    Returns:
        ``(template_id, status)`` where status is ``'exists'`` or ``'created'``.

    Raises:
        Any exception raised while working; the transaction is rolled back
        first, and the cursor is closed in every case.
    """
    insert_sql = (
        "\n"
        "INSERT INTO f_polic_file_config\n"
        "(id, tenant_id, parent_id, name, input_data, file_path, created_time, created_by, updated_time, updated_by, state)\n"
        "VALUES (%s, %s, %s, %s, %s, %s, NOW(), %s, NOW(), %s, 1)\n"
    )
    cur = conn.cursor()
    try:
        if rel_path in db_templates:
            # Already known: only report the existing id.
            return db_templates[rel_path]['id'], 'exists'

        new_id = generate_id()
        row_values = (
            new_id,
            tenant_id,
            None,            # parent_id
            file_path.stem,  # file name without its extension
            '{}',            # input_data
            rel_path,
            CREATED_BY,
            UPDATED_BY,
        )
        cur.execute(insert_sql, row_values)
        conn.commit()
        return new_id, 'created'
    except Exception as exc:
        conn.rollback()
        raise exc
    finally:
        cur.close()
def _find_format_changes(db_templates, local_templates):
    """Return ``(old_path, new_path, db_info)`` for rows whose .doc file is now a local .docx.

    A row qualifies when its path ends with ``.doc``, the same path with a
    ``.docx`` suffix exists locally, and no database row already uses that
    ``.docx`` path (repointing would otherwise create a duplicate path).
    """
    changes = []
    for db_path, db_info in db_templates.items():
        if not db_path.endswith('.doc'):
            continue
        # Swap only the trailing suffix; str.replace() would also rewrite any
        # ".doc" that happens to occur earlier in the path.
        docx_path = db_path[:-len('.doc')] + '.docx'
        if docx_path in local_templates and docx_path not in db_templates:
            changes.append((db_path, docx_path, db_info))
    return changes


def _apply_format_change(conn, template_id, new_path):
    """Point the template row ``template_id`` at ``new_path`` and commit.

    Rolls back and re-raises on failure; the cursor is always closed.
    NOTE(review): the statement leaves updated_time / updated_by untouched --
    confirm whether these audit columns should be refreshed here as well.
    """
    update_sql = (
        "\n"
        "UPDATE f_polic_file_config\n"
        "SET file_path = %s\n"
        "WHERE id = %s\n"
    )
    cursor = conn.cursor()
    try:
        cursor.execute(update_sql, (new_path, template_id))
        conn.commit()
    except Exception:
        conn.rollback()
        raise
    finally:
        cursor.close()


def main():
    """Rescan the local templates, report their placeholders and sync the database.

    Steps, each announced with a section heading on stdout:
    1. scan ``TEMPLATES_DIR`` for template files and print per-suffix counts;
    2. connect to the database (returns early when that fails);
    3. determine the tenant_id to work with;
    4. load the existing template rows;
    5. extract the placeholders of every local template and print statistics;
    6. list all distinct placeholders;
    7. insert rows for local templates that have no row yet;
    8. repoint rows whose ``.doc`` path now exists locally as ``.docx``;
    9. print up to five .docx templates together with their placeholders.

    The connection is closed in every case once it has been opened.
    """
    print_section("重新扫描模板占位符并更新数据库")

    # 1. Scan the local templates.
    print_section("1. 扫描本地模板文件")
    local_templates = scan_local_templates(TEMPLATES_DIR)
    print_result(True, f"找到 {len(local_templates)} 个本地模板文件")

    # Count the files per suffix.
    file_types = {}
    for file_path in local_templates.values():
        ext = file_path.suffix.lower()
        file_types[ext] = file_types.get(ext, 0) + 1
    print("\n文件类型统计:")
    for ext, count in sorted(file_types.items()):
        print(f" {ext}: {count}")

    if not local_templates:
        print_result(False, "未找到本地模板文件")
        return

    # 2. Connect to the database.
    print_section("2. 连接数据库")
    try:
        conn = pymysql.connect(**DB_CONFIG)
        print_result(True, "数据库连接成功")
    except Exception as e:
        print_result(False, f"数据库连接失败: {str(e)}")
        return

    try:
        # 3. Determine the tenant_id in use.
        print_section("3. 获取实际的tenant_id")
        tenant_id = get_actual_tenant_id(conn)
        print_result(True, f"实际tenant_id: {tenant_id}")

        # 4. Load the template rows already stored in the database.
        print_section("4. 获取数据库中的模板配置")
        db_templates = get_db_templates(conn, tenant_id)
        print_result(True, f"找到 {len(db_templates)} 条数据库模板记录有file_path的")

        # 5. Extract the placeholders of every local template.
        print_section("5. 扫描所有模板的占位符")
        print(" 正在扫描,请稍候...")
        template_placeholders = scan_all_templates_placeholders(local_templates)

        all_placeholders = set()
        templates_with_placeholders = 0
        templates_without_placeholders = 0
        templates_read_success = 0
        templates_read_failed = 0
        doc_files = []
        docx_files = []
        for rel_path, (placeholders, success, file_ext) in template_placeholders.items():
            all_placeholders.update(placeholders)
            if success:
                templates_read_success += 1
                if placeholders:
                    templates_with_placeholders += 1
                else:
                    templates_without_placeholders += 1
            else:
                templates_read_failed += 1
            # Record each file exactly once per suffix. Appending unreadable
            # .doc files in the failure branch as well would list them twice.
            if file_ext == '.docx':
                docx_files.append(rel_path)
            elif file_ext == '.doc':
                doc_files.append(rel_path)

        print("\n扫描结果统计:")
        print(f" - 成功读取: {templates_read_success}")
        print(f" - 读取失败: {templates_read_failed}")
        print(f" - 有占位符: {templates_with_placeholders}")
        print(f" - 无占位符: {templates_without_placeholders}")
        print(f" - 发现的占位符总数: {len(all_placeholders)} 个不同的占位符")
        if doc_files:
            print(f"\n [注意] 发现 {len(doc_files)} 个.doc文件可能无法读取:")
            for doc_file in doc_files[:5]:
                print(f" - {doc_file}")
            if len(doc_files) > 5:
                print(f" ... 还有 {len(doc_files) - 5}")
        print(f"\n .docx文件: {len(docx_files)}")

        # 6. List every distinct placeholder.
        print_section("6. 所有占位符列表")
        if all_placeholders:
            for placeholder in sorted(all_placeholders):
                print(f" - {placeholder}")
        else:
            print(" 未发现占位符")

        # Work out the .doc -> .docx renames before creating anything: a
        # converted file is handled by repointing its existing row in step 8.
        # Inserting a new row for it in step 7 as well would leave two rows
        # with the same file_path.
        format_changes = _find_format_changes(db_templates, local_templates)
        pending_renames = {new_path for _, new_path, _ in format_changes}

        # 7. Create rows for local templates that are missing from the database.
        print_section("7. 检查并更新数据库")
        missing_templates = [
            rel_path
            for rel_path in local_templates
            if rel_path not in db_templates and rel_path not in pending_renames
        ]
        if missing_templates:
            print(f" 发现 {len(missing_templates)} 个缺失的模板记录")
            created_count = 0
            for rel_path in missing_templates:
                file_path = local_templates[rel_path]
                try:
                    template_id, status = update_or_create_template(conn, tenant_id, rel_path, file_path, db_templates)
                    if status == 'created':
                        print(f" [创建] ID={template_id}, 路径={rel_path}")
                        created_count += 1
                except Exception as e:
                    print(f" [错误] 创建失败: {rel_path}, 错误: {str(e)}")
            if created_count > 0:
                print_result(True, f"成功创建 {created_count} 条模板记录")
        else:
            print_result(True, "所有本地模板都已存在于数据库中")

        # 8. Repoint rows whose .doc file has been converted to .docx.
        print_section("8. 检查文件格式变化")
        if format_changes:
            print(f" 发现 {len(format_changes)} 个文件格式变化(.doc -> .docx")
            updated_count = 0
            for old_path, new_path, db_info in format_changes:
                try:
                    _apply_format_change(conn, db_info['id'], new_path)
                    print(f" [更新] ID={db_info['id']}, 名称={db_info['name']}")
                    print(f" 旧路径: {old_path}")
                    print(f" 新路径: {new_path}")
                    updated_count += 1
                except Exception as e:
                    print(f" [错误] 更新失败: {str(e)}")
            if updated_count > 0:
                print_result(True, f"成功更新 {updated_count} 条路径记录")
        else:
            print_result(True, "未发现文件格式变化")

        # 9. Detailed report: examples of .docx templates that contain placeholders.
        print_section("9. 详细报告")
        templates_with_placeholders_list = [
            (rel_path, placeholders)
            for rel_path, (placeholders, success, file_ext) in template_placeholders.items()
            if success and placeholders and file_ext == '.docx'
        ]
        if templates_with_placeholders_list:
            print("\n 有占位符的模板示例前5个:")
            for i, (rel_path, placeholders) in enumerate(templates_with_placeholders_list[:5], 1):
                print(f"\n {i}. {Path(rel_path).name}")
                print(f" 路径: {rel_path}")
                print(f" 占位符数量: {len(placeholders)}")
                print(f" 占位符: {sorted(placeholders)}")
    finally:
        conn.close()
        print_result(True, "数据库连接已关闭")

    print_section("完成")


if __name__ == "__main__":
    main()

View File

@ -757,7 +757,7 @@ class DocumentService:
print(f" - ⚠️ 仍有未替换的占位符: {sorted(remaining_placeholders)}") print(f" - ⚠️ 仍有未替换的占位符: {sorted(remaining_placeholders)}")
print(f" - ⚠️ 警告:文档中仍存在占位符,可能格式不正确或替换逻辑有问题") print(f" - ⚠️ 警告:文档中仍存在占位符,可能格式不正确或替换逻辑有问题")
else: else:
print(f" - 所有占位符已成功替换") print(f" - [OK] 所有占位符已成功替换")
# 保存到临时文件 # 保存到临时文件
temp_dir = tempfile.gettempdir() temp_dir = tempfile.gettempdir()

Some files were not shown because too many files have changed in this diff Show More