ai-business-write/check_specific_template_relations.py

199 lines
7.1 KiB
Python

"""
检查特定模板的关联关系
"""
import pymysql
import os
import re
from pathlib import Path
from docx import Document
from dotenv import load_dotenv
load_dotenv()
# Database connection settings. Host, port, user, password and database name
# are read from the DB_* environment variables, falling back to the literals
# below when a variable is unset; the charset is fixed.
# NOTE(review): the fallback host, user and password are committed in source.
# Consider dropping these defaults and rotating the credential.
DB_CONFIG = {
    "host": os.getenv("DB_HOST", "152.136.177.240"),
    "port": int(os.getenv("DB_PORT", 5012)),
    "user": os.getenv("DB_USER", "finyx"),
    "password": os.getenv("DB_PASSWORD", "6QsGK6MpePZDE57Z"),
    "database": os.getenv("DB_NAME", "finyx"),
    "charset": "utf8mb4",
}

# Tenant whose templates and fields are inspected.
TENANT_ID = 615873064429507639

# Template under inspection: its name in the database and the path of its
# .docx file.
TEMPLATE_NAME = "1.请示报告卡(初核谈话)"
TEMPLATE_FILE = (
    "template_finish/2-初核模版/2.谈话审批/走读式谈话审批/1.请示报告卡(初核谈话).docx"
)
def _iter_docx_paragraphs(container):
    """Yield every paragraph of *container*, descending into its tables.

    *container* is any object exposing ``paragraphs`` and ``tables`` the way
    a python-docx document or table cell does. Cells are visited
    recursively, so paragraphs of tables nested inside a cell are included.
    """
    yield from container.paragraphs
    for table in container.tables:
        for row in table.rows:
            for cell in row.cells:
                yield from _iter_docx_paragraphs(cell)


def extract_placeholders_from_docx(file_path: str):
    """Extract all ``{{placeholder}}`` names from a .docx file.

    Body paragraphs and table cells (including tables nested inside cells)
    are scanned. Names are stripped of surrounding whitespace; empty names
    and names that still contain a ``{`` are ignored.

    Args:
        file_path: Path of the .docx file to read.

    Returns:
        A sorted list of the unique placeholder names. If the document
        cannot be opened or parsed, the error is printed and an empty list
        is returned instead of raising.
    """
    pattern = re.compile(r'\{\{([^}]+)\}\}')
    placeholders = set()
    try:
        doc = Document(file_path)
        for paragraph in _iter_docx_paragraphs(doc):
            for match in pattern.findall(paragraph.text):
                cleaned = match.strip()
                # The pattern already excludes '}', so only a stray '{'
                # (e.g. from "{{{name}}") needs to be filtered out here.
                if cleaned and '{' not in cleaned:
                    placeholders.add(cleaned)
    except Exception as e:
        # Deliberately best-effort: this is a diagnostic script, so report
        # the failure and let the caller continue with no placeholders.
        print(f"错误: 读取文件失败 - {str(e)}")
        return []
    return sorted(placeholders)
def _state_to_int(state):
    """Normalize a ``state`` column value for comparison with ``1``.

    The driver may hand the column back as ``bytes``
    (NOTE(review): presumably a MySQL BIT column; confirm against the
    schema). A single byte is decoded as a big-endian integer; any other
    byte length is treated as 1. Non-bytes values are returned unchanged.
    """
    if isinstance(state, bytes):
        return int.from_bytes(state, byteorder='big') if len(state) == 1 else 1
    return state


def _print_comparison(label, expected, actual):
    """Print expected vs. actual id sets and, on mismatch, their difference.

    Args:
        label: Field-kind label embedded in the printed messages.
        expected: Set of field ids derived from the template document.
        actual: Set of field ids found in the database relations.
    """
    print(f" {label} - 期望: {sorted(expected)}, 实际: {sorted(actual)}")
    print(f" {label}匹配: {expected == actual}")
    if expected != actual:
        print(f" 缺少的{label}: {sorted(expected - actual)}")
        print(f" 多余的{label}: {sorted(actual - expected)}")


def _run_template_check(cursor):
    """Run the six-step check using an open dict cursor.

    Steps: extract the placeholders from ``TEMPLATE_FILE``, look up the
    template id by ``TENANT_ID`` and ``TEMPLATE_NAME``, load the tenant's
    fields, map placeholders to field ids, load the relations stored for the
    template, and print the comparison. Returns early (after printing a
    message) if the file or the template row does not exist.
    """
    print(f"检查模板: {TEMPLATE_NAME}")
    print("=" * 80)

    # 1. Extract placeholders from the template document.
    print("\n1. 从文档提取占位符:")
    if not Path(TEMPLATE_FILE).exists():
        print(f" 文件不存在: {TEMPLATE_FILE}")
        return
    placeholders = extract_placeholders_from_docx(TEMPLATE_FILE)
    print(f" 占位符数量: {len(placeholders)}")
    print(f" 占位符列表: {placeholders}")

    # 2. Look up the template id.
    print("\n2. 查询模板ID:")
    cursor.execute("""
        SELECT id, name
        FROM f_polic_file_config
        WHERE tenant_id = %s AND name = %s
    """, (TENANT_ID, TEMPLATE_NAME))
    template = cursor.fetchone()
    if not template:
        print(" 模板不存在")
        return
    template_id = template['id']
    print(f" 模板ID: {template_id}")

    # 3. Load all fields of the tenant, keyed by their code.
    print("\n3. 查询字段映射:")
    cursor.execute("""
        SELECT id, name, filed_code, field_type, state
        FROM f_polic_field
        WHERE tenant_id = %s
    """, (TENANT_ID,))
    field_map = {
        field['filed_code']: {
            'id': field['id'],
            'name': field['name'],
            'field_type': field['field_type'],
            'state': _state_to_int(field['state']),
        }
        for field in cursor.fetchall()
    }
    print(f" 字段总数: {len(field_map)}")

    # 4. Map placeholders to field ids. Only fields with state == 1 count;
    #    field_type 1 goes to the input list and 2 to the output list.
    print("\n4. 匹配占位符到字段:")
    input_field_ids = []
    output_field_ids = []
    not_found = []
    for placeholder in placeholders:
        field_info = field_map.get(placeholder)
        if field_info is None:
            not_found.append(placeholder)
            continue
        if field_info['state'] != 1:
            continue
        if field_info['field_type'] == 1:
            input_field_ids.append(field_info['id'])
        elif field_info['field_type'] == 2:
            output_field_ids.append(field_info['id'])

    # These input fields are always expected, whether or not the document
    # contains a placeholder for them.
    required_input_fields = ['clue_info', 'target_basic_info_clue']
    for req_field in required_input_fields:
        field_info = field_map.get(req_field)
        if (
            field_info is not None
            and field_info['state'] == 1
            and field_info['id'] not in input_field_ids
        ):
            input_field_ids.append(field_info['id'])

    print(f" 输入字段ID: {input_field_ids}")
    print(f" 输出字段ID: {output_field_ids}")
    if not_found:
        print(f" 未找到的占位符: {not_found}")

    # 5. Load the relations currently stored for this template.
    print("\n5. 查询数据库中的关联关系:")
    cursor.execute("""
        SELECT fff.filed_id, fff.state, f.name, f.field_type
        FROM f_polic_file_field fff
        INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
        WHERE fff.tenant_id = %s AND fff.file_id = %s
    """, (TENANT_ID, template_id))
    db_input_ids = []
    db_output_ids = []
    for rel in cursor.fetchall():
        if _state_to_int(rel['state']) != 1:
            continue
        if rel['field_type'] == 1:
            db_input_ids.append(rel['filed_id'])
        elif rel['field_type'] == 2:
            db_output_ids.append(rel['filed_id'])
    print(f" 数据库中的输入字段ID: {sorted(db_input_ids)}")
    print(f" 数据库中的输出字段ID: {sorted(db_output_ids)}")

    # 6. Compare expected (document) with actual (database) ids. Both kinds
    #    get a missing/extra breakdown when they differ.
    print("\n6. 对比结果:")
    _print_comparison("输入字段", set(input_field_ids), set(db_input_ids))
    _print_comparison("输出字段", set(output_field_ids), set(db_output_ids))


def check_template():
    """Check the template's field relations and print a report.

    Opens a database connection from ``DB_CONFIG``, compares the
    placeholders in the template document with the relations stored in the
    database, and prints each step. The cursor and connection are closed
    even if a step raises.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        # Created inside the try so the connection is still closed when
        # cursor creation itself fails.
        cursor = conn.cursor(pymysql.cursors.DictCursor)
        try:
            _run_template_check(cursor)
        finally:
            cursor.close()
    finally:
        conn.close()
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    check_template()