# ai-business-write/check_specific_template_relations.py

"""
检查特定模板的关联关系
"""
import pymysql
import os
import re
from pathlib import Path
from docx import Document
from dotenv import load_dotenv

load_dotenv()

# Connection settings for pymysql.connect(); each value can be overridden via
# the environment (load_dotenv() above also pulls them from a .env file).
#
# NOTE(security): the fallback host/user/password below are hard-coded in
# source. They are kept so the script still runs without a .env file, but
# they should be moved into the environment / a secret store and rotated.
DB_CONFIG = {
    'host': os.getenv('DB_HOST', '152.136.177.240'),
    # Use `or` instead of a getenv default so that a blank `DB_PORT=` entry
    # falls back to the default port rather than crashing on int('').
    'port': int(os.getenv('DB_PORT') or 5012),
    'user': os.getenv('DB_USER', 'finyx'),
    'password': os.getenv('DB_PASSWORD', '6QsGK6MpePZDE57Z'),
    'database': os.getenv('DB_NAME', 'finyx'),
    'charset': 'utf8mb4',
}

# Tenant whose field/template rows are inspected.
TENANT_ID = 615873064429507639
# Template name looked up in f_polic_file_config, and the .docx file (path
# relative to the working directory) that is scanned for placeholders.
TEMPLATE_NAME = "1.请示报告卡（初核谈话）"
TEMPLATE_FILE = "template_finish/2-初核模版/2.谈话审批/走读式谈话审批/1.请示报告卡（初核谈话）.docx"

# Matches ``{{ name }}`` and captures the text between the double braces.
# Compiled once so it is not re-parsed for every paragraph.
_PLACEHOLDER_RE = re.compile(r'\{\{([^}]+)\}\}')


def _iter_docx_texts(doc):
    """Yield the text of each paragraph in ``doc.paragraphs``, then of each
    paragraph inside every cell of ``doc.tables``."""
    for paragraph in doc.paragraphs:
        yield paragraph.text
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    yield paragraph.text


def extract_placeholders_from_docx(file_path: str) -> list:
    """Extract every ``{{placeholder}}`` name found in a .docx file.

    Scans ``doc.paragraphs`` and the paragraphs of every cell in
    ``doc.tables``. Anything else in the document (e.g. headers/footers or
    tables nested inside cells) is not visited by this function.

    Args:
        file_path: Path of the .docx file to open.

    Returns:
        A sorted list of unique placeholder names with surrounding
        whitespace stripped. If the file cannot be read or parsed, an error
        line is printed and an empty list is returned, so callers cannot
        distinguish "no placeholders" from "unreadable file" by the return
        value alone.
    """
    placeholders = set()

    try:
        doc = Document(file_path)
        for text in _iter_docx_texts(doc):
            for raw in _PLACEHOLDER_RE.findall(text):
                name = raw.strip()
                # The capture group can never contain '}', so only a stray
                # '{' (e.g. "{{a{b}}") needs to be rejected here.
                if name and '{' not in name:
                    placeholders.add(name)
    except Exception as e:
        # Deliberate best-effort: report the failure and return no placeholders.
        print(f"错误: 读取文件失败 - {str(e)}")
        return []

    return sorted(placeholders)

def _state_to_int(state):
    """Return a ``state`` column value in comparable integer form.

    When the driver hands the value back as ``bytes`` (presumably a BIT-like
    column -- TODO confirm against the schema), a single byte is decoded
    big-endian and any other byte length falls back to 1. Non-bytes values
    are returned unchanged.
    """
    if isinstance(state, bytes):
        return int.from_bytes(state, byteorder='big') if len(state) == 1 else 1
    return state


def _print_field_comparison(label, expected, actual):
    """Print expected vs. actual field-ID sets and, on mismatch, the IDs
    missing from / extra in the database. ``label`` is the field-kind word
    embedded in the printed messages."""
    print(f"  {label}字段 - 期望: {sorted(expected)}, 实际: {sorted(actual)}")
    print(f"  {label}字段匹配: {expected == actual}")
    if expected != actual:
        print(f"  缺少的{label}字段: {sorted(expected - actual)}")
        print(f"  多余的{label}字段: {sorted(actual - expected)}")


def check_template():
    """Compare the fields a template document needs with those linked in the DB.

    Steps (each one prints its findings to stdout):
      1. Extract ``{{placeholder}}`` names from TEMPLATE_FILE.
      2. Look up the template row by TENANT_ID and TEMPLATE_NAME.
      3. Load all of the tenant's fields, keyed by ``filed_code``.
      4. Map placeholders to enabled fields (``state == 1``), split into
         input (``field_type == 1``) and output (``field_type == 2``) IDs,
         then add the always-required input fields.
      5. Load the template's enabled relations from ``f_polic_file_field``.
      6. Print expected vs. actual ID sets for both input and output fields,
         including missing/extra IDs when they differ.

    Returns None. Returns early (after printing a message) if the template
    file or the template row does not exist. Database errors propagate.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        # The cursor is opened inside the try so the connection is closed
        # even if cursor creation fails; the with-block closes the cursor.
        with conn.cursor(pymysql.cursors.DictCursor) as cursor:
            print(f"检查模板: {TEMPLATE_NAME}")
            print("=" * 80)

            # 1. Extract placeholders from the document.
            print("\n1. 从文档提取占位符:")
            if not Path(TEMPLATE_FILE).exists():
                print(f"  文件不存在: {TEMPLATE_FILE}")
                return

            placeholders = extract_placeholders_from_docx(TEMPLATE_FILE)
            print(f"  占位符数量: {len(placeholders)}")
            print(f"  占位符列表: {placeholders}")

            # 2. Look up the template ID.
            print("\n2. 查询模板ID:")
            cursor.execute("""
                SELECT id, name
                FROM f_polic_file_config
                WHERE tenant_id = %s AND name = %s
            """, (TENANT_ID, TEMPLATE_NAME))
            # Only the first matching row is used if several share the name.
            template = cursor.fetchone()
            if not template:
                print("  模板不存在")
                return

            template_id = template['id']
            print(f"  模板ID: {template_id}")

            # 3. Load the tenant's field definitions.
            # ("filed_code" / "filed_id" are the column names as spelled in
            # the queries; do not "correct" them.)
            print("\n3. 查询字段映射:")
            cursor.execute("""
                SELECT id, name, filed_code, field_type, state
                FROM f_polic_field
                WHERE tenant_id = %s
            """, (TENANT_ID,))
            fields = cursor.fetchall()

            # Keyed by filed_code; if two rows share a code the later row wins.
            field_map = {
                field['filed_code']: {
                    'id': field['id'],
                    'name': field['name'],
                    'field_type': field['field_type'],
                    'state': _state_to_int(field['state']),
                }
                for field in fields
            }

            print(f"  字段总数: {len(field_map)}")

            # 4. Match placeholders to fields.
            print("\n4. 匹配占位符到字段:")
            input_field_ids = []
            output_field_ids = []
            not_found = []

            for placeholder in placeholders:
                field_info = field_map.get(placeholder)
                if field_info is None:
                    not_found.append(placeholder)
                    continue
                # Known fields that are not enabled, or whose type is neither
                # 1 nor 2, are silently left out of the expected sets.
                if field_info['state'] != 1:
                    continue
                if field_info['field_type'] == 1:
                    input_field_ids.append(field_info['id'])
                elif field_info['field_type'] == 2:
                    output_field_ids.append(field_info['id'])

            # Always-required input fields are expected even when the document
            # has no placeholder for them. They are added as inputs without
            # looking at their field_type.
            required_input_fields = ['clue_info', 'target_basic_info_clue']
            for req_field in required_input_fields:
                field_info = field_map.get(req_field)
                if field_info is None:
                    continue
                if field_info['state'] == 1 and field_info['id'] not in input_field_ids:
                    input_field_ids.append(field_info['id'])

            print(f"  输入字段ID: {input_field_ids}")
            print(f"  输出字段ID: {output_field_ids}")
            if not_found:
                print(f"  未找到的占位符: {not_found}")

            # 5. Load the relations currently stored for this template.
            print("\n5. 查询数据库中的关联关系:")
            cursor.execute("""
                SELECT fff.filed_id, fff.state, f.name, f.field_type
                FROM f_polic_file_field fff
                INNER JOIN f_polic_field f ON fff.filed_id = f.id AND fff.tenant_id = f.tenant_id
                WHERE fff.tenant_id = %s AND fff.file_id = %s
            """, (TENANT_ID, template_id))
            db_relations = cursor.fetchall()

            db_input_ids = []
            db_output_ids = []
            for rel in db_relations:
                # Here `state` is the relation row's state (fff.state).
                if _state_to_int(rel['state']) != 1:
                    continue
                if rel['field_type'] == 1:
                    db_input_ids.append(rel['filed_id'])
                elif rel['field_type'] == 2:
                    db_output_ids.append(rel['filed_id'])

            print(f"  数据库中的输入字段ID: {sorted(db_input_ids)}")
            print(f"  数据库中的输出字段ID: {sorted(db_output_ids)}")

            # 6. Compare expected vs. actual for both kinds of field.
            print("\n6. 对比结果:")
            _print_field_comparison("输入", set(input_field_ids), set(db_input_ids))
            _print_field_comparison("输出", set(output_field_ids), set(db_output_ids))
    finally:
        conn.close()

# Run the check only when this file is executed as a script, not on import.
if __name__ == '__main__':
    check_template()