"""Check whether the placeholders in the template files under the
``template_finish`` folder can be recognised correctly.

The script walks a directory tree, opens every ``.docx`` file with
python-docx, collects every ``{{field_code}}`` placeholder and prints a report.
"""
import os
import re
from collections import defaultdict  # retained from the original import list; unused below
from pathlib import Path

from docx import Document

# Matches ``{{field_code}}``; the captured group is the text between the braces.
# Compiled once at import time instead of on every call.
_PLACEHOLDER_RE = re.compile(r'\{\{([^}]+)\}\}')


def _iter_paragraphs(doc):
    """Yield the document's body paragraphs, then the paragraphs of every cell
    of every table returned by ``doc.tables``."""
    yield from doc.paragraphs
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                yield from cell.paragraphs


def _print_banner(title):
    """Print *title* framed by two 80-character ``=`` rules."""
    print("=" * 80)
    print(title)
    print("=" * 80)


def extract_placeholders_from_docx(file_path):
    """Extract all placeholders from a docx file.

    Only ``doc.paragraphs`` and the cell paragraphs of ``doc.tables`` are
    scanned. Matching is done on each paragraph's ``text``, so a placeholder
    is found only if it lies within a single paragraph.

    Args:
        file_path: Path of the docx file.

    Returns:
        Sorted list of unique placeholder names with surrounding whitespace
        stripped, e.g. ``['field_code1', 'field_code2', ...]``. If the file
        cannot be read, an error line is printed and ``[]`` is returned, so a
        read failure is indistinguishable from "no placeholders" for callers.
    """
    try:
        doc = Document(file_path)
        placeholders = {
            match.strip()
            for paragraph in _iter_paragraphs(doc)
            for match in _PLACEHOLDER_RE.findall(paragraph.text)
        }
    except Exception as e:
        # Deliberately best-effort: one unreadable file must not abort the
        # whole directory scan.
        print(f" 错误: 读取文件失败 - {e}")
        return []

    return sorted(placeholders)


def check_templates_in_directory(base_dir):
    """Check the placeholders of every template file below a directory.

    Prints a per-file listing, a summary, the list of all unique placeholders
    and a per-file detail section.

    Args:
        base_dir: Root directory of the template files.

    Returns:
        ``None`` (after printing an error) when ``base_dir`` is not an
        existing directory. Otherwise a dict with the keys ``total_files``,
        ``valid_files``, ``invalid_files``, ``all_placeholders`` (relative
        file path -> sorted placeholder list, only for files that contain
        placeholders) and ``unique_placeholders`` (sorted list).
    """
    base_path = Path(base_dir)
    # is_dir() rather than exists(): a regular file is not a usable root.
    if not base_path.is_dir():
        print(f"错误: 目录不存在 - {base_dir}")
        return None

    # Statistics
    total_files = 0
    valid_files = 0
    invalid_files = 0
    all_placeholders = {}  # relative file path -> sorted list of placeholders
    all_unique_placeholders = set()  # every distinct placeholder seen

    _print_banner("模板文件占位符检查报告")
    print()

    # Walk all docx files; sorted so the numbering of the report is stable
    # across runs and platforms.
    for docx_file in sorted(base_path.rglob("*.docx")):
        # Skip temporary files (names starting with "~$").
        if docx_file.name.startswith("~$"):
            continue

        total_files += 1
        relative_path = docx_file.relative_to(base_path)
        print(f"[{total_files}] 检查文件: {relative_path}")

        placeholders = extract_placeholders_from_docx(str(docx_file))

        if placeholders:
            valid_files += 1
            all_placeholders[str(relative_path)] = placeholders
            all_unique_placeholders.update(placeholders)
            print(f" ✓ 找到 {len(placeholders)} 个占位符:")
            for i, placeholder in enumerate(placeholders, 1):
                print(f" {i}. {{{{ {placeholder} }}}}")
        else:
            # Also reached when the file could not be read (see
            # extract_placeholders_from_docx).
            invalid_files += 1
            print(" ⚠ 未找到占位符")
        print()

    unique_sorted = sorted(all_unique_placeholders)

    # Summary
    _print_banner("检查汇总")
    print(f"总文件数: {total_files}")
    print(f"包含占位符的文件: {valid_files}")
    print(f"未找到占位符的文件: {invalid_files}")
    print(f"唯一占位符总数: {len(all_unique_placeholders)}")
    print()

    # All unique placeholders
    if unique_sorted:
        print("所有唯一占位符列表:")
        for i, placeholder in enumerate(unique_sorted, 1):
            print(f" {i}. {{{{ {placeholder} }}}}")
        print()

    # Per-file details
    _print_banner("各文件占位符详情")
    for file_path, placeholders in sorted(all_placeholders.items()):
        print(f"\n文件: {file_path}")
        print(f"占位符数量: {len(placeholders)}")
        for placeholder in placeholders:
            print(f" - {{{{ {placeholder} }}}}")

    # Return the result so other scripts can use it.
    return {
        'total_files': total_files,
        'valid_files': valid_files,
        'invalid_files': invalid_files,
        'all_placeholders': dict(all_placeholders),
        'unique_placeholders': unique_sorted,
    }


def main():
    """Run the check on the ``template_finish`` folder next to this script."""
    template_dir = os.path.join(os.path.dirname(__file__), 'template_finish')

    print(f"检查目录: {template_dir}")
    print()

    result = check_templates_in_directory(template_dir)

    # None means the directory was missing; the error was already printed.
    if result is not None:
        print("\n" + "=" * 80)
        print("检查完成!")
        print("=" * 80)


if __name__ == '__main__':
    main()