"""Verify that generated Word documents contain no unreplaced placeholders.

A placeholder is any ``{{field_code}}`` token left in the document text.
The script checks a fixed list of generated files under ``output_temp``
(next to this file) and prints a per-file report.
"""
import re
import sys
from pathlib import Path
from typing import Iterator, Optional, Set

# Matches {{field_code}}. Braces are excluded from the captured name so that
# the innermost token of input such as "{{{name}}}" or "{{ a {{b}} }}" is
# still found instead of being swallowed by a longer, rejected match.
_PLACEHOLDER_RE = re.compile(r'\{\{([^{}]+)\}\}')


def extract_placeholders(text: str) -> Set[str]:
    """Return the distinct placeholder names found in *text*.

    Names are stripped of surrounding whitespace; tokens whose name is
    empty after stripping (e.g. ``{{ }}``) are ignored.
    """
    stripped = (raw.strip() for raw in _PLACEHOLDER_RE.findall(text))
    return {name for name in stripped if name}


def _iter_table_texts(table) -> Iterator[str]:
    """Yield one text chunk per cell of *table*, descending into nested tables."""
    for row in table.rows:
        for cell in row.cells:
            # Paragraphs of a cell are concatenated without a separator so a
            # placeholder that straddles two paragraphs of the same cell is
            # still detected.
            yield ''.join(paragraph.text for paragraph in cell.paragraphs)
            # A table nested inside a cell is only reachable through the cell.
            for nested_table in cell.tables:
                yield from _iter_table_texts(nested_table)


def _iter_document_texts(doc) -> Iterator[str]:
    """Yield the text of every body paragraph and every table cell of *doc*."""
    for paragraph in doc.paragraphs:
        # paragraph.text concatenates the paragraph's run text, so a
        # placeholder split across several runs is seen as one token.
        yield paragraph.text
    for table in doc.tables:
        yield from _iter_table_texts(table)


def check_placeholders_in_docx(file_path: str) -> Optional[Set[str]]:
    """Collect placeholders that are still present in a docx file.

    Body paragraphs and table cells (including tables nested inside cells)
    are inspected. Headers, footers and text boxes are not inspected.

    Args:
        file_path: Path of the docx file to inspect.

    Returns:
        The set of placeholder names found (empty when the document is
        clean), or ``None`` when the file could not be read; in that case
        the error is printed.
    """
    try:
        # Imported here so that a missing python-docx installation is reported
        # per file like any other read failure instead of aborting at import.
        from docx import Document

        doc = Document(file_path)
        placeholders: Set[str] = set()
        for text in _iter_document_texts(doc):
            placeholders |= extract_placeholders(text)
        return placeholders
    except Exception as e:
        # Deliberately broad: whatever goes wrong while opening or walking the
        # document is reported to the caller as "unreadable" via None.
        print(f" 错误: 读取文件失败 - {e}")
        return None


def main() -> int:
    """Check the generated documents and print a verification report.

    Returns:
        0 when every file exists, is readable and has no leftover
        placeholders; 1 otherwise.
    """
    project_root = Path(__file__).parent
    output_dir = project_root / "output_temp"

    print("="*80)
    print("验证生成的Word文档")
    print("="*80)

    # The two generated files to verify.
    files_to_check = [
        output_dir / "2谈话审批表_已填充.docx",
        output_dir / "8-1请示报告卡(初核报告结论) _已填充.docx",
    ]

    all_success = True

    for file_path in files_to_check:
        print(f"\n检查文件: {file_path.name}")

        if not file_path.exists():
            print(" [错误] 文件不存在")
            all_success = False
            continue

        placeholders = check_placeholders_in_docx(str(file_path))

        if placeholders is None:
            print(" [错误] 无法读取文件")
            all_success = False
        elif placeholders:
            print(f" [警告] 发现 {len(placeholders)} 个未替换的占位符:")
            for placeholder in sorted(placeholders):
                print(f" - {{{{ {placeholder} }}}}")
            all_success = False
        else:
            print(" [成功] 所有占位符已成功替换,文档可以正常使用")

    print(f"\n{'='*80}")
    if all_success:
        print("验证结果: 所有文件验证通过!")
    else:
        print("验证结果: 部分文件存在问题,请检查")
    print(f"{'='*80}\n")

    return 0 if all_success else 1


if __name__ == "__main__":
    # Propagate the verification result as the process exit status.
    sys.exit(main())