ai-business-write/verify_generated_files.py
2025-12-15 14:45:42 +08:00

101 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

"""
验证生成的Word文档中是否还有未替换的占位符
"""
import re
from pathlib import Path
from docx import Document
# Matches {{field_code}} style placeholders; group 1 is the text between the braces.
_PLACEHOLDER_PATTERN = re.compile(r'\{\{([^}]+)\}\}')


def _placeholders_in_paragraphs(paragraphs) -> set:
    """Return the placeholder names found in an iterable of docx paragraphs."""
    found = set()
    for paragraph in paragraphs:
        # Join the runs so a placeholder split across several runs is matched
        # as one string; fall back to paragraph.text when the runs yield nothing.
        text = ''.join(run.text for run in paragraph.runs) or paragraph.text
        for match in _PLACEHOLDER_PATTERN.findall(text):
            cleaned = match.strip()
            # Skip blank names and malformed matches such as "{{{name}}".
            if cleaned and '{' not in cleaned and '}' not in cleaned:
                found.add(cleaned)
    return found


def _placeholders_in_tables(tables) -> set:
    """Return the placeholder names found in tables, following nested tables."""
    found = set()
    for table in tables:
        for row in table.rows:
            for cell in row.cells:
                found |= _placeholders_in_paragraphs(cell.paragraphs)
                # A table nested inside a cell is not listed in the document's
                # top-level ``tables``, so it has to be walked explicitly.
                found |= _placeholders_in_tables(cell.tables)
    return found


def check_placeholders_in_docx(file_path: str):
    """Collect the unreplaced ``{{field_code}}`` placeholders left in a docx file.

    Body paragraphs and tables are scanned. Table cells are scanned paragraph
    by paragraph (the same way as body paragraphs), and tables nested inside
    a cell are followed recursively.

    Args:
        file_path: Path of the docx file to inspect.

    Returns:
        A set with the whitespace-stripped name of every placeholder found
        (empty when none remain), or None when the file could not be read;
        in that case the error is printed.
    """
    try:
        doc = Document(file_path)
        placeholders = _placeholders_in_paragraphs(doc.paragraphs)
        placeholders |= _placeholders_in_tables(doc.tables)
        return placeholders
    except Exception as e:
        # Deliberately broad: any failure to open or walk the document is
        # reported here and signalled to the caller with None.
        print(f" 错误: 读取文件失败 - {e}")
        return None
def main(files_to_check=None):
    """Verify generated Word documents and print a report of leftover placeholders.

    Args:
        files_to_check: Optional iterable of docx paths (``str`` or ``Path``)
            to verify. When omitted, the two generated files in the
            ``output_temp`` directory next to this script are checked.

    Returns:
        True when every file exists, could be read and contains no unreplaced
        placeholders; False otherwise.
    """
    if files_to_check is None:
        output_dir = Path(__file__).parent / "output_temp"
        files_to_check = [
            output_dir / "2谈话审批表_已填充.docx",
            output_dir / "8-1请示报告卡初核报告结论 _已填充.docx",
        ]

    separator = "=" * 80
    print(separator)
    print("验证生成的Word文档")
    print(separator)

    all_success = True
    for file_path in map(Path, files_to_check):
        print(f"\n检查文件: {file_path.name}")

        if not file_path.exists():
            print(" [错误] 文件不存在")
            all_success = False
            continue

        placeholders = check_placeholders_in_docx(str(file_path))
        if placeholders is None:
            # The checker already printed the underlying read error.
            print(" [错误] 无法读取文件")
            all_success = False
        elif placeholders:
            print(f" [警告] 发现 {len(placeholders)} 个未替换的占位符:")
            for placeholder in sorted(placeholders):
                # Quadrupled braces render as a literal "{{ name }}".
                print(f" - {{{{ {placeholder} }}}}")
            all_success = False
        else:
            print(" [成功] 所有占位符已成功替换,文档可以正常使用")

    print(f"\n{separator}")
    if all_success:
        print("验证结果: 所有文件验证通过!")
    else:
        print("验证结果: 部分文件存在问题,请检查")
    print(f"{separator}\n")
    return all_success
# Run the verification only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()