101 lines
3.1 KiB
Python
101 lines
3.1 KiB
Python
"""
|
||
验证生成的Word文档中是否还有未替换的占位符
|
||
"""
|
||
import re
|
||
from pathlib import Path
|
||
from docx import Document
|
||
|
||
|
||
# Matches the {{field_code}} placeholder syntax; group 1 is the raw field code.
_PLACEHOLDER_RE = re.compile(r'\{\{([^}]+)\}\}')


def _collect_placeholders(text, found):
    """Add every placeholder name that occurs in *text* to the set *found*."""
    for raw in _PLACEHOLDER_RE.findall(text):
        name = raw.strip()
        # The capture group can never contain '}', but it can still hold a
        # stray '{' (e.g. "{{{x}}"); such fragments are not reported.
        if name and '{' not in name:
            found.add(name)


def _scan_table(table, found):
    """Collect placeholders from every cell of *table*, including nested tables."""
    for row in table.rows:
        for cell in row.cells:
            # Join the runs of all paragraphs first so that a placeholder
            # split across several runs is still seen as one string.
            cell_text = ''.join(
                run.text for para in cell.paragraphs for run in para.runs
            )
            if not cell_text:
                cell_text = cell.text
            _collect_placeholders(cell_text, found)

            # A cell may itself contain tables; descend into them so that
            # placeholders left inside nested tables are reported too.
            for nested in cell.tables:
                _scan_table(nested, found)


def check_placeholders_in_docx(file_path: str):
    """
    Find placeholders of the form ``{{field_code}}`` that remain in a docx file.

    The text exposed by ``doc.paragraphs`` and ``doc.tables`` is scanned;
    tables nested inside table cells are scanned recursively.

    Args:
        file_path: Path of the docx file to inspect.

    Returns:
        A set with the stripped names of all placeholders found (empty when
        none remain), or ``None`` when the file could not be read or scanned.
        In the latter case an error message is printed.
    """
    placeholders = set()

    try:
        doc = Document(file_path)

        # Placeholders in the document's paragraphs.
        for paragraph in doc.paragraphs:
            text = ''.join(run.text for run in paragraph.runs)
            if not text:
                text = paragraph.text
            _collect_placeholders(text, placeholders)

        # Placeholders in tables (and any tables nested in their cells).
        for table in doc.tables:
            _scan_table(table, placeholders)

        return placeholders

    except Exception as e:
        # Callers treat None as "file unreadable", so report and signal
        # failure instead of propagating the error.
        print(f" 错误: 读取文件失败 - {str(e)}")
        return None
|
||
|
||
|
||
def main():
    """Check the expected output documents and print a verification report.

    For each expected file under ``output_temp`` (next to this script) the
    report states whether the file is missing, unreadable, still contains
    placeholders, or is fully filled in. A summary line is printed at the end.
    """
    banner = "=" * 80
    output_dir = Path(__file__).parent / "output_temp"

    print(banner)
    print("验证生成的Word文档")
    print(banner)

    # The two generated documents that are expected to exist.
    targets = [
        output_dir / "2谈话审批表_已填充.docx",
        output_dir / "8-1请示报告卡(初核报告结论) _已填充.docx",
    ]

    failures = 0
    for target in targets:
        print(f"\n检查文件: {target.name}")

        if not target.exists():
            print(" [错误] 文件不存在")
            failures += 1
            continue

        leftover = check_placeholders_in_docx(str(target))

        # None means the document could not be read at all.
        if leftover is None:
            print(" [错误] 无法读取文件")
            failures += 1
            continue

        # An empty set means every placeholder was replaced.
        if not leftover:
            print(" [成功] 所有占位符已成功替换,文档可以正常使用")
            continue

        print(f" [警告] 发现 {len(leftover)} 个未替换的占位符:")
        for name in sorted(leftover):
            print(f" - {{{{ {name} }}}}")
        failures += 1

    print(f"\n{banner}")
    if failures == 0:
        print("验证结果: 所有文件验证通过!")
    else:
        print("验证结果: 部分文件存在问题,请检查")
    print(f"{banner}\n")
|
||
|
||
|
||
# Run the verification only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|