156 lines
4.6 KiB
Python
156 lines
4.6 KiB
Python
"""
|
|
检查template_finish文件夹下的模板文件占位符是否可以被正确识别
|
|
"""
|
|
import os
|
|
import re
|
|
from pathlib import Path
|
|
from docx import Document
|
|
from collections import defaultdict
|
|
|
|
|
|
# Matches ``{{field_code}}``; the capture group is the text between the braces.
_PLACEHOLDER_RE = re.compile(r'\{\{([^}]+)\}\}')


def _collect_from_paragraphs(paragraphs, found):
    """Add every placeholder name found in *paragraphs* to the set *found*."""
    for paragraph in paragraphs:
        found.update(
            name.strip() for name in _PLACEHOLDER_RE.findall(paragraph.text)
        )


def _collect_from_tables(tables, found):
    """Add placeholders from *tables*, descending into tables nested in cells."""
    for table in tables:
        for row in table.rows:
            for cell in row.cells:
                _collect_from_paragraphs(cell.paragraphs, found)
                # The document-level table list only contains top-level
                # tables, so nested ones must be reached through their cell.
                _collect_from_tables(cell.tables, found)


def extract_placeholders_from_docx(file_path):
    """
    Extract all ``{{field_code}}`` placeholders from a docx file.

    Body paragraphs and tables are scanned, including tables nested inside
    table cells. Surrounding whitespace inside the braces is stripped and
    duplicates are collapsed.

    Args:
        file_path: Path of the docx file.

    Returns:
        Sorted list of unique placeholder names, e.g.
        ['field_code1', 'field_code2', ...]. An empty list is returned when
        the file contains no placeholder or when it cannot be read (in which
        case an error line is printed).
    """
    found = set()

    try:
        doc = Document(file_path)
        _collect_from_paragraphs(doc.paragraphs, found)
        _collect_from_tables(doc.tables, found)
    except Exception as e:
        # Best-effort checker: report the unreadable file and keep going
        # instead of aborting the whole scan.
        print(f" 错误: 读取文件失败 - {e}")
        return []

    return sorted(found)
|
|
|
|
|
|
def _print_section_title(title):
    """Print *title* framed by two 80-character rules."""
    rule = "=" * 80
    print(rule)
    print(title)
    print(rule)


def check_templates_in_directory(base_dir):
    """
    Check the placeholders of every docx template below a directory.

    The directory is searched recursively for ``*.docx`` files. Temporary
    files whose names start with ``~$`` are skipped. A report is printed to
    stdout and the collected data is returned.

    Args:
        base_dir: Root directory of the template files.

    Returns:
        None when ``base_dir`` is not an existing directory. Otherwise a dict
        with the keys:
            'total_files': number of files checked
            'valid_files': number of files with at least one placeholder
            'invalid_files': number of files for which no placeholder was
                returned
            'all_placeholders': dict mapping relative file path (str) to the
                sorted placeholder list of that file
            'unique_placeholders': sorted list of all distinct placeholders
    """
    base_path = Path(base_dir)
    # A regular file passes an exists() test but has nothing to scan,
    # so require an actual directory.
    if not base_path.is_dir():
        print(f"错误: 目录不存在 - {base_dir}")
        return None

    # Statistics
    valid_files = 0
    invalid_files = 0
    all_placeholders = {}  # relative file path -> sorted placeholder list
    all_unique_placeholders = set()  # every distinct placeholder seen

    _print_section_title("模板文件占位符检查报告")
    print()

    # Sort the matches so the numbering in the report is stable between
    # runs; rglob itself yields entries in filesystem order.
    docx_files = sorted(
        path
        for path in base_path.rglob("*.docx")
        if path.is_file() and not path.name.startswith("~$")
    )
    total_files = len(docx_files)

    for index, docx_file in enumerate(docx_files, 1):
        relative_path = docx_file.relative_to(base_path)
        print(f"[{index}] 检查文件: {relative_path}")

        placeholders = extract_placeholders_from_docx(str(docx_file))

        if placeholders:
            valid_files += 1
            all_placeholders[str(relative_path)] = placeholders
            all_unique_placeholders.update(placeholders)

            print(f" ✓ 找到 {len(placeholders)} 个占位符:")
            for i, placeholder in enumerate(placeholders, 1):
                print(f" {i}. {{{{ {placeholder} }}}}")
        else:
            # extract_placeholders_from_docx returns an empty list both when
            # nothing matched and when the file could not be read, so both
            # cases are counted here.
            invalid_files += 1
            print(" ⚠ 未找到占位符")

        print()

    # Summary
    _print_section_title("检查汇总")
    print(f"总文件数: {total_files}")
    print(f"包含占位符的文件: {valid_files}")
    print(f"未找到占位符的文件: {invalid_files}")
    print(f"唯一占位符总数: {len(all_unique_placeholders)}")
    print()

    unique_sorted = sorted(all_unique_placeholders)

    # All distinct placeholders
    if unique_sorted:
        print("所有唯一占位符列表:")
        for i, placeholder in enumerate(unique_sorted, 1):
            print(f" {i}. {{{{ {placeholder} }}}}")
        print()

    # Per-file details
    _print_section_title("各文件占位符详情")
    for file_path, placeholders in sorted(all_placeholders.items()):
        print(f"\n文件: {file_path}")
        print(f"占位符数量: {len(placeholders)}")
        for placeholder in placeholders:
            print(f" - {{{{ {placeholder} }}}}")

    # Returned so other scripts can reuse the result
    return {
        'total_files': total_files,
        'valid_files': valid_files,
        'invalid_files': invalid_files,
        'all_placeholders': all_placeholders,
        'unique_placeholders': unique_sorted,
    }
|
|
|
|
|
|
def main():
    """Run the placeholder check on the template_finish folder beside this script."""
    script_dir = os.path.dirname(__file__)
    template_dir = os.path.join(script_dir, 'template_finish')

    print(f"检查目录: {template_dir}")
    print()

    report = check_templates_in_directory(template_dir)
    # Nothing came back from the check, so there is no completion banner to show.
    if not report:
        return

    rule = "=" * 80
    print("\n" + rule)
    print("检查完成!")
    print(rule)
|
|
|
|
|
|
# Run the check only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()