增加占位符替换测试脚本
This commit is contained in:
parent
cb4a07f148
commit
557c9ae351
Binary file not shown.
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
333
test_template_placeholder_replacement.py
Normal file
333
test_template_placeholder_replacement.py
Normal file
@ -0,0 +1,333 @@
|
|||||||
|
"""
|
||||||
|
测试模板占位符识别和替换功能
|
||||||
|
读取模板文件,识别占位符,用虚拟数据替换并生成新文档
|
||||||
|
"""
|
||||||
|
import os
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from docx import Document
|
||||||
|
from services.document_service import DocumentService
|
||||||
|
|
||||||
|
|
||||||
|
def _docx_paragraph_placeholders(paragraph, pattern) -> set:
    """Return the well-formed placeholder names found in a single paragraph.

    Args:
        paragraph: a python-docx paragraph object.
        pattern: compiled regex whose first group captures the placeholder name.

    Returns:
        Set of stripped placeholder names that contain no stray braces.
    """
    # Join the run texts so a placeholder split across several runs is seen
    # as one string; fall back to paragraph.text when the runs yield nothing.
    text = ''.join(run.text for run in paragraph.runs)
    if not text:
        text = paragraph.text

    found = set()
    for match in pattern.findall(text):
        cleaned = match.strip()
        # Discard incomplete placeholders (captures that still contain a brace).
        if cleaned and '{' not in cleaned and '}' not in cleaned:
            found.add(cleaned)
    return found


def extract_placeholders_from_docx(file_path: str) -> set:
    """Extract every ``{{field_code}}`` placeholder from a docx file.

    Body paragraphs and the paragraphs of every top-level table cell are
    scanned. Each paragraph is scanned on its own, so text from two different
    paragraphs of the same cell can never be glued together into a
    placeholder that does not actually exist in the document.

    Args:
        file_path: path of the docx file.

    Returns:
        Set of placeholder names, e.g. ``{'field_code1', 'field_code2'}``.
        If the file cannot be read the error is printed (with traceback) and
        whatever was collected so far (possibly an empty set) is returned.
    """
    placeholders = set()
    pattern = re.compile(r'\{\{([^}]+)\}\}')  # matches the {{field_code}} form

    try:
        doc = Document(file_path)

        # Placeholders in body paragraphs.
        for paragraph in doc.paragraphs:
            placeholders |= _docx_paragraph_placeholders(paragraph, pattern)

        # Placeholders in tables, one cell paragraph at a time.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        placeholders |= _docx_paragraph_placeholders(paragraph, pattern)

    except Exception as e:
        # Best-effort helper for a test script: report and carry on.
        print(f" [错误] 读取文件失败 - {str(e)}")
        import traceback
        traceback.print_exc()

    return placeholders
|
||||||
|
|
||||||
|
|
||||||
|
def generate_mock_data(placeholders: set) -> dict:
    """Build mock field values for a set of placeholder names.

    Args:
        placeholders: set of placeholder names.

    Returns:
        Dict mapping each placeholder name to a mock value. Names listed in
        the built-in table get their canned value; every other name gets the
        generic ``[虚拟数据-<name>]`` marker.
    """
    # Canned values for the commonly used fields.
    known_values = {
        # Basic information
        'target_name': '张三',
        'target_organization_and_position': '某市某区某局副局长',
        'target_gender': '男',
        'target_date_of_birth': '198005',
        'target_date_of_birth_full': '1980年05月15日',
        'target_political_status': '中共党员',
        'target_professional_rank': '副处级',
        'target_id_number': '110101198005151234',
        'target_address': '某市某区某街道某小区1号楼101室',
        'target_registered_address': '某市某区某街道某小区1号楼101室',
        'target_contact': '13800138000',
        'target_place_of_origin': '某省某市',
        'target_ethnicity': '汉族',

        # Clue / issue information
        'clue_source': '群众举报',
        'target_issue_description': '涉嫌违反工作纪律,在项目审批过程中存在不当行为',
        'target_problem_description': '在项目审批过程中,未严格按照规定程序执行,存在程序不规范问题',
        'target_work_basic_info': '2005年参加工作,先后在某局多个科室工作,2018年任现职',
        'target_attitude': '被核查人表示认识到问题的严重性,愿意积极配合调查',

        # Approval information
        'filler_name': '李四',
        'department_opinion': '同意开展初步核实',
        'approval_time': '2024年12月15日',
        'report_card_request_time': '2024年12月15日',
        'handling_department': '某市纪委监委第一监督检查室',
        'handler_name': '王五',

        # Interview related
        'appointment_time': '2024年12月20日上午9:00',
        'appointment_location': '某市纪委监委谈话室',
        'notification_time': '2024年12月18日',
        'notification_location': '某市纪委监委',

        # Investigation information
        'investigation_unit_name': '某市纪委监委',
        'investigation_team_leader_name': '赵六',
        'investigation_team_member_names': '赵六、孙七、周八',
        'investigation_location': '某市纪委监委',
        'investigation_team_code': '2024-001',

        # Miscellaneous
        'commission_name': '某市纪律检查委员会',
        'backup_personnel': '钱九',
        'assessment_opinion': '风险评估为低风险,可以开展谈话',
    }

    # One entry per requested placeholder; unknown names fall back to a marker.
    return {
        name: known_values.get(name, f'[虚拟数据-{name}]')
        for name in placeholders
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_body_and_table_paragraphs(doc):
    """Yield the body paragraphs, then the paragraphs of every top-level table cell."""
    yield from doc.paragraphs
    for table in doc.tables:
        for row in table.rows:
            for cell in row.cells:
                yield from cell.paragraphs


def _fill_paragraph(paragraph, field_data: dict, fill_pattern) -> None:
    """Replace every known ``{{field_code}}`` placeholder of one paragraph in place."""
    full_text = paragraph.text
    if not full_text:
        return

    def _substitute(match):
        # Extraction strips whitespace, so "{{ name }}" must be filled just
        # like "{{name}}"; unknown names are left exactly as they were.
        key = match.group(1).strip()
        if key not in field_data:
            return match.group(0)
        field_value = field_data[key]
        return str(field_value) if field_value else ''

    final_text = fill_pattern.sub(_substitute, full_text)
    if final_text == full_text:
        return

    runs = paragraph.runs
    if not runs:
        # Nothing to write the text into; the placeholder stays and will be
        # reported by the verification step.
        return

    # Collapse the paragraph into its first run: the first run keeps its
    # formatting, the remaining runs are emptied.
    runs[0].text = final_text
    for run in runs[1:]:
        run.text = ''


def _find_unreplaced(doc, placeholder_pattern) -> set:
    """Return the names of well-formed placeholders still present in ``doc``."""
    remaining = set()
    for paragraph in _iter_body_and_table_paragraphs(doc):
        for match in placeholder_pattern.findall(paragraph.text):
            cleaned = match.strip()
            if cleaned and '{' not in cleaned and '}' not in cleaned:
                remaining.add(cleaned)
    return remaining


def test_template_replacement(template_path: str, output_dir: str):
    """Fill one docx template with mock data and report the outcome.

    Steps: extract the placeholders, generate mock values for them, fill the
    body paragraphs and table cells, save ``<template stem>_已填充.docx`` into
    ``output_dir``, then re-open the saved file and list any placeholder that
    is still present.

    Args:
        template_path: path of the template file.
        output_dir: directory the filled document is written to (created if
            it does not exist).

    Returns:
        True when the filled document was written. Placeholders that remain
        after filling are only reported as a warning and do not change the
        result. False when the template is missing, contains no placeholder,
        or filling/saving raised an exception.
    """
    print(f"\n{'='*80}")
    print(f"测试模板: {template_path}")
    print(f"{'='*80}")

    # Guard: the template must exist.
    if not os.path.exists(template_path):
        print(f" [错误] 模板文件不存在: {template_path}")
        return False

    # Step 1: extract the placeholders.
    print("\n[步骤1] 提取占位符...")
    placeholders = extract_placeholders_from_docx(template_path)
    print(f" 发现 {len(placeholders)} 个不同的占位符:")
    for placeholder in sorted(placeholders):
        print(f" - {{{{ {placeholder} }}}}")

    if not placeholders:
        print(" [警告] 未发现任何占位符,模板可能不需要填充数据")
        return False

    # Step 2: generate the mock data.
    print("\n[步骤2] 生成虚拟数据...")
    field_data = generate_mock_data(placeholders)
    print(f" 生成了 {len(field_data)} 个字段的虚拟数据")

    # Make sure the output directory exists.
    os.makedirs(output_dir, exist_ok=True)

    # Step 3: fill the template. DocumentService needs environment
    # configuration, so this test uses a simplified local fill instead.
    print("\n[步骤3] 填充模板...")
    try:
        doc = Document(template_path)

        # Pattern used for verification (same as the extraction pattern).
        placeholder_pattern = re.compile(r'\{\{([^}]+)\}\}')
        # Pattern used for filling: the capture may not contain braces, so
        # "{{{name}}" is filled from its inner "{{name}}" part.
        fill_pattern = re.compile(r'\{\{([^{}]+)\}\}')

        for paragraph in _iter_body_and_table_paragraphs(doc):
            _fill_paragraph(paragraph, field_data, fill_pattern)

        # Build the output file name.
        template_name = Path(template_path).stem
        output_file = os.path.join(output_dir, f"{template_name}_已填充.docx")

        # Save the document.
        doc.save(output_file)
        print(f" [成功] 文档已保存到: {output_file}")

        # Step 4: re-open the saved file and look for leftovers.
        print("\n[步骤4] 验证替换结果...")
        verify_doc = Document(output_file)
        remaining_placeholders = _find_unreplaced(verify_doc, placeholder_pattern)

        if remaining_placeholders:
            print(f" [警告] 仍有 {len(remaining_placeholders)} 个占位符未替换:")
            for placeholder in sorted(remaining_placeholders):
                print(f" - {{{{ {placeholder} }}}}")
        else:
            print(" [成功] 所有占位符已成功替换!")

        return True

    except Exception as e:
        # Top-level boundary of the test script: report and signal failure.
        print(f" [错误] 填充模板失败: {str(e)}")
        import traceback
        traceback.print_exc()
        return False
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Run the replacement test on the two bundled templates and print a summary."""
    # Everything is resolved relative to the directory holding this script.
    base_dir = Path(__file__).parent
    stage_dir = base_dir / "template_finish" / "2-初核模版"

    # Template files under test.
    template1_path = stage_dir / "2.谈话审批" / "走读式谈话审批" / "2谈话审批表.docx"
    template2_path = stage_dir / "3.初核结论" / "8-1请示报告卡(初核报告结论) .docx"

    # Where the filled documents are written.
    output_dir = base_dir / "output_temp"

    rule = "=" * 80
    print(rule)
    print("模板占位符识别和替换测试")
    print(rule)

    # Run both templates before printing anything of the summary.
    success1 = test_template_replacement(str(template1_path), str(output_dir))
    success2 = test_template_replacement(str(template2_path), str(output_dir))

    # Summary.
    status1 = '[成功]' if success1 else '[失败]'
    status2 = '[成功]' if success2 else '[失败]'
    print(f"\n{rule}")
    print("测试总结")
    print(rule)
    print(f"模板1 (2谈话审批表.docx): {status1}")
    print(f"模板2 (8-1请示报告卡(初核报告结论).docx): {status2}")
    print(f"\n输出目录: {output_dir}")
    print(f"{rule}\n")


if __name__ == "__main__":
    main()
|
||||||
100
verify_generated_files.py
Normal file
100
verify_generated_files.py
Normal file
@ -0,0 +1,100 @@
|
|||||||
|
"""
|
||||||
|
验证生成的Word文档中是否还有未替换的占位符
|
||||||
|
"""
|
||||||
|
import re
|
||||||
|
from pathlib import Path
|
||||||
|
from docx import Document
|
||||||
|
|
||||||
|
|
||||||
|
def _unreplaced_in_paragraph(paragraph, pattern) -> set:
    """Return the well-formed placeholder names still present in one paragraph."""
    # Join the run texts so a placeholder split across runs is seen as one
    # string; fall back to paragraph.text when the runs yield nothing.
    text = ''.join(run.text for run in paragraph.runs)
    if not text:
        text = paragraph.text

    found = set()
    for match in pattern.findall(text):
        cleaned = match.strip()
        # Ignore captures that still contain a brace (incomplete placeholders).
        if cleaned and '{' not in cleaned and '}' not in cleaned:
            found.add(cleaned)
    return found


def check_placeholders_in_docx(file_path: str):
    """Collect the ``{{field_code}}`` placeholders still present in a docx file.

    Body paragraphs and the paragraphs of every top-level table cell are
    scanned. Each paragraph is scanned on its own, so text from two different
    paragraphs of the same cell is never concatenated into a placeholder that
    is not really in the document.

    Args:
        file_path: path of the docx file.

    Returns:
        Set of placeholder names found (empty when the document is clean), or
        None when the file could not be read; the error is printed in that case.
    """
    placeholders = set()
    pattern = re.compile(r'\{\{([^}]+)\}\}')  # matches the {{field_code}} form

    try:
        doc = Document(file_path)

        # Placeholders in body paragraphs.
        for paragraph in doc.paragraphs:
            placeholders |= _unreplaced_in_paragraph(paragraph, pattern)

        # Placeholders in tables, one cell paragraph at a time.
        for table in doc.tables:
            for row in table.rows:
                for cell in row.cells:
                    for paragraph in cell.paragraphs:
                        placeholders |= _unreplaced_in_paragraph(paragraph, pattern)

        return placeholders

    except Exception as e:
        # None (not an empty set) lets the caller tell "unreadable" from "clean".
        print(f" 错误: 读取文件失败 - {str(e)}")
        return None
|
||||||
|
|
||||||
|
|
||||||
|
def main():
    """Check the two generated documents for leftover placeholders and print a verdict."""
    output_dir = Path(__file__).parent / "output_temp"
    rule = "=" * 80

    print(rule)
    print("验证生成的Word文档")
    print(rule)

    # The two files produced by the replacement test script.
    files_to_check = [
        output_dir / "2谈话审批表_已填充.docx",
        output_dir / "8-1请示报告卡(初核报告结论) _已填充.docx",
    ]

    problem_count = 0
    for file_path in files_to_check:
        print(f"\n检查文件: {file_path.name}")

        if not file_path.exists():
            print(" [错误] 文件不存在")
            problem_count += 1
            continue

        placeholders = check_placeholders_in_docx(str(file_path))

        # None means the file could not be read at all.
        if placeholders is None:
            print(" [错误] 无法读取文件")
            problem_count += 1
            continue

        if not placeholders:
            print(" [成功] 所有占位符已成功替换,文档可以正常使用")
            continue

        print(f" [警告] 发现 {len(placeholders)} 个未替换的占位符:")
        for placeholder in sorted(placeholders):
            print(f" - {{{{ {placeholder} }}}}")
        problem_count += 1

    print(f"\n{rule}")
    verdict = "验证结果: 所有文件验证通过!" if problem_count == 0 else "验证结果: 部分文件存在问题,请检查"
    print(verdict)
    print(f"{rule}\n")


if __name__ == "__main__":
    main()
|
||||||
Loading…
x
Reference in New Issue
Block a user